/* Reassociation for trees.
   Copyright (C) 2005-2014 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hash-table.h"
#include "stor-layout.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-inline.h"
#include "pointer-set.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "gimple-expr.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "alloc-pool.h"
#include "langhooks.h"
#include "diagnostic-core.h"
/*  This is a simple global reassociation pass.  It is, in part, based
    on the LLVM pass of the same name (They do some things more/less
    than we do, in different orders, etc).

    It consists of five steps:

    1. Breaking up subtract operations into addition + negate, where
    it would promote the reassociation of adds.

    2. Left linearization of the expression trees, so that (A+B)+(C+D)
    becomes (((A+B)+C)+D), which is easier for us to rewrite later.
    During linearization, we place the operands of the binary
    expressions into a vector of operand_entry_t

    3. Optimization of the operand lists, eliminating things like a +
    -a, a & a, etc.

    3a. Combine repeated factors with the same occurrence counts
    into a __builtin_powi call that will later be optimized into
    an optimal number of multiplies.

    4. Rewrite the expression trees we linearized and optimized so
    they are in proper rank order.

    5. Repropagate negates, as nothing else will clean it up ATM.

    A bit of theory on #4, since nobody seems to write anything down
    about why it makes sense to do it the way they do it:

    We could do this much nicer theoretically, but don't (for reasons
    explained after how to do it theoretically nice :P).

    In order to promote the most redundancy elimination, you want
    binary expressions whose operands are the same rank (or
    preferably, the same value) exposed to the redundancy eliminator,
    for possible elimination.

    So the way to do this if we really cared, is to build the new op
    tree from the leaves to the roots, merging as you go, and putting the
    new op on the end of the worklist, until you are left with one
    thing on the worklist.

    IE if you have to rewrite the following set of operands (listed with
    rank in parentheses), with opcode PLUS_EXPR:

    a (1),  b (1),  c (1),  d (2), e (2)

    We start with our merge worklist empty, and the ops list with all of
    those on it.

    You want to first merge all leaves of the same rank, as much as
    possible.

    So first build a binary op of

    mergetmp = a + b, and put "mergetmp" on the merge worklist.

    Because there is no three operand form of PLUS_EXPR, c is not going to
    be exposed to redundancy elimination as a rank 1 operand.

    So you might as well throw it on the merge worklist (you could also
    consider it to now be a rank two operand, and merge it with d and e,
    but in this case, you then have evicted e from a binary op.  So at
    least in this situation, you can't win.)

    Then build a binary op of d + e
    mergetmp2 = d + e

    and put mergetmp2 on the merge worklist.

    so merge worklist = {mergetmp, c, mergetmp2}

    Continue building binary ops of these operations until you have only
    one operation left on the worklist.

    mergetmp3 = mergetmp + c

    worklist = {mergetmp2, mergetmp3}

    mergetmp4 = mergetmp2 + mergetmp3

    worklist = {mergetmp4}

    because we have one operation left, we can now just set the original
    statement equal to the result of that operation.

    This will at least expose a + b and d + e to redundancy elimination
    as binary operations.

    For extra points, you can reuse the old statements to build the
    mergetmps, since you shouldn't run out.

    So why don't we do this?

    Because it's expensive, and rarely will help.  Most trees we are
    reassociating have 3 or less ops.  If they have 2 ops, they already
    will be written into a nice single binary op.  If you have 3 ops, a
    single simple check suffices to tell you whether the first two are of the
    same rank.  If so, you know to order it

    mergetmp = op1 + op2
    newstmt = mergetmp + op3

    instead of
    mergetmp = op2 + op3
    newstmt = mergetmp + op1

    If all three are of the same rank, you can't expose them all in a
    single binary operator anyway, so the above is *still* the best you
    can do.

    Thus, this is what we do.  When we have three ops left, we check to see
    what order to put them in, and call it a day.  As a nod to vector sum
    reduction, we check if any of the ops are really a phi node that is a
    destructive update for the associating op, and keep the destructive
    update together for vector sum reduction recognition.  */
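
/* As a small worked illustration of steps 1-4 (the variable and
   temporary names below are invented for exposition only): given

     x = a - b;
     y = (x + c) + (b + d);

   step 1 rewrites the subtract as an addition of a negate,
   x = a + (-b); step 2 linearizes the whole tree into the operand
   list {a, -b, c, b, d}; step 3 cancels -b against b; and step 4
   rebuilds the result in rank order as

     t1 = a + c;
     t2 = t1 + d;
     y  = t2;

   exposing the intermediate sums to later redundancy elimination.  */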
static struct
{
  int constants_eliminated;
  int ops_eliminated;
  int pows_encountered;
} reassociate_stats;
/* Operator, rank pair.  */
typedef struct operand_entry
{
  unsigned int rank;
  int id;
  tree op;
  unsigned int count;
} *operand_entry_t;

static alloc_pool operand_entry_pool;

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;

/* Operand->rank hashtable.  */
static struct pointer_map_t *operand_rank;

static long get_rank (tree);
/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)
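
/* Illustrative numbers only (the actual per-block rank spacing is
   assigned elsewhere in this pass and is assumed here, not enforced by
   this macro): if the loop body's block rank were 0x30000 and the
   latch's 0x40000, a recognized accumulator phi would get rank
   0x40000 + PHI_LOOP_BIAS == 0x48000, which is above every unbiased
   operand computed inside the loop yet still below the rank of the
   next block, per the constraint stated above.  */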
/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
static long
phi_rank (gimple stmt)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple use_stmt;

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
	{
	  gimple def_stmt = SSA_NAME_DEF_STMT (arg);
	  if (gimple_bb (def_stmt)->loop_father == father)
	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
	}
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}
/* If EXP is an SSA_NAME defined by a PHI statement that represents a
   loop-carried dependence of an innermost loop, return TRUE; else
   return FALSE.  */

static bool
loop_carried_phi (tree exp)
{
  gimple phi_stmt;
  long block_rank;

  if (TREE_CODE (exp) != SSA_NAME
      || SSA_NAME_IS_DEFAULT_DEF (exp))
    return false;

  phi_stmt = SSA_NAME_DEF_STMT (exp);

  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
    return false;

  /* Non-loop-carried phis have block rank.  Loop-carried phis have
     an additional bias added in.  If this phi doesn't have block rank,
     it's biased and should not be propagated.  */
  block_rank = bb_rank[gimple_bb (phi_stmt)->index];

  if (phi_rank (phi_stmt) != block_rank)
    return true;

  return false;
}
/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */

static long
propagate_rank (long rank, tree op)
{
  long op_rank;

  if (loop_carried_phi (op))
    return rank;

  op_rank = get_rank (op);

  return MAX (rank, op_rank);
}
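
/* For instance (SSA names invented): when computing the rank of
   t_4 = a_3 + x_1, where x_1 is a loop-carried phi carrying the
   PHI_LOOP_BIAS, the call propagate_rank (rank, x_1) returns RANK
   unchanged rather than the biased phi rank, so t_4 itself keeps an
   ordinary block-local rank and the bias stops at the phi.  */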
/* Look up the operand rank structure for expression E.  */

static long
find_operand_rank (tree e)
{
  void **slot = pointer_map_contains (operand_rank, e);
  return slot ? (long) (intptr_t) *slot : -1;
}

/* Insert {E,RANK} into the operand rank hashtable.  */

static void
insert_operand_rank (tree e, long rank)
{
  void **slot;
  gcc_assert (rank > 0);
  slot = pointer_map_insert (operand_rank, e);
  gcc_assert (!*slot);
  *slot = (void *) (intptr_t) rank;
}
/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* Constants have rank 0.  */
  if (is_gimple_min_invariant (e))
    return 0;

  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      gimple stmt;
      long rank;
      int i, n;
      tree op;

      if (SSA_NAME_IS_DEFAULT_DEF (e))
	return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
	return phi_rank (stmt);

      if (!is_gimple_assign (stmt)
	  || gimple_vdef (stmt))
	return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
	return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
	 exception, remove the bias from loop-carried phis when propagating
	 the rank so that dependent operations are not also biased.  */
      rank = 0;
      if (gimple_assign_single_p (stmt))
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  n = TREE_OPERAND_LENGTH (rhs);
	  if (n == 0)
	    rank = propagate_rank (rank, rhs);
	  else
	    {
	      for (i = 0; i < n; i++)
		{
		  op = TREE_OPERAND (rhs, i);

		  if (op != NULL_TREE)
		    rank = propagate_rank (rank, op);
		}
	    }
	}
      else
	{
	  n = gimple_num_ops (stmt);
	  for (i = 1; i < n; i++)
	    {
	      op = gimple_op (stmt, i);
	      gcc_assert (op);
	      rank = propagate_rank (rank, op);
	    }
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Rank for ");
	  print_generic_expr (dump_file, e, 0);
	  fprintf (dump_file, " is %ld\n", (rank + 1));
	}

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Globals, etc, are rank 0 */
  return 0;
}
/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */

static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    return FLOAT_CONST_TYPE;
  else
    return OTHER_CONST_TYPE;
}
/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */

static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry_t oea = *(const operand_entry_t *)pa;
  const operand_entry_t oeb = *(const operand_entry_t *)pb;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oeb->rank == 0 && oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
	return constant_type (oeb->op) - constant_type (oea->op);
      else
	/* To make sorting result stable, we use unique IDs to determine
	   order.  */
	return oeb->id - oea->id;
    }

  /* Lastly, make sure the versions that are the same go next to each
     other.  We use SSA_NAME_VERSION because it's stable.  */
  if ((oeb->rank - oea->rank == 0)
      && TREE_CODE (oea->op) == SSA_NAME
      && TREE_CODE (oeb->op) == SSA_NAME)
    {
      if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
	return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
      else
	return oeb->id - oea->id;
    }

  if (oeb->rank != oea->rank)
    return oeb->rank - oea->rank;
  else
    return oeb->id - oea->id;
}
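
/* Illustration (operand names invented): for the entries a_5 (rank 3),
   b_7 (rank 3), 4 (rank 0, integer) and 1.0 (rank 0, float), qsort
   with this comparator puts the two rank-3 SSA names first (ties
   broken by SSA_NAME_VERSION or the stable id) and groups the two
   constants together at the end of the vector, ordered by
   constant_type so that foldable constants end up adjacent.  */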
/* Add an operand entry to *OPS for the tree operand OP.  */
529 add_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
)
531 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
534 oe
->rank
= get_rank (op
);
535 oe
->id
= next_operand_entry_id
++;
/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */
544 add_repeat_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
,
545 HOST_WIDE_INT repeat
)
547 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
550 oe
->rank
= get_rank (op
);
551 oe
->id
= next_operand_entry_id
++;
555 reassociate_stats
.pows_encountered
++;
/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */
562 is_reassociable_op (gimple stmt
, enum tree_code code
, struct loop
*loop
)
564 basic_block bb
= gimple_bb (stmt
);
566 if (gimple_bb (stmt
) == NULL
)
569 if (!flow_bb_inside_loop_p (loop
, bb
))
572 if (is_gimple_assign (stmt
)
573 && gimple_assign_rhs_code (stmt
) == code
574 && has_single_use (gimple_assign_lhs (stmt
)))
581 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
582 operand of the negate operation. Otherwise, return NULL. */
585 get_unary_op (tree name
, enum tree_code opcode
)
587 gimple stmt
= SSA_NAME_DEF_STMT (name
);
589 if (!is_gimple_assign (stmt
))
592 if (gimple_assign_rhs_code (stmt
) == opcode
)
593 return gimple_assign_rhs1 (stmt
);
/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */
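
/* For example (operand names invented): with opcode BIT_AND_EXPR the
   operand list {b_2, b_2, a_1} drops one copy of b_2, since b & b == b,
   while with BIT_XOR_EXPR both copies are removed (and, if they were
   the only two operands, a zero of the right type is pushed in their
   place), since b ^ b == 0.  */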
602 eliminate_duplicate_pair (enum tree_code opcode
,
603 vec
<operand_entry_t
> *ops
,
606 operand_entry_t curr
,
607 operand_entry_t last
)
610 /* If we have two of the same op, and the opcode is & |, min, or max,
611 we can eliminate one of them.
612 If we have two of the same op, and the opcode is ^, we can
613 eliminate both of them. */
615 if (last
&& last
->op
== curr
->op
)
623 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
625 fprintf (dump_file
, "Equivalence: ");
626 print_generic_expr (dump_file
, curr
->op
, 0);
627 fprintf (dump_file
, " [&|minmax] ");
628 print_generic_expr (dump_file
, last
->op
, 0);
629 fprintf (dump_file
, " -> ");
630 print_generic_stmt (dump_file
, last
->op
, 0);
633 ops
->ordered_remove (i
);
634 reassociate_stats
.ops_eliminated
++;
639 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
641 fprintf (dump_file
, "Equivalence: ");
642 print_generic_expr (dump_file
, curr
->op
, 0);
643 fprintf (dump_file
, " ^ ");
644 print_generic_expr (dump_file
, last
->op
, 0);
645 fprintf (dump_file
, " -> nothing\n");
648 reassociate_stats
.ops_eliminated
+= 2;
650 if (ops
->length () == 2)
653 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
658 ops
->ordered_remove (i
-1);
659 ops
->ordered_remove (i
-1);
671 static vec
<tree
> plus_negates
;
/* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
   expression, look in OPS for a corresponding positive operation to cancel
   it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */
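
/* For example (SSA names invented): for x_1 + c_2 + (-x_1) the negate
   cancels against x_1 and the pair is replaced by 0, while for
   x_1 + c_2 + ~x_1 the pair is replaced by -1, because x + ~x is all
   ones in two's complement.  */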
680 eliminate_plus_minus_pair (enum tree_code opcode
,
681 vec
<operand_entry_t
> *ops
,
682 unsigned int currindex
,
683 operand_entry_t curr
)
690 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
693 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
694 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
695 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */
702 for (i
= currindex
+ 1;
703 ops
->iterate (i
, &oe
)
704 && oe
->rank
>= curr
->rank
- 1 ;
707 if (oe
->op
== negateop
)
710 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
712 fprintf (dump_file
, "Equivalence: ");
713 print_generic_expr (dump_file
, negateop
, 0);
714 fprintf (dump_file
, " + -");
715 print_generic_expr (dump_file
, oe
->op
, 0);
716 fprintf (dump_file
, " -> 0\n");
719 ops
->ordered_remove (i
);
720 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
721 ops
->ordered_remove (currindex
);
722 reassociate_stats
.ops_eliminated
++;
726 else if (oe
->op
== notop
)
728 tree op_type
= TREE_TYPE (oe
->op
);
730 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
732 fprintf (dump_file
, "Equivalence: ");
733 print_generic_expr (dump_file
, notop
, 0);
734 fprintf (dump_file
, " + ~");
735 print_generic_expr (dump_file
, oe
->op
, 0);
736 fprintf (dump_file
, " -> -1\n");
739 ops
->ordered_remove (i
);
740 add_to_ops_vec (ops
, build_int_cst_type (op_type
, -1));
741 ops
->ordered_remove (currindex
);
742 reassociate_stats
.ops_eliminated
++;
748 /* CURR->OP is a negate expr in a plus expr: save it for later
749 inspection in repropagate_negates(). */
750 if (negateop
!= NULL_TREE
)
751 plus_negates
.safe_push (curr
->op
);
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0, and return true.  Otherwise, return false.  */
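
/* For example (SSA names invented): x_1 & c_2 & ~x_1 collapses to 0
   and x_1 | c_2 | ~x_1 collapses to all ones, so every remaining
   operand in the list becomes irrelevant and is eliminated.  */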
763 eliminate_not_pairs (enum tree_code opcode
,
764 vec
<operand_entry_t
> *ops
,
765 unsigned int currindex
,
766 operand_entry_t curr
)
772 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
773 || TREE_CODE (curr
->op
) != SSA_NAME
)
776 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
777 if (notop
== NULL_TREE
)
  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */
784 for (i
= currindex
+ 1;
785 ops
->iterate (i
, &oe
)
786 && oe
->rank
>= curr
->rank
- 1;
791 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
793 fprintf (dump_file
, "Equivalence: ");
794 print_generic_expr (dump_file
, notop
, 0);
795 if (opcode
== BIT_AND_EXPR
)
796 fprintf (dump_file
, " & ~");
797 else if (opcode
== BIT_IOR_EXPR
)
798 fprintf (dump_file
, " | ~");
799 print_generic_expr (dump_file
, oe
->op
, 0);
800 if (opcode
== BIT_AND_EXPR
)
801 fprintf (dump_file
, " -> 0\n");
802 else if (opcode
== BIT_IOR_EXPR
)
803 fprintf (dump_file
, " -> -1\n");
806 if (opcode
== BIT_AND_EXPR
)
807 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
808 else if (opcode
== BIT_IOR_EXPR
)
809 oe
->op
= build_low_bits_mask (TREE_TYPE (oe
->op
),
810 TYPE_PRECISION (TREE_TYPE (oe
->op
)));
812 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
814 ops
->quick_push (oe
);
/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */
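
/* The identities handled below are of the form (names invented):
   a_1 & 0 -> 0, a_1 & -1 -> a_1, a_1 | -1 -> -1, a_1 | 0 -> a_1,
   a_1 * 0 -> 0 (for floats only when NaNs and signed zeros need not
   be honored), a_1 * 1 -> a_1, and a_1 + 0 -> a_1.  */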
830 eliminate_using_constants (enum tree_code opcode
,
831 vec
<operand_entry_t
> *ops
)
833 operand_entry_t oelast
= ops
->last ();
834 tree type
= TREE_TYPE (oelast
->op
);
836 if (oelast
->rank
== 0
837 && (INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
842 if (integer_zerop (oelast
->op
))
844 if (ops
->length () != 1)
846 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
847 fprintf (dump_file
, "Found & 0, removing all other ops\n");
849 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
852 ops
->quick_push (oelast
);
856 else if (integer_all_onesp (oelast
->op
))
858 if (ops
->length () != 1)
860 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
861 fprintf (dump_file
, "Found & -1, removing\n");
863 reassociate_stats
.ops_eliminated
++;
868 if (integer_all_onesp (oelast
->op
))
870 if (ops
->length () != 1)
872 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
873 fprintf (dump_file
, "Found | -1, removing all other ops\n");
875 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
878 ops
->quick_push (oelast
);
882 else if (integer_zerop (oelast
->op
))
884 if (ops
->length () != 1)
886 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
887 fprintf (dump_file
, "Found | 0, removing\n");
889 reassociate_stats
.ops_eliminated
++;
894 if (integer_zerop (oelast
->op
)
895 || (FLOAT_TYPE_P (type
)
896 && !HONOR_NANS (TYPE_MODE (type
))
897 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type
))
898 && real_zerop (oelast
->op
)))
900 if (ops
->length () != 1)
902 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
903 fprintf (dump_file
, "Found * 0, removing all other ops\n");
905 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
907 ops
->quick_push (oelast
);
911 else if (integer_onep (oelast
->op
)
912 || (FLOAT_TYPE_P (type
)
913 && !HONOR_SNANS (TYPE_MODE (type
))
914 && real_onep (oelast
->op
)))
916 if (ops
->length () != 1)
918 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
919 fprintf (dump_file
, "Found * 1, removing\n");
921 reassociate_stats
.ops_eliminated
++;
929 if (integer_zerop (oelast
->op
)
930 || (FLOAT_TYPE_P (type
)
931 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
932 && fold_real_zero_addition_p (type
, oelast
->op
,
933 opcode
== MINUS_EXPR
)))
935 if (ops
->length () != 1)
937 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
938 fprintf (dump_file
, "Found [|^+] 0, removing\n");
940 reassociate_stats
.ops_eliminated
++;
952 static void linearize_expr_tree (vec
<operand_entry_t
> *, gimple
,
955 /* Structure for tracking and counting operands. */
956 typedef struct oecount_s
{
959 enum tree_code oecode
;
964 /* The heap for the oecount hashtable and the sorted list of operands. */
965 static vec
<oecount
> cvec
;
968 /* Oecount hashtable helpers. */
970 struct oecount_hasher
: typed_noop_remove
<void>
972 /* Note that this hash table stores integers, not pointers.
973 So, observe the casting in the member functions. */
974 typedef void value_type
;
975 typedef void compare_type
;
976 static inline hashval_t
hash (const value_type
*);
977 static inline bool equal (const value_type
*, const compare_type
*);
980 /* Hash function for oecount. */
983 oecount_hasher::hash (const value_type
*p
)
985 const oecount
*c
= &cvec
[(size_t)p
- 42];
986 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
989 /* Comparison function for oecount. */
992 oecount_hasher::equal (const value_type
*p1
, const compare_type
*p2
)
994 const oecount
*c1
= &cvec
[(size_t)p1
- 42];
995 const oecount
*c2
= &cvec
[(size_t)p2
- 42];
996 return (c1
->oecode
== c2
->oecode
997 && c1
->op
== c2
->op
);
1000 /* Comparison function for qsort sorting oecount elements by count. */
1003 oecount_cmp (const void *p1
, const void *p2
)
1005 const oecount
*c1
= (const oecount
*)p1
;
1006 const oecount
*c2
= (const oecount
*)p2
;
1007 if (c1
->cnt
!= c2
->cnt
)
1008 return c1
->cnt
- c2
->cnt
;
1010 /* If counts are identical, use unique IDs to stabilize qsort. */
1011 return c1
->id
- c2
->id
;
1014 /* Return TRUE iff STMT represents a builtin call that raises OP
1015 to some exponent. */
1018 stmt_is_power_of_op (gimple stmt
, tree op
)
1022 if (!is_gimple_call (stmt
))
1025 fndecl
= gimple_call_fndecl (stmt
);
1028 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
1031 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1033 CASE_FLT_FN (BUILT_IN_POW
):
1034 CASE_FLT_FN (BUILT_IN_POWI
):
1035 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1042 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1043 in place and return the result. Assumes that stmt_is_power_of_op
1044 was previously called for STMT and returned TRUE. */
1046 static HOST_WIDE_INT
1047 decrement_power (gimple stmt
)
1049 REAL_VALUE_TYPE c
, cint
;
1050 HOST_WIDE_INT power
;
1053 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1055 CASE_FLT_FN (BUILT_IN_POW
):
1056 arg1
= gimple_call_arg (stmt
, 1);
1057 c
= TREE_REAL_CST (arg1
);
1058 power
= real_to_integer (&c
) - 1;
1059 real_from_integer (&cint
, VOIDmode
, power
, 0, 0);
1060 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1063 CASE_FLT_FN (BUILT_IN_POWI
):
1064 arg1
= gimple_call_arg (stmt
, 1);
1065 power
= TREE_INT_CST_LOW (arg1
) - 1;
1066 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1074 /* Find the single immediate use of STMT's LHS, and replace it
1075 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1076 replace *DEF with OP as well. */
1079 propagate_op_to_single_use (tree op
, gimple stmt
, tree
*def
)
1084 gimple_stmt_iterator gsi
;
1086 if (is_gimple_call (stmt
))
1087 lhs
= gimple_call_lhs (stmt
);
1089 lhs
= gimple_assign_lhs (stmt
);
1091 gcc_assert (has_single_use (lhs
));
1092 single_imm_use (lhs
, &use
, &use_stmt
);
1096 if (TREE_CODE (op
) != SSA_NAME
)
1097 update_stmt (use_stmt
);
1098 gsi
= gsi_for_stmt (stmt
);
1099 unlink_stmt_vdef (stmt
);
1100 gsi_remove (&gsi
, true);
1101 release_defs (stmt
);
/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */
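
/* For instance (names invented): given the chain x_5 = a_1 * b_2 * c_3
   and a call zero_one_operation (&x_5, MULT_EXPR, b_2), the b_2 factor
   is spliced out of the chain so the remaining statements compute
   a_1 * c_3; had only a single operand been left, *DEF itself would
   have been replaced by it.  */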
1109 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1111 gimple stmt
= SSA_NAME_DEF_STMT (*def
);
1117 if (opcode
== MULT_EXPR
1118 && stmt_is_power_of_op (stmt
, op
))
1120 if (decrement_power (stmt
) == 1)
1121 propagate_op_to_single_use (op
, stmt
, def
);
1125 name
= gimple_assign_rhs1 (stmt
);
1127 /* If this is the operation we look for and one of the operands
1128 is ours simply propagate the other operand into the stmts
1130 if (gimple_assign_rhs_code (stmt
) == opcode
1132 || gimple_assign_rhs2 (stmt
) == op
))
1135 name
= gimple_assign_rhs2 (stmt
);
1136 propagate_op_to_single_use (name
, stmt
, def
);
1140 /* We might have a multiply of two __builtin_pow* calls, and
1141 the operand might be hiding in the rightmost one. */
1142 if (opcode
== MULT_EXPR
1143 && gimple_assign_rhs_code (stmt
) == opcode
1144 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1145 && has_single_use (gimple_assign_rhs2 (stmt
)))
1147 gimple stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1148 if (stmt_is_power_of_op (stmt2
, op
))
1150 if (decrement_power (stmt2
) == 1)
1151 propagate_op_to_single_use (op
, stmt2
, def
);
1156 /* Continue walking the chain. */
1157 gcc_assert (name
!= op
1158 && TREE_CODE (name
) == SSA_NAME
);
1159 stmt
= SSA_NAME_DEF_STMT (name
);
1164 /* Returns true if statement S1 dominates statement S2. Like
1165 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1168 reassoc_stmt_dominates_stmt_p (gimple s1
, gimple s2
)
1170 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1172 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1173 SSA_NAME. Assume it lives at the beginning of function and
1174 thus dominates everything. */
1175 if (!bb1
|| s1
== s2
)
1178 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1184 /* PHIs in the same basic block are assumed to be
1185 executed all in parallel, if only one stmt is a PHI,
1186 it dominates the other stmt in the same basic block. */
1187 if (gimple_code (s1
) == GIMPLE_PHI
)
1190 if (gimple_code (s2
) == GIMPLE_PHI
)
1193 gcc_assert (gimple_uid (s1
) && gimple_uid (s2
));
1195 if (gimple_uid (s1
) < gimple_uid (s2
))
1198 if (gimple_uid (s1
) > gimple_uid (s2
))
1201 gimple_stmt_iterator gsi
= gsi_for_stmt (s1
);
1202 unsigned int uid
= gimple_uid (s1
);
1203 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1205 gimple s
= gsi_stmt (gsi
);
1206 if (gimple_uid (s
) != uid
)
1215 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
1218 /* Insert STMT after INSERT_POINT. */
1221 insert_stmt_after (gimple stmt
, gimple insert_point
)
1223 gimple_stmt_iterator gsi
;
1226 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1227 bb
= gimple_bb (insert_point
);
1228 else if (!stmt_ends_bb_p (insert_point
))
1230 gsi
= gsi_for_stmt (insert_point
);
1231 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1232 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1236 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1237 thus if it must end a basic block, it should be a call that can
1238 throw, or some assignment that can throw. If it throws, the LHS
1239 of it will not be initialized though, so only valid places using
1240 the SSA_NAME should be dominated by the fallthru edge. */
1241 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
1242 gsi
= gsi_after_labels (bb
);
1243 if (gsi_end_p (gsi
))
1245 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1246 gimple_set_uid (stmt
,
1247 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1250 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1251 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1254 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1255 the result. Places the statement after the definition of either
1256 OP1 or OP2. Returns the new statement. */
1259 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
1261 gimple op1def
= NULL
, op2def
= NULL
;
1262 gimple_stmt_iterator gsi
;
1266 /* Create the addition statement. */
1267 op
= make_ssa_name (type
, NULL
);
1268 sum
= gimple_build_assign_with_ops (opcode
, op
, op1
, op2
);
1270 /* Find an insertion place and insert. */
1271 if (TREE_CODE (op1
) == SSA_NAME
)
1272 op1def
= SSA_NAME_DEF_STMT (op1
);
1273 if (TREE_CODE (op2
) == SSA_NAME
)
1274 op2def
= SSA_NAME_DEF_STMT (op2
);
1275 if ((!op1def
|| gimple_nop_p (op1def
))
1276 && (!op2def
|| gimple_nop_p (op2def
)))
1278 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1279 if (gsi_end_p (gsi
))
1281 gimple_stmt_iterator gsi2
1282 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1283 gimple_set_uid (sum
,
1284 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1287 gimple_set_uid (sum
, gimple_uid (gsi_stmt (gsi
)));
1288 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1292 gimple insert_point
;
1293 if ((!op1def
|| gimple_nop_p (op1def
))
1294 || (op2def
&& !gimple_nop_p (op2def
)
1295 && reassoc_stmt_dominates_stmt_p (op1def
, op2def
)))
1296 insert_point
= op2def
;
1298 insert_point
= op1def
;
1299 insert_stmt_after (sum
, insert_point
);
/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
        second candidate bitmap noting all multiplication/division chains
        that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
        candidates with one (operand, code) operation removed from their
        multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
        multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
        processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b) * x * y in one invocation of the reassociation pass.  */
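
/* A compressed example of the steps above (names invented): for the
   addition chain a*x + b*x + c*y, the candidates are the two
   x-products and the y-product; the pair (x, MULT_EXPR) occurs twice,
   so an alternate chain a + b is built, multiplied by x, and
   substituted for the first candidate, leaving (a + b)*x + c*y for the
   normal reassociation machinery to finish.  */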
1342 undistribute_ops_list (enum tree_code opcode
,
1343 vec
<operand_entry_t
> *ops
, struct loop
*loop
)
1345 unsigned int length
= ops
->length ();
1346 operand_entry_t oe1
;
1348 sbitmap candidates
, candidates2
;
1349 unsigned nr_candidates
, nr_candidates2
;
1350 sbitmap_iterator sbi0
;
1351 vec
<operand_entry_t
> *subops
;
1352 hash_table
<oecount_hasher
> ctable
;
1353 bool changed
= false;
1354 int next_oecount_id
= 0;
1357 || opcode
!= PLUS_EXPR
)
1360 /* Build a list of candidates to process. */
1361 candidates
= sbitmap_alloc (length
);
1362 bitmap_clear (candidates
);
1364 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1366 enum tree_code dcode
;
1369 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1371 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1372 if (!is_gimple_assign (oe1def
))
1374 dcode
= gimple_assign_rhs_code (oe1def
);
1375 if ((dcode
!= MULT_EXPR
1376 && dcode
!= RDIV_EXPR
)
1377 || !is_reassociable_op (oe1def
, dcode
, loop
))
1380 bitmap_set_bit (candidates
, i
);
1384 if (nr_candidates
< 2)
1386 sbitmap_free (candidates
);
1390 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1392 fprintf (dump_file
, "searching for un-distribute opportunities ");
1393 print_generic_expr (dump_file
,
1394 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, 0);
1395 fprintf (dump_file
, " %d\n", nr_candidates
);
1398 /* Build linearized sub-operand lists and the counting table. */
1401 /* ??? Macro arguments cannot have multi-argument template types in
1402 them. This typedef is needed to workaround that limitation. */
1403 typedef vec
<operand_entry_t
> vec_operand_entry_t_heap
;
1404 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1405 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1408 enum tree_code oecode
;
1411 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1412 oecode
= gimple_assign_rhs_code (oedef
);
1413 linearize_expr_tree (&subops
[i
], oedef
,
1414 associative_tree_code (oecode
), false);
1416 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1423 c
.id
= next_oecount_id
++;
1426 idx
= cvec
.length () + 41;
1427 slot
= ctable
.find_slot ((void *)idx
, INSERT
);
1430 *slot
= (void *)idx
;
1435 cvec
[(size_t)*slot
- 42].cnt
++;
1441 /* Sort the counting table. */
1442 cvec
.qsort (oecount_cmp
);
1444 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1447 fprintf (dump_file
, "Candidates:\n");
1448 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1450 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1451 c
->oecode
== MULT_EXPR
1452 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1453 print_generic_expr (dump_file
, c
->op
, 0);
1454 fprintf (dump_file
, "\n");
1458 /* Process the (operand, code) pairs in order of most occurrence. */
1459 candidates2
= sbitmap_alloc (length
);
1460 while (!cvec
.is_empty ())
1462 oecount
*c
= &cvec
.last ();
1466 /* Now collect the operands in the outer chain that contain
1467 the common operand in their inner chain. */
1468 bitmap_clear (candidates2
);
1470 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1473 enum tree_code oecode
;
1475 tree op
= (*ops
)[i
]->op
;
1477 /* If we undistributed in this chain already this may be
1479 if (TREE_CODE (op
) != SSA_NAME
)
1482 oedef
= SSA_NAME_DEF_STMT (op
);
1483 oecode
= gimple_assign_rhs_code (oedef
);
1484 if (oecode
!= c
->oecode
)
1487 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1489 if (oe1
->op
== c
->op
)
1491 bitmap_set_bit (candidates2
, i
);
1498 if (nr_candidates2
>= 2)
1500 operand_entry_t oe1
, oe2
;
1502 int first
= bitmap_first_set_bit (candidates2
);
1504 /* Build the new addition chain. */
1505 oe1
= (*ops
)[first
];
1506 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1508 fprintf (dump_file
, "Building (");
1509 print_generic_expr (dump_file
, oe1
->op
, 0);
1511 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1512 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1516 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1518 fprintf (dump_file
, " + ");
1519 print_generic_expr (dump_file
, oe2
->op
, 0);
1521 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1522 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1523 oe1
->op
, oe2
->op
, opcode
);
1524 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1526 oe1
->op
= gimple_get_lhs (sum
);
1529 /* Apply the multiplication/division. */
1530 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1531 oe1
->op
, c
->op
, c
->oecode
);
1532 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1534 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1535 print_generic_expr (dump_file
, c
->op
, 0);
1536 fprintf (dump_file
, "\n");
1539 /* Record it in the addition chain and disable further
1540 undistribution with this op. */
1541 oe1
->op
= gimple_assign_lhs (prod
);
1542 oe1
->rank
= get_rank (oe1
->op
);
1543 subops
[first
].release ();
1551 for (i
= 0; i
< ops
->length (); ++i
)
1552 subops
[i
].release ();
1555 sbitmap_free (candidates
);
1556 sbitmap_free (candidates2
);
1561 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1562 expression, examine the other OPS to see if any of them are comparisons
1563 of the same values, which we may be able to combine or eliminate.
1564 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1567 eliminate_redundant_comparison (enum tree_code opcode
,
1568 vec
<operand_entry_t
> *ops
,
1569 unsigned int currindex
,
1570 operand_entry_t curr
)
1573 enum tree_code lcode
, rcode
;
1578 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
1581 /* Check that CURR is a comparison. */
1582 if (TREE_CODE (curr
->op
) != SSA_NAME
)
1584 def1
= SSA_NAME_DEF_STMT (curr
->op
);
1585 if (!is_gimple_assign (def1
))
1587 lcode
= gimple_assign_rhs_code (def1
);
1588 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
1590 op1
= gimple_assign_rhs1 (def1
);
1591 op2
= gimple_assign_rhs2 (def1
);
1593 /* Now look for a similar comparison in the remaining OPS. */
1594 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
1598 if (TREE_CODE (oe
->op
) != SSA_NAME
)
1600 def2
= SSA_NAME_DEF_STMT (oe
->op
);
1601 if (!is_gimple_assign (def2
))
1603 rcode
= gimple_assign_rhs_code (def2
);
1604 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
1607 /* If we got here, we have a match. See if we can combine the
1609 if (opcode
== BIT_IOR_EXPR
)
1610 t
= maybe_fold_or_comparisons (lcode
, op1
, op2
,
1611 rcode
, gimple_assign_rhs1 (def2
),
1612 gimple_assign_rhs2 (def2
));
1614 t
= maybe_fold_and_comparisons (lcode
, op1
, op2
,
1615 rcode
, gimple_assign_rhs1 (def2
),
1616 gimple_assign_rhs2 (def2
));
1620 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1621 always give us a boolean_type_node value back. If the original
1622 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1623 we need to convert. */
1624 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
1625 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
1627 if (TREE_CODE (t
) != INTEGER_CST
1628 && !operand_equal_p (t
, curr
->op
, 0))
1630 enum tree_code subcode
;
1631 tree newop1
, newop2
;
1632 if (!COMPARISON_CLASS_P (t
))
1634 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1635 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1636 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1637 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
1641 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1643 fprintf (dump_file
, "Equivalence: ");
1644 print_generic_expr (dump_file
, curr
->op
, 0);
1645 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
1646 print_generic_expr (dump_file
, oe
->op
, 0);
1647 fprintf (dump_file
, " -> ");
1648 print_generic_expr (dump_file
, t
, 0);
1649 fprintf (dump_file
, "\n");
1652 /* Now we can delete oe, as it has been subsumed by the new combined
1654 ops
->ordered_remove (i
);
1655 reassociate_stats
.ops_eliminated
++;
1657 /* If t is the same as curr->op, we're done. Otherwise we must
1658 replace curr->op with t. Special case is if we got a constant
1659 back, in which case we add it to the end instead of in place of
1660 the current entry. */
1661 if (TREE_CODE (t
) == INTEGER_CST
)
1663 ops
->ordered_remove (currindex
);
1664 add_to_ops_vec (ops
, t
);
1666 else if (!operand_equal_p (t
, curr
->op
, 0))
1669 enum tree_code subcode
;
1672 gcc_assert (COMPARISON_CLASS_P (t
));
1673 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1674 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1675 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1676 gcc_checking_assert (is_gimple_val (newop1
)
1677 && is_gimple_val (newop2
));
1678 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
1679 curr
->op
= gimple_get_lhs (sum
);
1687 /* Perform various identities and other optimizations on the list of
1688 operand entries, stored in OPS. The tree code for the binary
1689 operation between all the operands is OPCODE. */
1692 optimize_ops_list (enum tree_code opcode
,
1693 vec
<operand_entry_t
> *ops
)
1695 unsigned int length
= ops
->length ();
1698 operand_entry_t oelast
= NULL
;
1699 bool iterate
= false;
1704 oelast
= ops
->last ();
1706 /* If the last two are constants, pop the constants off, merge them
1707 and try the next two. */
1708 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
1710 operand_entry_t oelm1
= (*ops
)[length
- 2];
1712 if (oelm1
->rank
== 0
1713 && is_gimple_min_invariant (oelm1
->op
)
1714 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
1715 TREE_TYPE (oelast
->op
)))
1717 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
1718 oelm1
->op
, oelast
->op
);
1720 if (folded
&& is_gimple_min_invariant (folded
))
1722 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1723 fprintf (dump_file
, "Merging constants\n");
1728 add_to_ops_vec (ops
, folded
);
1729 reassociate_stats
.constants_eliminated
++;
1731 optimize_ops_list (opcode
, ops
);
1737 eliminate_using_constants (opcode
, ops
);
1740 for (i
= 0; ops
->iterate (i
, &oe
);)
1744 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
1746 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
1747 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
1748 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
1760 length
= ops
->length ();
1761 oelast
= ops
->last ();
1764 optimize_ops_list (opcode
, ops
);
/* The following functions are subroutines to optimize_range_tests and allow
   it to try to change a logical combination of comparisons into a range
   test.  For example, a test such as

	X == 2 || X == 5 || X == 3 || X == 4

   can be converted into

	(unsigned) (X - 2) <= 3

   For more information see comments above fold_test_range in fold-const.c,
   this implementation is for GIMPLE.  */
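
/* The arithmetic behind the example: the matched constants {2, 3, 4, 5}
   become {0, 1, 2, 3} after subtracting 2, and in an unsigned type any
   X below 2 wraps around to a huge value, so the whole disjunction is
   equivalent to the single unsigned comparison shown above.  */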
1787 bool strict_overflow_p
;
1788 unsigned int idx
, next
;
1791 /* This is similar to make_range in fold-const.c, but on top of
1792 GIMPLE instead of trees. If EXP is non-NULL, it should be
1793 an SSA_NAME and STMT argument is ignored, otherwise STMT
1794 argument should be a GIMPLE_COND. */
1797 init_range_entry (struct range_entry
*r
, tree exp
, gimple stmt
)
1801 bool is_bool
, strict_overflow_p
;
1805 r
->strict_overflow_p
= false;
1807 r
->high
= NULL_TREE
;
1808 if (exp
!= NULL_TREE
1809 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
1812 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1813 and see if we can refine the range. Some of the cases below may not
1814 happen, but it doesn't seem worth worrying about this. We "continue"
1815 the outer loop when we've changed something; otherwise we "break"
1816 the switch, which will "break" the while. */
1817 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
1820 strict_overflow_p
= false;
1822 if (exp
== NULL_TREE
)
1824 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
1826 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
1831 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
1836 enum tree_code code
;
1837 tree arg0
, arg1
, exp_type
;
1841 if (exp
!= NULL_TREE
)
1843 if (TREE_CODE (exp
) != SSA_NAME
)
1846 stmt
= SSA_NAME_DEF_STMT (exp
);
1847 if (!is_gimple_assign (stmt
))
1850 code
= gimple_assign_rhs_code (stmt
);
1851 arg0
= gimple_assign_rhs1 (stmt
);
1852 arg1
= gimple_assign_rhs2 (stmt
);
1853 exp_type
= TREE_TYPE (exp
);
1857 code
= gimple_cond_code (stmt
);
1858 arg0
= gimple_cond_lhs (stmt
);
1859 arg1
= gimple_cond_rhs (stmt
);
1860 exp_type
= boolean_type_node
;
1863 if (TREE_CODE (arg0
) != SSA_NAME
)
1865 loc
= gimple_location (stmt
);
1869 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
1870 /* Ensure the range is either +[-,0], +[0,0],
1871 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1872 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1873 or similar expression of unconditional true or
1874 false, it should not be negated. */
1875 && ((high
&& integer_zerop (high
))
1876 || (low
&& integer_onep (low
))))
1889 if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
1891 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
1896 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
1911 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
1913 &strict_overflow_p
);
1914 if (nexp
!= NULL_TREE
)
1917 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
1930 r
->strict_overflow_p
= strict_overflow_p
;
1934 /* Comparison function for qsort. Sort entries
1935 without SSA_NAME exp first, then with SSA_NAMEs sorted
1936 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1937 by increasing ->low and if ->low is the same, by increasing
1938 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1942 range_entry_cmp (const void *a
, const void *b
)
1944 const struct range_entry
*p
= (const struct range_entry
*) a
;
1945 const struct range_entry
*q
= (const struct range_entry
*) b
;
1947 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
1949 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
1951 /* Group range_entries for the same SSA_NAME together. */
1952 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
1954 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
1956 /* If ->low is different, NULL low goes first, then by
1958 if (p
->low
!= NULL_TREE
)
1960 if (q
->low
!= NULL_TREE
)
1962 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
1964 if (tem
&& integer_onep (tem
))
1966 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
1968 if (tem
&& integer_onep (tem
))
1974 else if (q
->low
!= NULL_TREE
)
1976 /* If ->high is different, NULL high goes last, before that by
1978 if (p
->high
!= NULL_TREE
)
1980 if (q
->high
!= NULL_TREE
)
1982 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
1984 if (tem
&& integer_onep (tem
))
1986 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
1988 if (tem
&& integer_onep (tem
))
1994 else if (p
->high
!= NULL_TREE
)
1996 /* If both ranges are the same, sort below by ascending idx. */
2001 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2004 if (p
->idx
< q
->idx
)
2008 gcc_checking_assert (p
->idx
> q
->idx
);
2013 /* Helper routine of optimize_range_test.
2014 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2015 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2016 OPCODE and OPS are arguments of optimize_range_tests. Return
2017 true if the range merge has been successful.
2018 If OPCODE is ERROR_MARK, this is called from within
2019 maybe_optimize_range_tests and is performing inter-bb range optimization.
2020 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2024 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2025 unsigned int count
, enum tree_code opcode
,
2026 vec
<operand_entry_t
> *ops
, tree exp
, bool in_p
,
2027 tree low
, tree high
, bool strict_overflow_p
)
2029 operand_entry_t oe
= (*ops
)[range
->idx
];
2031 gimple stmt
= op
? SSA_NAME_DEF_STMT (op
) :
2032 last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2033 location_t loc
= gimple_location (stmt
);
2034 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2035 tree tem
= build_range_check (loc
, optype
, exp
, in_p
, low
, high
);
2036 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2037 gimple_stmt_iterator gsi
;
2039 if (tem
== NULL_TREE
)
2042 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2043 warning_at (loc
, OPT_Wstrict_overflow
,
2044 "assuming signed overflow does not occur "
2045 "when simplifying range test");
2047 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2049 struct range_entry
*r
;
2050 fprintf (dump_file
, "Optimizing range tests ");
2051 print_generic_expr (dump_file
, range
->exp
, 0);
2052 fprintf (dump_file
, " %c[", range
->in_p
? '+' : '-');
2053 print_generic_expr (dump_file
, range
->low
, 0);
2054 fprintf (dump_file
, ", ");
2055 print_generic_expr (dump_file
, range
->high
, 0);
2056 fprintf (dump_file
, "]");
2057 for (r
= otherrange
; r
< otherrange
+ count
; r
++)
2059 fprintf (dump_file
, " and %c[", r
->in_p
? '+' : '-');
2060 print_generic_expr (dump_file
, r
->low
, 0);
2061 fprintf (dump_file
, ", ");
2062 print_generic_expr (dump_file
, r
->high
, 0);
2063 fprintf (dump_file
, "]");
2065 fprintf (dump_file
, "\n into ");
2066 print_generic_expr (dump_file
, tem
, 0);
2067 fprintf (dump_file
, "\n");
2070 if (opcode
== BIT_IOR_EXPR
2071 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2072 tem
= invert_truthvalue_loc (loc
, tem
);
2074 tem
= fold_convert_loc (loc
, optype
, tem
);
2075 gsi
= gsi_for_stmt (stmt
);
  /* In rare cases range->exp can be equal to lhs of stmt.
     In that case we have to insert after the stmt rather than before
     it.  */
2079 if (op
== range
->exp
)
2080 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, false,
2081 GSI_CONTINUE_LINKING
);
2084 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
2088 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2089 if (gimple_uid (gsi_stmt (gsi
)))
2092 gimple_set_uid (gsi_stmt (gsi
), gimple_uid (stmt
));
2099 range
->strict_overflow_p
= false;
2101 for (range
= otherrange
; range
< otherrange
+ count
; range
++)
2103 oe
= (*ops
)[range
->idx
];
2104 /* Now change all the other range test immediate uses, so that
2105 those tests will be optimized away. */
2106 if (opcode
== ERROR_MARK
)
2109 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
2110 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
2112 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
2113 ? boolean_false_node
: boolean_true_node
);
2116 oe
->op
= error_mark_node
;
2117 range
->exp
= NULL_TREE
;
2122 /* Optimize X == CST1 || X == CST2
2123 if popcount (CST1 ^ CST2) == 1 into
2124 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2125 Similarly for ranges. E.g.
2126 X != 2 && X != 3 && X != 10 && X != 11
2127 will be transformed by the previous optimization into
2128 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2129 and this loop can transform that into
2130 !(((X & ~8) - 2U) <= 1U). */
2133 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
2134 tree lowi
, tree lowj
, tree highi
, tree highj
,
2135 vec
<operand_entry_t
> *ops
,
2136 struct range_entry
*rangei
,
2137 struct range_entry
*rangej
)
2139 tree lowxor
, highxor
, tem
, exp
;
2140 /* Check highi ^ lowi == highj ^ lowj and
2141 popcount (highi ^ lowi) == 1. */
2142 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
2143 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
2145 if (tree_log2 (lowxor
) < 0)
2147 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
2148 if (!tree_int_cst_equal (lowxor
, highxor
))
2151 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
2152 exp
= fold_build2 (BIT_AND_EXPR
, type
, rangei
->exp
, tem
);
2153 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
2154 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
2155 if (update_range_test (rangei
, rangej
, 1, opcode
, ops
, exp
,
2156 rangei
->in_p
, lowj
, highj
,
2157 rangei
->strict_overflow_p
2158 || rangej
->strict_overflow_p
))
/* Optimize X == CST1 || X == CST2
   if popcount (CST2 - CST1) == 1 into
   ((X - CST1) & ~(CST2 - CST1)) == 0.
   Similarly for ranges.  E.g.
   X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
   || X == 75 || X == 45
   will be transformed by the previous optimization into
   (X - 43U) <= 3U || (X - 75U) <= 3U
   and this loop can transform that into
   ((X - 43U) & ~(75U - 43U)) <= 3U.  */
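
/* Checking the example above: both ranges span highj - lowj == 3, and
   75U - 43U == 32 has exactly one bit set, so clearing that bit in
   X - 43U maps the second range [75, 78] onto the first one [43, 46],
   letting a single masked comparison cover both.  */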
2174 optimize_range_tests_diff (enum tree_code opcode
, tree type
,
2175 tree lowi
, tree lowj
, tree highi
, tree highj
,
2176 vec
<operand_entry_t
> *ops
,
2177 struct range_entry
*rangei
,
2178 struct range_entry
*rangej
)
2180 tree tem1
, tem2
, mask
;
2181 /* Check highi - lowi == highj - lowj. */
2182 tem1
= fold_binary (MINUS_EXPR
, type
, highi
, lowi
);
2183 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2185 tem2
= fold_binary (MINUS_EXPR
, type
, highj
, lowj
);
2186 if (!tree_int_cst_equal (tem1
, tem2
))
2188 /* Check popcount (lowj - lowi) == 1. */
2189 tem1
= fold_binary (MINUS_EXPR
, type
, lowj
, lowi
);
2190 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2192 if (tree_log2 (tem1
) < 0)
2195 mask
= fold_build1 (BIT_NOT_EXPR
, type
, tem1
);
2196 tem1
= fold_binary (MINUS_EXPR
, type
, rangei
->exp
, lowi
);
2197 tem1
= fold_build2 (BIT_AND_EXPR
, type
, tem1
, mask
);
2198 lowj
= build_int_cst (type
, 0);
2199 if (update_range_test (rangei
, rangej
, 1, opcode
, ops
, tem1
,
2200 rangei
->in_p
, lowj
, tem2
,
2201 rangei
->strict_overflow_p
2202 || rangej
->strict_overflow_p
))
2207 /* It does some common checks for function optimize_range_tests_xor and
2208 optimize_range_tests_diff.
2209 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2210 Else it calls optimize_range_tests_diff. */
2213 optimize_range_tests_1 (enum tree_code opcode
, int first
, int length
,
2214 bool optimize_xor
, vec
<operand_entry_t
> *ops
,
2215 struct range_entry
*ranges
)
2218 bool any_changes
= false;
2219 for (i
= first
; i
< length
; i
++)
2221 tree lowi
, highi
, lowj
, highj
, type
, tem
;
2223 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2225 type
= TREE_TYPE (ranges
[i
].exp
);
2226 if (!INTEGRAL_TYPE_P (type
))
2228 lowi
= ranges
[i
].low
;
2229 if (lowi
== NULL_TREE
)
2230 lowi
= TYPE_MIN_VALUE (type
);
2231 highi
= ranges
[i
].high
;
2232 if (highi
== NULL_TREE
)
2234 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
2237 if (ranges
[i
].exp
!= ranges
[j
].exp
|| ranges
[j
].in_p
)
2239 lowj
= ranges
[j
].low
;
2240 if (lowj
== NULL_TREE
)
2242 highj
= ranges
[j
].high
;
2243 if (highj
== NULL_TREE
)
2244 highj
= TYPE_MAX_VALUE (type
);
2245 /* Check lowj > highi. */
2246 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2248 if (tem
== NULL_TREE
|| !integer_onep (tem
))
2251 changes
= optimize_range_tests_xor (opcode
, type
, lowi
, lowj
,
2253 ranges
+ i
, ranges
+ j
);
2255 changes
= optimize_range_tests_diff (opcode
, type
, lowi
, lowj
,
2257 ranges
+ i
, ranges
+ j
);
/* Optimize range tests, similarly to how fold_range_test optimizes
   it on trees.  The tree code for the binary
   operation between all the operands is OPCODE.
   If OPCODE is ERROR_MARK, optimize_range_tests is called from within
   maybe_optimize_range_tests for inter-bb range optimization.
   In that case, if oe->op is NULL, oe->id is the bb->index whose
   GIMPLE_COND is && or ||ed into the test, and oe->rank says
   the actual opcode.  */
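/* Illustrative example (assumed, not taken from the original sources):
   for a == 1 || a == 2 || a == 3 with OPCODE BIT_IOR_EXPR, all three
   range_entry records refer to the same exp, so merge_ranges combines
   them into the single range [1, 3] and update_range_test replaces the
   operands with the equivalent test (a - 1U) <= 2U.  */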
2278 optimize_range_tests (enum tree_code opcode
,
2279 vec
<operand_entry_t
> *ops
)
2281 unsigned int length
= ops
->length (), i
, j
, first
;
2283 struct range_entry
*ranges
;
2284 bool any_changes
= false;
2289 ranges
= XNEWVEC (struct range_entry
, length
);
2290 for (i
= 0; i
< length
; i
++)
2294 init_range_entry (ranges
+ i
, oe
->op
,
2296 last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
)));
2297 /* For | invert it now, we will invert it again before emitting
2298 the optimized expression. */
2299 if (opcode
== BIT_IOR_EXPR
2300 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2301 ranges
[i
].in_p
= !ranges
[i
].in_p
;
2304 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
2305 for (i
= 0; i
< length
; i
++)
2306 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
2309 /* Try to merge ranges. */
2310 for (first
= i
; i
< length
; i
++)
2312 tree low
= ranges
[i
].low
;
2313 tree high
= ranges
[i
].high
;
2314 int in_p
= ranges
[i
].in_p
;
2315 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2316 int update_fail_count
= 0;
2318 for (j
= i
+ 1; j
< length
; j
++)
2320 if (ranges
[i
].exp
!= ranges
[j
].exp
)
2322 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
2323 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
2325 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2331 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, j
- i
- 1, opcode
,
2332 ops
, ranges
[i
].exp
, in_p
, low
, high
,
2338 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2339 while update_range_test would fail. */
2340 else if (update_fail_count
== 64)
2343 ++update_fail_count
;
2346 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
2349 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
2350 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
2353 if (any_changes
&& opcode
!= ERROR_MARK
)
2356 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
2358 if (oe
->op
== error_mark_node
)
2367 XDELETEVEC (ranges
);
/* Return true if STMT is a cast like:
   <bb N>:
   ...
   _123 = (int) _234;

   <bb M>:
   # _345 = PHI <_123(N), 1(...), 1(...)>
   where _234 has bool type, _123 has single use and
   bb N has a single successor M.  This is commonly used in
   the last block of a range test.  */
2383 final_range_test_p (gimple stmt
)
2385 basic_block bb
, rhs_bb
;
2388 use_operand_p use_p
;
2391 if (!gimple_assign_cast_p (stmt
))
2393 bb
= gimple_bb (stmt
);
2394 if (!single_succ_p (bb
))
2396 e
= single_succ_edge (bb
);
2397 if (e
->flags
& EDGE_COMPLEX
)
2400 lhs
= gimple_assign_lhs (stmt
);
2401 rhs
= gimple_assign_rhs1 (stmt
);
2402 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2403 || TREE_CODE (rhs
) != SSA_NAME
2404 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
)
2407 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2408 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
2411 if (gimple_code (use_stmt
) != GIMPLE_PHI
2412 || gimple_bb (use_stmt
) != e
->dest
)
2415 /* And that the rhs is defined in the same loop. */
2416 rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
));
2418 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
/* Return true if BB is a suitable basic block for inter-bb range test
   optimization.  If BACKWARD is true, BB should be the only predecessor
   of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
   or compared with to find a common basic block to which all conditions
   branch if true resp. false.  If BACKWARD is false, TEST_BB should
   be the only predecessor of BB.  */
2432 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
2435 edge_iterator ei
, ei2
;
2438 gimple_stmt_iterator gsi
;
2439 bool other_edge_seen
= false;
2444 /* Check last stmt first. */
2445 stmt
= last_stmt (bb
);
2447 || (gimple_code (stmt
) != GIMPLE_COND
2448 && (backward
|| !final_range_test_p (stmt
)))
2449 || gimple_visited_p (stmt
)
2450 || stmt_could_throw_p (stmt
)
2453 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
2456 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2457 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2458 to *OTHER_BB (if not set yet, try to find it out). */
2459 if (EDGE_COUNT (bb
->succs
) != 2)
2461 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2463 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2465 if (e
->dest
== test_bb
)
2474 if (*other_bb
== NULL
)
2476 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
2477 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2479 else if (e
->dest
== e2
->dest
)
2480 *other_bb
= e
->dest
;
2481 if (*other_bb
== NULL
)
2484 if (e
->dest
== *other_bb
)
2485 other_edge_seen
= true;
2489 if (*other_bb
== NULL
|| !other_edge_seen
)
2492 else if (single_succ (bb
) != *other_bb
)
2495 /* Now check all PHIs of *OTHER_BB. */
2496 e
= find_edge (bb
, *other_bb
);
2497 e2
= find_edge (test_bb
, *other_bb
);
2498 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2500 gimple phi
= gsi_stmt (gsi
);
2501 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2502 corresponding to BB and TEST_BB predecessor must be the same. */
2503 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
2504 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
          /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
             one of the PHIs should have the lhs of the last stmt in
             that block as PHI arg and that PHI should have 0 or 1
             corresponding to it in all other range test basic blocks
             considered.  */
2513 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
2514 == gimple_assign_lhs (stmt
)
2515 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
2516 || integer_onep (gimple_phi_arg_def (phi
,
2522 gimple test_last
= last_stmt (test_bb
);
2523 if (gimple_code (test_last
) != GIMPLE_COND
2524 && gimple_phi_arg_def (phi
, e2
->dest_idx
)
2525 == gimple_assign_lhs (test_last
)
2526 && (integer_zerop (gimple_phi_arg_def (phi
, e
->dest_idx
))
2527 || integer_onep (gimple_phi_arg_def (phi
, e
->dest_idx
))))
/* Return true if BB doesn't have side-effects that would disallow
   range test optimization, all SSA_NAMEs set in the bb are consumed
   in the bb and there are no PHIs.  */
2542 no_side_effect_bb (basic_block bb
)
2544 gimple_stmt_iterator gsi
;
2547 if (!gimple_seq_empty_p (phi_nodes (bb
)))
2549 last
= last_stmt (bb
);
2550 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2552 gimple stmt
= gsi_stmt (gsi
);
2554 imm_use_iterator imm_iter
;
2555 use_operand_p use_p
;
2557 if (is_gimple_debug (stmt
))
2559 if (gimple_has_side_effects (stmt
))
2563 if (!is_gimple_assign (stmt
))
2565 lhs
= gimple_assign_lhs (stmt
);
2566 if (TREE_CODE (lhs
) != SSA_NAME
)
2568 if (gimple_assign_rhs_could_trap_p (stmt
))
2570 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
2572 gimple use_stmt
= USE_STMT (use_p
);
2573 if (is_gimple_debug (use_stmt
))
2575 if (gimple_bb (use_stmt
) != bb
)
/* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
   return true and fill in *OPS recursively.  */
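/* Illustrative example (assumed, not taken from the original sources):
   given  _5 = _1 & _2;  _6 = _5 & _3;  calling get_ops on _6 with CODE
   BIT_AND_EXPR recurses through _5 and pushes the leaves _1, _2 and _3
   onto *OPS, marking the visited statements so they are not walked
   again.  */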
2586 get_ops (tree var
, enum tree_code code
, vec
<operand_entry_t
> *ops
,
2589 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2593 if (!is_reassociable_op (stmt
, code
, loop
))
2596 rhs
[0] = gimple_assign_rhs1 (stmt
);
2597 rhs
[1] = gimple_assign_rhs2 (stmt
);
2598 gimple_set_visited (stmt
, true);
2599 for (i
= 0; i
< 2; i
++)
2600 if (TREE_CODE (rhs
[i
]) == SSA_NAME
2601 && !get_ops (rhs
[i
], code
, ops
, loop
)
2602 && has_single_use (rhs
[i
]))
2604 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
2610 ops
->safe_push (oe
);
/* Find the ops that were added by get_ops starting from VAR, see if
   they were changed during update_range_test and if yes, create new
   stmts.  */
2620 update_ops (tree var
, enum tree_code code
, vec
<operand_entry_t
> ops
,
2621 unsigned int *pidx
, struct loop
*loop
)
2623 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2627 if (!is_reassociable_op (stmt
, code
, loop
))
2630 rhs
[0] = gimple_assign_rhs1 (stmt
);
2631 rhs
[1] = gimple_assign_rhs2 (stmt
);
2634 for (i
= 0; i
< 2; i
++)
2635 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
2637 rhs
[2 + i
] = update_ops (rhs
[i
], code
, ops
, pidx
, loop
);
2638 if (rhs
[2 + i
] == NULL_TREE
)
2640 if (has_single_use (rhs
[i
]))
2641 rhs
[2 + i
] = ops
[(*pidx
)++]->op
;
2643 rhs
[2 + i
] = rhs
[i
];
2646 if ((rhs
[2] != rhs
[0] || rhs
[3] != rhs
[1])
2647 && (rhs
[2] != rhs
[1] || rhs
[3] != rhs
[0]))
2649 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
2650 var
= make_ssa_name (TREE_TYPE (var
), NULL
);
2651 gimple g
= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt
),
2652 var
, rhs
[2], rhs
[3]);
2653 gimple_set_uid (g
, gimple_uid (stmt
));
2654 gimple_set_visited (g
, true);
2655 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
/* Structure to track the initial value passed to get_ops and
   the range in the ops vector for each basic block.  */
2663 struct inter_bb_range_test_entry
2666 unsigned int first_idx
, last_idx
;
2669 /* Inter-bb range test optimization. */
2672 maybe_optimize_range_tests (gimple stmt
)
2674 basic_block first_bb
= gimple_bb (stmt
);
2675 basic_block last_bb
= first_bb
;
2676 basic_block other_bb
= NULL
;
2680 auto_vec
<operand_entry_t
> ops
;
2681 auto_vec
<inter_bb_range_test_entry
> bbinfo
;
2682 bool any_changes
= false;
  /* Consider only basic blocks that end with GIMPLE_COND or
     a cast statement satisfying final_range_test_p.  All
     but the last bb in the first_bb .. last_bb range
     should end with GIMPLE_COND.  */
2688 if (gimple_code (stmt
) == GIMPLE_COND
)
2690 if (EDGE_COUNT (first_bb
->succs
) != 2)
2693 else if (final_range_test_p (stmt
))
2694 other_bb
= single_succ (first_bb
);
2698 if (stmt_could_throw_p (stmt
))
  /* As relative ordering of post-dominator sons isn't fixed,
     maybe_optimize_range_tests can be called first on any
     bb in the range we want to optimize.  So, start searching
     backwards, if first_bb can be set to a predecessor.  */
2705 while (single_pred_p (first_bb
))
2707 basic_block pred_bb
= single_pred (first_bb
);
2708 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, true))
2710 if (!no_side_effect_bb (first_bb
))
  /* If first_bb is last_bb, other_bb hasn't been computed yet.
     Before starting forward search in last_bb successors, find
     out the other_bb.  */
2717 if (first_bb
== last_bb
)
2720 /* As non-GIMPLE_COND last stmt always terminates the range,
2721 if forward search didn't discover anything, just give up. */
2722 if (gimple_code (stmt
) != GIMPLE_COND
)
2724 /* Look at both successors. Either it ends with a GIMPLE_COND
2725 and satisfies suitable_cond_bb, or ends with a cast and
2726 other_bb is that cast's successor. */
2727 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
2728 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
2729 || e
->dest
== first_bb
)
2731 else if (single_pred_p (e
->dest
))
2733 stmt
= last_stmt (e
->dest
);
2735 && gimple_code (stmt
) == GIMPLE_COND
2736 && EDGE_COUNT (e
->dest
->succs
) == 2)
2738 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
, true))
2744 && final_range_test_p (stmt
)
2745 && find_edge (first_bb
, single_succ (e
->dest
)))
2747 other_bb
= single_succ (e
->dest
);
2748 if (other_bb
== first_bb
)
2752 if (other_bb
== NULL
)
2755 /* Now do the forward search, moving last_bb to successor bbs
2756 that aren't other_bb. */
2757 while (EDGE_COUNT (last_bb
->succs
) == 2)
2759 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
2760 if (e
->dest
!= other_bb
)
2764 if (!single_pred_p (e
->dest
))
2766 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, false))
2768 if (!no_side_effect_bb (e
->dest
))
2772 if (first_bb
== last_bb
)
  /* Here basic blocks first_bb through last_bb's predecessor
     end with GIMPLE_COND, all of them have one of the edges to
     other_bb and another to another block in the range,
     all blocks except first_bb don't have side-effects and
     last_bb ends with either GIMPLE_COND, or cast satisfying
     final_range_test_p.  */
2780 for (bb
= last_bb
; ; bb
= single_pred (bb
))
2782 enum tree_code code
;
2784 inter_bb_range_test_entry bb_ent
;
2786 bb_ent
.op
= NULL_TREE
;
2787 bb_ent
.first_idx
= ops
.length ();
2788 bb_ent
.last_idx
= bb_ent
.first_idx
;
2789 e
= find_edge (bb
, other_bb
);
2790 stmt
= last_stmt (bb
);
2791 gimple_set_visited (stmt
, true);
2792 if (gimple_code (stmt
) != GIMPLE_COND
)
2794 use_operand_p use_p
;
2799 lhs
= gimple_assign_lhs (stmt
);
2800 rhs
= gimple_assign_rhs1 (stmt
);
2801 gcc_assert (bb
== last_bb
);
          /* stmt is a cast of the form accepted by final_range_test_p,
             feeding a PHI like
             # _345 = PHI <_123(N), 1(...), 1(...)>
             or 0 instead of 1.  If it is 0, the _234
             range test is anded together with all the
             other range tests, if it is 1, it is ored with
             them.  */
2814 single_imm_use (lhs
, &use_p
, &phi
);
2815 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
2816 e2
= find_edge (first_bb
, other_bb
);
2818 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
2819 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
2820 code
= BIT_AND_EXPR
;
2823 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
2824 code
= BIT_IOR_EXPR
;
          /* If _234 SSA_NAME_DEF_STMT is
             _234 = _567 | _789;
             (or &, corresponding to 1/0 in the phi arguments),
             push into ops the individual range test arguments
             of the bitwise or resp. and, recursively.  */
2832 if (!get_ops (rhs
, code
, &ops
,
2833 loop_containing_stmt (stmt
))
2834 && has_single_use (rhs
))
2836 /* Otherwise, push the _234 range test itself. */
2838 = (operand_entry_t
) pool_alloc (operand_entry_pool
);
2848 bb_ent
.last_idx
= ops
.length ();
2850 bbinfo
.safe_push (bb_ent
);
2853 /* Otherwise stmt is GIMPLE_COND. */
2854 code
= gimple_cond_code (stmt
);
2855 lhs
= gimple_cond_lhs (stmt
);
2856 rhs
= gimple_cond_rhs (stmt
);
2857 if (TREE_CODE (lhs
) == SSA_NAME
2858 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2859 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
2860 || rhs
!= boolean_false_node
2861 /* Either push into ops the individual bitwise
2862 or resp. and operands, depending on which
2863 edge is other_bb. */
2864 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
2865 ^ (code
== EQ_EXPR
))
2866 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
2867 loop_containing_stmt (stmt
))))
2869 /* Or push the GIMPLE_COND stmt itself. */
2871 = (operand_entry_t
) pool_alloc (operand_entry_pool
);
2874 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
2875 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
          /* oe->op = NULL signals that there is no SSA_NAME
             for the range test, and oe->id instead is the
             basic block number, at whose end the GIMPLE_COND
             is.  */
2886 else if (ops
.length () > bb_ent
.first_idx
)
2889 bb_ent
.last_idx
= ops
.length ();
2891 bbinfo
.safe_push (bb_ent
);
2895 if (ops
.length () > 1)
2896 any_changes
= optimize_range_tests (ERROR_MARK
, &ops
);
  /* update_ops relies on has_single_use predicates returning the
     same values as they did during get_ops earlier.  Additionally it
     never removes statements, only adds new ones and it should walk
     from the single imm use and check the predicate already before
     making those changes.
     On the other hand, the handling of GIMPLE_COND directly can turn
     previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
     it needs to be done in a separate loop afterwards.  */
2908 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
2910 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
2911 && bbinfo
[idx
].op
!= NULL_TREE
)
2915 stmt
= last_stmt (bb
);
2916 new_op
= update_ops (bbinfo
[idx
].op
,
2918 ops
[bbinfo
[idx
].first_idx
]->rank
,
2919 ops
, &bbinfo
[idx
].first_idx
,
2920 loop_containing_stmt (stmt
));
2921 if (new_op
== NULL_TREE
)
2923 gcc_assert (bb
== last_bb
);
2924 new_op
= ops
[bbinfo
[idx
].first_idx
++]->op
;
2926 if (bbinfo
[idx
].op
!= new_op
)
2928 imm_use_iterator iter
;
2929 use_operand_p use_p
;
2930 gimple use_stmt
, cast_stmt
= NULL
;
2932 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, bbinfo
[idx
].op
)
2933 if (is_gimple_debug (use_stmt
))
2935 else if (gimple_code (use_stmt
) == GIMPLE_COND
2936 || gimple_code (use_stmt
) == GIMPLE_PHI
)
2937 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
2938 SET_USE (use_p
, new_op
);
2939 else if (gimple_assign_cast_p (use_stmt
))
2940 cast_stmt
= use_stmt
;
2945 gcc_assert (bb
== last_bb
);
2946 tree lhs
= gimple_assign_lhs (cast_stmt
);
2947 tree new_lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
2948 enum tree_code rhs_code
2949 = gimple_assign_rhs_code (cast_stmt
);
2951 if (is_gimple_min_invariant (new_op
))
2953 new_op
= fold_convert (TREE_TYPE (lhs
), new_op
);
2954 g
= gimple_build_assign (new_lhs
, new_op
);
2957 g
= gimple_build_assign_with_ops (rhs_code
, new_lhs
,
2959 gimple_stmt_iterator gsi
= gsi_for_stmt (cast_stmt
);
2960 gimple_set_uid (g
, gimple_uid (cast_stmt
));
2961 gimple_set_visited (g
, true);
2962 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
2963 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
2964 if (is_gimple_debug (use_stmt
))
2966 else if (gimple_code (use_stmt
) == GIMPLE_COND
2967 || gimple_code (use_stmt
) == GIMPLE_PHI
)
2968 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
2969 SET_USE (use_p
, new_lhs
);
2978 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
2980 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
2981 && bbinfo
[idx
].op
== NULL_TREE
2982 && ops
[bbinfo
[idx
].first_idx
]->op
!= NULL_TREE
)
2984 stmt
= last_stmt (bb
);
2985 if (integer_zerop (ops
[bbinfo
[idx
].first_idx
]->op
))
2986 gimple_cond_make_false (stmt
);
2987 else if (integer_onep (ops
[bbinfo
[idx
].first_idx
]->op
))
2988 gimple_cond_make_true (stmt
);
2991 gimple_cond_set_code (stmt
, NE_EXPR
);
2992 gimple_cond_set_lhs (stmt
, ops
[bbinfo
[idx
].first_idx
]->op
);
2993 gimple_cond_set_rhs (stmt
, boolean_false_node
);
/* Return true if OPERAND is defined by a PHI node which uses the LHS
   of STMT in its operands.  This is also known as a "destructive
   update" operation.  */
3008 is_phi_for_stmt (gimple stmt
, tree operand
)
3012 use_operand_p arg_p
;
3015 if (TREE_CODE (operand
) != SSA_NAME
)
3018 lhs
= gimple_assign_lhs (stmt
);
3020 def_stmt
= SSA_NAME_DEF_STMT (operand
);
3021 if (gimple_code (def_stmt
) != GIMPLE_PHI
)
3024 FOR_EACH_PHI_ARG (arg_p
, def_stmt
, i
, SSA_OP_USE
)
3025 if (lhs
== USE_FROM_PTR (arg_p
))
/* Remove the def stmt of VAR if VAR has zero uses and recurse
   on its rhs1 operand if so.  */
3034 remove_visited_stmt_chain (tree var
)
3037 gimple_stmt_iterator gsi
;
3041 if (TREE_CODE (var
) != SSA_NAME
|| !has_zero_uses (var
))
3043 stmt
= SSA_NAME_DEF_STMT (var
);
3044 if (is_gimple_assign (stmt
) && gimple_visited_p (stmt
))
3046 var
= gimple_assign_rhs1 (stmt
);
3047 gsi
= gsi_for_stmt (stmt
);
3048 gsi_remove (&gsi
, true);
3049 release_defs (stmt
);
/* This function checks three consecutive operands in
   passed operands vector OPS starting from OPINDEX and
   swaps two operands if it is profitable for binary operation
   consuming OPINDEX + 1 and OPINDEX + 2 operands.

   We pair ops with the same rank if possible.

   The alternative we try is to see if STMT is a destructive
   update style statement, which is like:
   b = phi (a, ...)
   a = c + b;
   In that case, we want to use the destructive update form to
   expose the possible vectorizer sum reduction opportunity.
   In that case, the third operand will be the phi node.  This
   check is not performed if STMT is null.

   We could, of course, try to be better as noted above, and do a
   lot of work to try to find these opportunities in >3 operand
   cases, but it is unlikely to be worth it.  */
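/* Illustrative example (assumed, not taken from the original sources):
   with ranks {oe1: 3, oe2: 3, oe3: 2} the first test fires and oe1 is
   swapped with oe3, so the two rank-3 operands end up in the
   OPINDEX + 1 and OPINDEX + 2 slots that feed a single binary
   operation, giving the redundancy eliminator same-rank operands.  */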
3077 swap_ops_for_binary_stmt (vec
<operand_entry_t
> ops
,
3078 unsigned int opindex
, gimple stmt
)
3080 operand_entry_t oe1
, oe2
, oe3
;
3083 oe2
= ops
[opindex
+ 1];
3084 oe3
= ops
[opindex
+ 2];
3086 if ((oe1
->rank
== oe2
->rank
3087 && oe2
->rank
!= oe3
->rank
)
3088 || (stmt
&& is_phi_for_stmt (stmt
, oe3
->op
)
3089 && !is_phi_for_stmt (stmt
, oe1
->op
)
3090 && !is_phi_for_stmt (stmt
, oe2
->op
)))
3092 struct operand_entry temp
= *oe3
;
3094 oe3
->rank
= oe1
->rank
;
3096 oe1
->rank
= temp
.rank
;
3098 else if ((oe1
->rank
== oe3
->rank
3099 && oe2
->rank
!= oe3
->rank
)
3100 || (stmt
&& is_phi_for_stmt (stmt
, oe2
->op
)
3101 && !is_phi_for_stmt (stmt
, oe1
->op
)
3102 && !is_phi_for_stmt (stmt
, oe3
->op
)))
3104 struct operand_entry temp
= *oe2
;
3106 oe2
->rank
= oe1
->rank
;
3108 oe1
->rank
= temp
.rank
;
/* If definition of RHS1 or RHS2 dominates STMT, return the later of those
   two definitions, otherwise return STMT.  */
3115 static inline gimple
3116 find_insert_point (gimple stmt
, tree rhs1
, tree rhs2
)
3118 if (TREE_CODE (rhs1
) == SSA_NAME
3119 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs1
)))
3120 stmt
= SSA_NAME_DEF_STMT (rhs1
);
3121 if (TREE_CODE (rhs2
) == SSA_NAME
3122 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs2
)))
3123 stmt
= SSA_NAME_DEF_STMT (rhs2
);
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order.  Return new lhs.  */
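/* Illustrative example (assumed, not taken from the original sources):
   for a linearized chain  t1 = x + y;  t2 = t1 + z;  lhs = t2 + w;
   with OPS holding {o0, o1, o2, o3}, the outermost statement receives
   o0 as its rhs2, the recursion supplies o1 to the middle statement,
   and the innermost statement is rewritten to  o2 + o3,  so the final
   value is ((o2 + o3) + o1) + o0.  */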
3132 rewrite_expr_tree (gimple stmt
, unsigned int opindex
,
3133 vec
<operand_entry_t
> ops
, bool changed
)
3135 tree rhs1
= gimple_assign_rhs1 (stmt
);
3136 tree rhs2
= gimple_assign_rhs2 (stmt
);
3137 tree lhs
= gimple_assign_lhs (stmt
);
  /* The final recursion case for this function is that you have
     exactly two operations left.
     If we had exactly one op in the entire list to start with, we
     would have never called this function, and the tail recursion
     rewrites them one at a time.  */
3145 if (opindex
+ 2 == ops
.length ())
3147 operand_entry_t oe1
, oe2
;
3150 oe2
= ops
[opindex
+ 1];
3152 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
3154 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3155 unsigned int uid
= gimple_uid (stmt
);
3157 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3159 fprintf (dump_file
, "Transforming ");
3160 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3165 gimple insert_point
= find_insert_point (stmt
, oe1
->op
, oe2
->op
);
3166 lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
3168 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt
),
3169 lhs
, oe1
->op
, oe2
->op
);
3170 gimple_set_uid (stmt
, uid
);
3171 gimple_set_visited (stmt
, true);
3172 if (insert_point
== gsi_stmt (gsi
))
3173 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3175 insert_stmt_after (stmt
, insert_point
);
3179 gcc_checking_assert (find_insert_point (stmt
, oe1
->op
, oe2
->op
)
3181 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
3182 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
3186 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
3187 remove_visited_stmt_chain (rhs1
);
3189 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3191 fprintf (dump_file
, " into ");
3192 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3198 /* If we hit here, we should have 3 or more ops left. */
3199 gcc_assert (opindex
+ 2 < ops
.length ());
3201 /* Rewrite the next operator. */
3204 /* Recurse on the LHS of the binary operator, which is guaranteed to
3205 be the non-leaf side. */
3207 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), opindex
+ 1, ops
,
3208 changed
|| oe
->op
!= rhs2
);
3210 if (oe
->op
!= rhs2
|| new_rhs1
!= rhs1
)
3212 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3214 fprintf (dump_file
, "Transforming ");
3215 print_gimple_stmt (dump_file
, stmt
, 0, 0);
      /* If changed is false, this is either opindex == 0
         or all outer rhs2's were equal to corresponding oe->op,
         and powi_result is NULL.
         That means lhs is equivalent before and after reassociation.
         Otherwise ensure the old lhs SSA_NAME is not reused and
         create a new stmt as well, so that any debug stmts will be
         properly adjusted.  */
3227 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3228 unsigned int uid
= gimple_uid (stmt
);
3229 gimple insert_point
= find_insert_point (stmt
, new_rhs1
, oe
->op
);
3231 lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
3232 stmt
= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt
),
3233 lhs
, new_rhs1
, oe
->op
);
3234 gimple_set_uid (stmt
, uid
);
3235 gimple_set_visited (stmt
, true);
3236 if (insert_point
== gsi_stmt (gsi
))
3237 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3239 insert_stmt_after (stmt
, insert_point
);
3243 gcc_checking_assert (find_insert_point (stmt
, new_rhs1
, oe
->op
)
3245 gimple_assign_set_rhs1 (stmt
, new_rhs1
);
3246 gimple_assign_set_rhs2 (stmt
, oe
->op
);
3250 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3252 fprintf (dump_file
, " into ");
3253 print_gimple_stmt (dump_file
, stmt
, 0, 0);
/* Find out how many cycles we need to compute the statement chain.
   OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is the
   maximum number of independent statements we may execute per cycle.  */
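/* Illustrative calculation (assumed, not taken from the original
   sources): for ops_num = 12 and cpu_width = 2, the first phase runs
   res = 12 / (2 * 2) = 3 cycles, each retiring cpu_width = 2 operands,
   leaving rest = 12 - 3 * 2 = 6 operands; those are then halved every
   cycle, costing floor_log2 (6) + 1 = 3 more cycles, for a total of
   6 cycles.  */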
3264 get_required_cycles (int ops_num
, int cpu_width
)
  /* While we have more than 2 * cpu_width operands
     we may reduce the number of operands by cpu_width
     per cycle.  */
  res = ops_num / (2 * cpu_width);
  /* The remaining operand count may be reduced twice per cycle
     until we have only one operand.  */
3277 rest
= (unsigned)(ops_num
- res
* cpu_width
);
3278 elog
= exact_log2 (rest
);
3282 res
+= floor_log2 (rest
) + 1;
3287 /* Returns an optimal number of registers to use for computation of
3288 given statements. */
3291 get_reassociation_width (int ops_num
, enum tree_code opc
,
3292 enum machine_mode mode
)
3294 int param_width
= PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH
);
3299 if (param_width
> 0)
3300 width
= param_width
;
3302 width
= targetm
.sched
.reassociation_width (opc
, mode
);
3307 /* Get the minimal time required for sequence computation. */
3308 cycles_best
= get_required_cycles (ops_num
, width
);
  /* Check if we may use less width and still compute the sequence in
     the same number of cycles.  This will allow us to reduce register
     usage.  get_required_cycles is monotonically increasing with lower
     width so we can perform a binary search for the minimal width that
     still results in the optimal cycle count.  */
3316 while (width
> width_min
)
3318 int width_mid
= (width
+ width_min
) / 2;
3320 if (get_required_cycles (ops_num
, width_mid
) == cycles_best
)
3322 else if (width_min
< width_mid
)
3323 width_min
= width_mid
;
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order and trying to allow operations to be executed in
   parallel.  */
3337 rewrite_expr_tree_parallel (gimple stmt
, int width
,
3338 vec
<operand_entry_t
> ops
)
3340 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
3341 int op_num
= ops
.length ();
3342 int stmt_num
= op_num
- 1;
3343 gimple
*stmts
= XALLOCAVEC (gimple
, stmt_num
);
3344 int op_index
= op_num
- 1;
3346 int ready_stmts_end
= 0;
3348 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
3350 /* We start expression rewriting from the top statements.
3351 So, in this loop we create a full list of statements
3352 we will work with. */
3353 stmts
[stmt_num
- 1] = stmt
;
3354 for (i
= stmt_num
- 2; i
>= 0; i
--)
3355 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
3357 for (i
= 0; i
< stmt_num
; i
++)
3361 /* Determine whether we should use results of
3362 already handled statements or not. */
3363 if (ready_stmts_end
== 0
3364 && (i
- stmt_index
>= width
|| op_index
< 1))
3365 ready_stmts_end
= i
;
3367 /* Now we choose operands for the next statement. Non zero
3368 value in ready_stmts_end means here that we should use
3369 the result of already generated statements as new operand. */
3370 if (ready_stmts_end
> 0)
3372 op1
= gimple_assign_lhs (stmts
[stmt_index
++]);
3373 if (ready_stmts_end
> stmt_index
)
3374 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3375 else if (op_index
>= 0)
3376 op2
= ops
[op_index
--]->op
;
3379 gcc_assert (stmt_index
< i
);
3380 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3383 if (stmt_index
>= ready_stmts_end
)
3384 ready_stmts_end
= 0;
3389 swap_ops_for_binary_stmt (ops
, op_index
- 2, NULL
);
3390 op2
= ops
[op_index
--]->op
;
3391 op1
= ops
[op_index
--]->op
;
3394 /* If we emit the last statement then we should put
3395 operands into the last statement. It will also
3397 if (op_index
< 0 && stmt_index
== i
)
3400 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3402 fprintf (dump_file
, "Transforming ");
3403 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3406 /* We keep original statement only for the last one. All
3407 others are recreated. */
3408 if (i
== stmt_num
- 1)
3410 gimple_assign_set_rhs1 (stmts
[i
], op1
);
3411 gimple_assign_set_rhs2 (stmts
[i
], op2
);
3412 update_stmt (stmts
[i
]);
3415 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
), op1
, op2
, opcode
);
3417 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3419 fprintf (dump_file
, " into ");
3420 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3424 remove_visited_stmt_chain (last_rhs1
);
/* Transform STMT, which is really (A + B) + (C + D) into the left
   linear form, ((A+B)+C)+D.
   Recurse on D if necessary.  */
3432 linearize_expr (gimple stmt
)
3434 gimple_stmt_iterator gsi
;
3435 gimple binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3436 gimple binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3437 gimple oldbinrhs
= binrhs
;
3438 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
3439 gimple newbinrhs
= NULL
;
3440 struct loop
*loop
= loop_containing_stmt (stmt
);
3441 tree lhs
= gimple_assign_lhs (stmt
);
3443 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
3444 && is_reassociable_op (binrhs
, rhscode
, loop
));
3446 gsi
= gsi_for_stmt (stmt
);
3448 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
3449 binrhs
= gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs
),
3450 make_ssa_name (TREE_TYPE (lhs
), NULL
),
3451 gimple_assign_lhs (binlhs
),
3452 gimple_assign_rhs2 (binrhs
));
3453 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
3454 gsi_insert_before (&gsi
, binrhs
, GSI_SAME_STMT
);
3455 gimple_set_uid (binrhs
, gimple_uid (stmt
));
3457 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
3458 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3460 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3462 fprintf (dump_file
, "Linearized: ");
3463 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3466 reassociate_stats
.linearized
++;
3469 gsi
= gsi_for_stmt (oldbinrhs
);
3470 gsi_remove (&gsi
, true);
3471 release_defs (oldbinrhs
);
3473 gimple_set_visited (stmt
, true);
3474 gimple_set_visited (binlhs
, true);
3475 gimple_set_visited (binrhs
, true);
3477 /* Tail recurse on the new rhs if it still needs reassociation. */
3478 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
3479 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3480 want to change the algorithm while converting to tuples. */
3481 linearize_expr (stmt
);
3484 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3485 it. Otherwise, return NULL. */
3488 get_single_immediate_use (tree lhs
)
3490 use_operand_p immuse
;
3493 if (TREE_CODE (lhs
) == SSA_NAME
3494 && single_imm_use (lhs
, &immuse
, &immusestmt
)
3495 && is_gimple_assign (immusestmt
))
/* Recursively negate the value of TONEGATE, and return the SSA_NAME
   representing the negated value.  Insertions of any necessary
   instructions go before GSI.
   This function is recursive in that, if you hand it "a_5" as the
   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
3509 negate_value (tree tonegate
, gimple_stmt_iterator
*gsip
)
3511 gimple negatedefstmt
= NULL
;
3512 tree resultofnegate
;
3513 gimple_stmt_iterator gsi
;
3516 /* If we are trying to negate a name, defined by an add, negate the
3517 add operands instead. */
3518 if (TREE_CODE (tonegate
) == SSA_NAME
)
3519 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
3520 if (TREE_CODE (tonegate
) == SSA_NAME
3521 && is_gimple_assign (negatedefstmt
)
3522 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
3523 && has_single_use (gimple_assign_lhs (negatedefstmt
))
3524 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
3526 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
3527 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
3528 tree lhs
= gimple_assign_lhs (negatedefstmt
);
3531 gsi
= gsi_for_stmt (negatedefstmt
);
3532 rhs1
= negate_value (rhs1
, &gsi
);
3534 gsi
= gsi_for_stmt (negatedefstmt
);
3535 rhs2
= negate_value (rhs2
, &gsi
);
3537 gsi
= gsi_for_stmt (negatedefstmt
);
3538 lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
3539 gimple_set_visited (negatedefstmt
, true);
3540 g
= gimple_build_assign_with_ops (PLUS_EXPR
, lhs
, rhs1
, rhs2
);
3541 gimple_set_uid (g
, gimple_uid (negatedefstmt
));
3542 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3546 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
3547 resultofnegate
= force_gimple_operand_gsi (gsip
, tonegate
, true,
3548 NULL_TREE
, true, GSI_SAME_STMT
);
3550 uid
= gimple_uid (gsi_stmt (gsi
));
3551 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
3553 gimple stmt
= gsi_stmt (gsi
);
3554 if (gimple_uid (stmt
) != 0)
3556 gimple_set_uid (stmt
, uid
);
3558 return resultofnegate
;
/* Return true if we should break up the subtract in STMT into an add
   with negate.  This is true when the subtract operands are really
   adds, or the subtract itself is used in an add expression.  In
   either case, breaking up the subtract into an add with negate
   exposes the adds to reassociation.  */
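/* Illustrative example (assumed, not taken from the original sources):
   for  t = a - b;  u = t + c;  the subtract feeds an addition, so it is
   broken up into  t = a + -b;  which lets reassociation see the three
   addends a, -b and c at once.  */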
3568 should_break_up_subtract (gimple stmt
)
3570 tree lhs
= gimple_assign_lhs (stmt
);
3571 tree binlhs
= gimple_assign_rhs1 (stmt
);
3572 tree binrhs
= gimple_assign_rhs2 (stmt
);
3574 struct loop
*loop
= loop_containing_stmt (stmt
);
3576 if (TREE_CODE (binlhs
) == SSA_NAME
3577 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs
), PLUS_EXPR
, loop
))
3580 if (TREE_CODE (binrhs
) == SSA_NAME
3581 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
), PLUS_EXPR
, loop
))
3584 if (TREE_CODE (lhs
) == SSA_NAME
3585 && (immusestmt
= get_single_immediate_use (lhs
))
3586 && is_gimple_assign (immusestmt
)
3587 && (gimple_assign_rhs_code (immusestmt
) == PLUS_EXPR
3588 || gimple_assign_rhs_code (immusestmt
) == MULT_EXPR
))
3593 /* Transform STMT from A - B into A + -B. */
3596 break_up_subtract (gimple stmt
, gimple_stmt_iterator
*gsip
)
3598 tree rhs1
= gimple_assign_rhs1 (stmt
);
3599 tree rhs2
= gimple_assign_rhs2 (stmt
);
3601 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3603 fprintf (dump_file
, "Breaking up subtract ");
3604 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3607 rhs2
= negate_value (rhs2
, gsip
);
3608 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
/* Determine whether STMT is a builtin call that raises an SSA name
   to an integer power and has only one use.  If so, and this is early
   reassociation and unsafe math optimizations are permitted, place
   the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
   If any of these conditions does not hold, return FALSE.  */
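/* Illustrative example (assumed, not taken from the original sources):
   for  _7 = __builtin_pow (x_3, 4.0);  with _7 having a single use,
   *BASE is set to x_3 and *EXPONENT to 4, whereas a call such as
   __builtin_pow (x_3, 2.5) is rejected because 2.5 is not an exact
   integer.  */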
3619 acceptable_pow_call (gimple stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
3622 REAL_VALUE_TYPE c
, cint
;
3624 if (!first_pass_instance
3625 || !flag_unsafe_math_optimizations
3626 || !is_gimple_call (stmt
)
3627 || !has_single_use (gimple_call_lhs (stmt
)))
3630 fndecl
= gimple_call_fndecl (stmt
);
3633 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
3636 switch (DECL_FUNCTION_CODE (fndecl
))
3638 CASE_FLT_FN (BUILT_IN_POW
):
3639 *base
= gimple_call_arg (stmt
, 0);
3640 arg1
= gimple_call_arg (stmt
, 1);
3642 if (TREE_CODE (arg1
) != REAL_CST
)
3645 c
= TREE_REAL_CST (arg1
);
3647 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
3650 *exponent
= real_to_integer (&c
);
3651 real_from_integer (&cint
, VOIDmode
, *exponent
,
3652 *exponent
< 0 ? -1 : 0, 0);
3653 if (!real_identical (&c
, &cint
))
3658 CASE_FLT_FN (BUILT_IN_POWI
):
3659 *base
= gimple_call_arg (stmt
, 0);
3660 arg1
= gimple_call_arg (stmt
, 1);
3662 if (!tree_fits_shwi_p (arg1
))
3665 *exponent
= tree_to_shwi (arg1
);
3672 /* Expanding negative exponents is generally unproductive, so we don't
3673 complicate matters with those. Exponents of zero and one should
3674 have been handled by expression folding. */
3675 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
3681 /* Recursively linearize a binary expression that is the RHS of STMT.
3682 Place the operands of the expression tree in the vector named OPS. */
3685 linearize_expr_tree (vec
<operand_entry_t
> *ops
, gimple stmt
,
3686 bool is_associative
, bool set_visited
)
3688 tree binlhs
= gimple_assign_rhs1 (stmt
);
3689 tree binrhs
= gimple_assign_rhs2 (stmt
);
3690 gimple binlhsdef
= NULL
, binrhsdef
= NULL
;
3691 bool binlhsisreassoc
= false;
3692 bool binrhsisreassoc
= false;
3693 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
3694 struct loop
*loop
= loop_containing_stmt (stmt
);
3695 tree base
= NULL_TREE
;
3696 HOST_WIDE_INT exponent
= 0;
3699 gimple_set_visited (stmt
, true);
3701 if (TREE_CODE (binlhs
) == SSA_NAME
)
3703 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
3704 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
3705 && !stmt_could_throw_p (binlhsdef
));
3708 if (TREE_CODE (binrhs
) == SSA_NAME
)
3710 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
3711 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
3712 && !stmt_could_throw_p (binrhsdef
));
  /* If the LHS is not reassociable, but the RHS is, we need to swap
     them.  If neither is reassociable, there is nothing we can do, so
     just put them in the ops vector.  If the LHS is reassociable,
     linearize it.  If both are reassociable, then linearize the RHS
     and the LHS.  */
3721 if (!binlhsisreassoc
)
      /* If this is not an associative operation like division, give up.  */
3726 if (!is_associative
)
3728 add_to_ops_vec (ops
, binrhs
);
3732 if (!binrhsisreassoc
)
3734 if (rhscode
== MULT_EXPR
3735 && TREE_CODE (binrhs
) == SSA_NAME
3736 && acceptable_pow_call (binrhsdef
, &base
, &exponent
))
3738 add_repeat_to_ops_vec (ops
, base
, exponent
);
3739 gimple_set_visited (binrhsdef
, true);
3742 add_to_ops_vec (ops
, binrhs
);
3744 if (rhscode
== MULT_EXPR
3745 && TREE_CODE (binlhs
) == SSA_NAME
3746 && acceptable_pow_call (binlhsdef
, &base
, &exponent
))
3748 add_repeat_to_ops_vec (ops
, base
, exponent
);
3749 gimple_set_visited (binlhsdef
, true);
3752 add_to_ops_vec (ops
, binlhs
);
3757 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3759 fprintf (dump_file
, "swapping operands of ");
3760 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3763 swap_ssa_operands (stmt
,
3764 gimple_assign_rhs1_ptr (stmt
),
3765 gimple_assign_rhs2_ptr (stmt
));
3768 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3770 fprintf (dump_file
, " is now ");
3771 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3774 /* We want to make it so the lhs is always the reassociative op,
3780 else if (binrhsisreassoc
)
3782 linearize_expr (stmt
);
3783 binlhs
= gimple_assign_rhs1 (stmt
);
3784 binrhs
= gimple_assign_rhs2 (stmt
);
3787 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
3788 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
3790 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
3791 is_associative
, set_visited
);
3793 if (rhscode
== MULT_EXPR
3794 && TREE_CODE (binrhs
) == SSA_NAME
3795 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs
), &base
, &exponent
))
3797 add_repeat_to_ops_vec (ops
, base
, exponent
);
3798 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs
), true);
3801 add_to_ops_vec (ops
, binrhs
);
/* Repropagate the negates back into subtracts, since no other pass
   currently does it.  */
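/* Illustrative example (assumed, not taken from the original sources):
   for  x = -a;  y = b + x;  the PLUS_EXPR user is rewritten into
   y = b - a;  and for  x = -a;  y = x - b;  the negate is pushed down,
   giving  t = a + b;  y = -t;  so it may combine with a later use.  */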
3808 repropagate_negates (void)
3813 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
3815 gimple user
= get_single_immediate_use (negate
);
3817 if (!user
|| !is_gimple_assign (user
))
3820 /* The negate operand can be either operand of a PLUS_EXPR
3821 (it can be the LHS if the RHS is a constant for example).
3823 Force the negate operand to the RHS of the PLUS_EXPR, then
3824 transform the PLUS_EXPR into a MINUS_EXPR. */
3825 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
3827 /* If the negated operand appears on the LHS of the
3828 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3829 to force the negated operand to the RHS of the PLUS_EXPR. */
3830 if (gimple_assign_rhs1 (user
) == negate
)
3832 swap_ssa_operands (user
,
3833 gimple_assign_rhs1_ptr (user
),
3834 gimple_assign_rhs2_ptr (user
));
3837 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3838 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3839 if (gimple_assign_rhs2 (user
) == negate
)
3841 tree rhs1
= gimple_assign_rhs1 (user
);
3842 tree rhs2
= get_unary_op (negate
, NEGATE_EXPR
);
3843 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
3844 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
, rhs2
);
3848 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
3850 if (gimple_assign_rhs1 (user
) == negate
)
              /* Here we have  x = -a;  y = x - b;  which we transform
                 into  x = a + b;  y = -x;
                 This pushes down the negate which we possibly can merge
                 into some other operation, hence insert it into the
                 plus_negates vector.  */
3861 gimple feed
= SSA_NAME_DEF_STMT (negate
);
3862 tree a
= gimple_assign_rhs1 (feed
);
3863 tree b
= gimple_assign_rhs2 (user
);
3864 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
);
3865 gimple_stmt_iterator gsi2
= gsi_for_stmt (user
);
3866 tree x
= make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed
)), NULL
);
3867 gimple g
= gimple_build_assign_with_ops (PLUS_EXPR
, x
, a
, b
);
3868 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
3869 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, x
, NULL
);
3870 user
= gsi_stmt (gsi2
);
3872 gsi_remove (&gsi
, true);
3873 release_defs (feed
);
3874 plus_negates
.safe_push (gimple_assign_lhs (user
));
3878 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3879 rid of one operation. */
3880 gimple feed
= SSA_NAME_DEF_STMT (negate
);
3881 tree a
= gimple_assign_rhs1 (feed
);
3882 tree rhs1
= gimple_assign_rhs1 (user
);
3883 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
3884 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, a
);
3885 update_stmt (gsi_stmt (gsi
));
/* Returns true if OP is of a type for which we can do reassociation.
   That is for integral or non-saturating fixed-point types, and for
   floating point types when associative-math is enabled.  */
3896 can_reassociate_p (tree op
)
3898 tree type
= TREE_TYPE (op
);
3899 if ((INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_WRAPS (type
))
3900 || NON_SAT_FIXED_POINT_TYPE_P (type
)
3901 || (flag_associative_math
&& FLOAT_TYPE_P (type
)))
/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   Given a chain such as  b = c - d;  q = b - r;  k = t - q;
   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement
   and set UIDs within each basic block.  */
3925 break_up_subtract_bb (basic_block bb
)
3927 gimple_stmt_iterator gsi
;
3929 unsigned int uid
= 1;
3931 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3933 gimple stmt
= gsi_stmt (gsi
);
3934 gimple_set_visited (stmt
, false);
3935 gimple_set_uid (stmt
, uid
++);
3937 if (!is_gimple_assign (stmt
)
3938 || !can_reassociate_p (gimple_assign_lhs (stmt
)))
3941 /* Look for simple gimple subtract operations. */
3942 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
3944 if (!can_reassociate_p (gimple_assign_rhs1 (stmt
))
3945 || !can_reassociate_p (gimple_assign_rhs2 (stmt
)))
3948 /* Check for a subtract used only in an addition. If this
3949 is the case, transform it into add of a negate for better
3950 reassociation. IE transform C = A-B into C = A + -B if C
3951 is only used in an addition. */
3952 if (should_break_up_subtract (stmt
))
3953 break_up_subtract (stmt
, &gsi
);
3955 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
3956 && can_reassociate_p (gimple_assign_rhs1 (stmt
)))
3957 plus_negates
.safe_push (gimple_assign_lhs (stmt
));
3959 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
3961 son
= next_dom_son (CDI_DOMINATORS
, son
))
3962 break_up_subtract_bb (son
);
/* Used for repeated factor analysis.  */
struct repeat_factor_d
{
  /* An SSA name that occurs in a multiply chain.  */
  tree factor;

  /* Cached rank of the factor.  */
  unsigned rank;

  /* Number of occurrences of the factor in the chain.  */
  HOST_WIDE_INT count;

  /* An SSA name representing the product of this factor and
     all factors appearing later in the repeated factor vector.  */
  tree repr;
};

typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
typedef const struct repeat_factor_d *const_repeat_factor_t;

static vec<repeat_factor> repeat_factor_vec;
3988 /* Used for sorting the repeat factor vector. Sort primarily by
3989 ascending occurrence count, secondarily by descending rank. */
3992 compare_repeat_factors (const void *x1
, const void *x2
)
3994 const_repeat_factor_t rf1
= (const_repeat_factor_t
) x1
;
3995 const_repeat_factor_t rf2
= (const_repeat_factor_t
) x2
;
3997 if (rf1
->count
!= rf2
->count
)
3998 return rf1
->count
- rf2
->count
;
4000 return rf2
->rank
- rf1
->rank
;
/* Look for repeated operands in OPS in the multiply tree rooted at
   STMT.  Replace them with an optimal sequence of multiplies and powi
   builtin calls, and remove the used operands from OPS.  Return an
   SSA name representing the value of the replacement sequence.  */
4009 attempt_builtin_powi (gimple stmt
, vec
<operand_entry_t
> *ops
)
4011 unsigned i
, j
, vec_len
;
4014 repeat_factor_t rf1
, rf2
;
4015 repeat_factor rfnew
;
4016 tree result
= NULL_TREE
;
4017 tree target_ssa
, iter_result
;
4018 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
4019 tree powi_fndecl
= mathfn_built_in (type
, BUILT_IN_POWI
);
4020 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4021 gimple mul_stmt
, pow_stmt
;
4023 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4028 /* Allocate the repeated factor vector. */
4029 repeat_factor_vec
.create (10);
4031 /* Scan the OPS vector for all SSA names in the product and build
4032 up a vector of occurrence counts for each factor. */
4033 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
4035 if (TREE_CODE (oe
->op
) == SSA_NAME
)
4037 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4039 if (rf1
->factor
== oe
->op
)
4041 rf1
->count
+= oe
->count
;
4046 if (j
>= repeat_factor_vec
.length ())
4048 rfnew
.factor
= oe
->op
;
4049 rfnew
.rank
= oe
->rank
;
4050 rfnew
.count
= oe
->count
;
4051 rfnew
.repr
= NULL_TREE
;
4052 repeat_factor_vec
.safe_push (rfnew
);
4057 /* Sort the repeated factor vector by (a) increasing occurrence count,
4058 and (b) decreasing rank. */
4059 repeat_factor_vec
.qsort (compare_repeat_factors
);
  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_pow opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions.  If we instead ignored the cached
     y * z and first multiplied by the __builtin_pow opportunity z * z,
     we would get an inferior sequence requiring an extra multiply.  */
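  /* Reconstructed sketch of the better sequence discussed above
     (assumed, not the original listing): caching t1 = y * z gives
       t1 = y * z;  t2 = t1 * x;  t3 = t2 * t2;  t4 = t1 * t3;
       result = t4 * z;                          (5 multiplies)
     whereas consuming the z * z opportunity first needs a sixth
     multiply to assemble the same product.  */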
4091 vec_len
= repeat_factor_vec
.length ();
4093 /* Repeatedly look for opportunities to create a builtin_powi call. */
4096 HOST_WIDE_INT power
;
4098 /* First look for the largest cached product of factors from
4099 preceding iterations. If found, create a builtin_powi for
4100 it if the minimum occurrence count for its factors is at
4101 least 2, or just use this cached product as our next
4102 multiplicand if the minimum occurrence count is 1. */
4103 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4105 if (rf1
->repr
&& rf1
->count
> 0)
4115 iter_result
= rf1
->repr
;
4117 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4121 fputs ("Multiplying by cached product ", dump_file
);
4122 for (elt
= j
; elt
< vec_len
; elt
++)
4124 rf
= &repeat_factor_vec
[elt
];
4125 print_generic_expr (dump_file
, rf
->factor
, 0);
4126 if (elt
< vec_len
- 1)
4127 fputs (" * ", dump_file
);
4129 fputs ("\n", dump_file
);
4134 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4135 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
4136 build_int_cst (integer_type_node
,
4138 gimple_call_set_lhs (pow_stmt
, iter_result
);
4139 gimple_set_location (pow_stmt
, gimple_location (stmt
));
4140 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
4142 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4146 fputs ("Building __builtin_pow call for cached product (",
4148 for (elt
= j
; elt
< vec_len
; elt
++)
4150 rf
= &repeat_factor_vec
[elt
];
4151 print_generic_expr (dump_file
, rf
->factor
, 0);
4152 if (elt
< vec_len
- 1)
4153 fputs (" * ", dump_file
);
4155 fprintf (dump_file
, ")^"HOST_WIDE_INT_PRINT_DEC
"\n",
      /* Otherwise, find the first factor in the repeated factor
         vector whose occurrence count is at least 2.  If no such
         factor exists, there are no builtin_powi opportunities
         remaining.  */
4166 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4168 if (rf1
->count
>= 2)
4177 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4181 fputs ("Building __builtin_pow call for (", dump_file
);
4182 for (elt
= j
; elt
< vec_len
; elt
++)
4184 rf
= &repeat_factor_vec
[elt
];
4185 print_generic_expr (dump_file
, rf
->factor
, 0);
4186 if (elt
< vec_len
- 1)
4187 fputs (" * ", dump_file
);
4189 fprintf (dump_file
, ")^"HOST_WIDE_INT_PRINT_DEC
"\n", power
);
4192 reassociate_stats
.pows_created
++;
      /* Visit each element of the vector in reverse order (so that
         high-occurrence elements are visited first, and within the
         same occurrence count, lower-ranked elements are visited
         first).  Form a linear product of all elements in this order
         whose occurrence count is at least that of element J.
         Record the SSA name representing the product of each element
         with all subsequent elements in the vector.  */
4201 if (j
== vec_len
- 1)
4202 rf1
->repr
= rf1
->factor
;
4205 for (ii
= vec_len
- 2; ii
>= (int)j
; ii
--)
4209 rf1
= &repeat_factor_vec
[ii
];
4210 rf2
= &repeat_factor_vec
[ii
+ 1];
4212 /* Init the last factor's representative to be itself. */
4214 rf2
->repr
= rf2
->factor
;
4219 target_ssa
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4220 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
,
4223 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4224 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
4225 rf1
->repr
= target_ssa
;
4227 /* Don't reprocess the multiply we just introduced. */
4228 gimple_set_visited (mul_stmt
, true);
4232 /* Form a call to __builtin_powi for the maximum product
4233 just formed, raised to the power obtained earlier. */
4234 rf1
= &repeat_factor_vec
[j
];
4235 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4236 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
4237 build_int_cst (integer_type_node
,
4239 gimple_call_set_lhs (pow_stmt
, iter_result
);
4240 gimple_set_location (pow_stmt
, gimple_location (stmt
));
4241 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
4244 /* If we previously formed at least one other builtin_powi call,
4245 form the product of this one and those others. */
4248 tree new_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4249 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
, new_result
,
4250 result
, iter_result
);
4251 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4252 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
4253 gimple_set_visited (mul_stmt
, true);
4254 result
= new_result
;
4257 result
= iter_result
;
      /* Decrement the occurrence count of each element in the product
         by the count found above, and remove this many copies of each
         factor from OPS.  */
4262 for (i
= j
; i
< vec_len
; i
++)
4267 rf1
= &repeat_factor_vec
[i
];
4268 rf1
->count
-= power
;
4270 FOR_EACH_VEC_ELT_REVERSE (*ops
, n
, oe
)
4272 if (oe
->op
== rf1
->factor
)
4276 ops
->ordered_remove (n
);
  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     release the repeated factor vector.  */
4296 ops
->qsort (sort_by_operand_rank
);
4297 repeat_factor_vec
.release ();
4299 /* Return the final product computed herein. Note that there may
4300 still be some elements with single occurrence count left in OPS;
4301 those will be handled by the normal reassociation logic. */
4305 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4308 transform_stmt_to_copy (gimple_stmt_iterator
*gsi
, gimple stmt
, tree new_rhs
)
4312 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4314 fprintf (dump_file
, "Transforming ");
4315 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4318 rhs1
= gimple_assign_rhs1 (stmt
);
4319 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
4321 remove_visited_stmt_chain (rhs1
);
4323 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4325 fprintf (dump_file
, " into ");
4326 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4330 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4333 transform_stmt_to_multiply (gimple_stmt_iterator
*gsi
, gimple stmt
,
4334 tree rhs1
, tree rhs2
)
4336 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4338 fprintf (dump_file
, "Transforming ");
4339 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4342 gimple_assign_set_rhs_with_ops (gsi
, MULT_EXPR
, rhs1
, rhs2
);
4343 update_stmt (gsi_stmt (*gsi
));
4344 remove_visited_stmt_chain (rhs1
);
4346 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4348 fprintf (dump_file
, " into ");
4349 print_gimple_stmt (dump_file
, stmt
, 0, 0);
/* Reassociate expressions in basic block BB and its post-dominator as
   children.  */
4357 reassociate_bb (basic_block bb
)
4359 gimple_stmt_iterator gsi
;
4361 gimple stmt
= last_stmt (bb
);
4363 if (stmt
&& !gimple_visited_p (stmt
))
4364 maybe_optimize_range_tests (stmt
);
4366 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
4368 stmt
= gsi_stmt (gsi
);
4370 if (is_gimple_assign (stmt
)
4371 && !stmt_could_throw_p (stmt
))
4373 tree lhs
, rhs1
, rhs2
;
4374 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
4376 /* If this is not a gimple binary expression, there is
4377 nothing for us to do with it. */
4378 if (get_gimple_rhs_class (rhs_code
) != GIMPLE_BINARY_RHS
)
4381 /* If this was part of an already processed statement,
4382 we don't need to touch it again. */
4383 if (gimple_visited_p (stmt
))
4385 /* This statement might have become dead because of previous
4387 if (has_zero_uses (gimple_get_lhs (stmt
)))
4389 gsi_remove (&gsi
, true);
4390 release_defs (stmt
);
4391 /* We might end up removing the last stmt above which
4392 places the iterator to the end of the sequence.
4393 Reset it to the last stmt in this case which might
4394 be the end of the sequence as well if we removed
4395 the last statement of the sequence. In which case
4396 we need to bail out. */
4397 if (gsi_end_p (gsi
))
4399 gsi
= gsi_last_bb (bb
);
4400 if (gsi_end_p (gsi
))
4407 lhs
= gimple_assign_lhs (stmt
);
4408 rhs1
= gimple_assign_rhs1 (stmt
);
4409 rhs2
= gimple_assign_rhs2 (stmt
);
4411 /* For non-bit or min/max operations we can't associate
4412 all types. Verify that here. */
4413 if (rhs_code
!= BIT_IOR_EXPR
4414 && rhs_code
!= BIT_AND_EXPR
4415 && rhs_code
!= BIT_XOR_EXPR
4416 && rhs_code
!= MIN_EXPR
4417 && rhs_code
!= MAX_EXPR
4418 && (!can_reassociate_p (lhs
)
4419 || !can_reassociate_p (rhs1
)
4420 || !can_reassociate_p (rhs2
)))
4423 if (associative_tree_code (rhs_code
))
4425 auto_vec
<operand_entry_t
> ops
;
4426 tree powi_result
= NULL_TREE
;
4428 /* There may be no immediate uses left by the time we
4429 get here because we may have eliminated them all. */
4430 if (TREE_CODE (lhs
) == SSA_NAME
&& has_zero_uses (lhs
))
4433 gimple_set_visited (stmt
, true);
4434 linearize_expr_tree (&ops
, stmt
, true, true);
4435 ops
.qsort (sort_by_operand_rank
);
4436 optimize_ops_list (rhs_code
, &ops
);
4437 if (undistribute_ops_list (rhs_code
, &ops
,
4438 loop_containing_stmt (stmt
)))
4440 ops
.qsort (sort_by_operand_rank
);
4441 optimize_ops_list (rhs_code
, &ops
);
4444 if (rhs_code
== BIT_IOR_EXPR
|| rhs_code
== BIT_AND_EXPR
)
4445 optimize_range_tests (rhs_code
, &ops
);
4447 if (first_pass_instance
4448 && rhs_code
== MULT_EXPR
4449 && flag_unsafe_math_optimizations
)
4450 powi_result
= attempt_builtin_powi (stmt
, &ops
);
4452 /* If the operand vector is now empty, all operands were
4453 consumed by the __builtin_powi optimization. */
4454 if (ops
.length () == 0)
4455 transform_stmt_to_copy (&gsi
, stmt
, powi_result
);
4456 else if (ops
.length () == 1)
4458 tree last_op
= ops
.last ()->op
;
4461 transform_stmt_to_multiply (&gsi
, stmt
, last_op
,
4464 transform_stmt_to_copy (&gsi
, stmt
, last_op
);
4468 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
4469 int ops_num
= ops
.length ();
4470 int width
= get_reassociation_width (ops_num
, rhs_code
, mode
);
4473 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4475 "Width = %d was chosen for reassociation\n", width
);
4478 && ops
.length () > 3)
4479 rewrite_expr_tree_parallel (stmt
, width
, ops
);
4482 /* When there are three operands left, we want
4483 to make sure the ones that get the double
4484 binary op are chosen wisely. */
4485 int len
= ops
.length ();
4487 swap_ops_for_binary_stmt (ops
, len
- 3, stmt
);
4489 new_lhs
= rewrite_expr_tree (stmt
, 0, ops
,
4490 powi_result
!= NULL
);
4493 /* If we combined some repeated factors into a
4494 __builtin_powi call, multiply that result by the
4495 reassociated operands. */
4498 gimple mul_stmt
, lhs_stmt
= SSA_NAME_DEF_STMT (lhs
);
4499 tree type
= TREE_TYPE (lhs
);
4500 tree target_ssa
= make_temp_ssa_name (type
, NULL
,
4502 gimple_set_lhs (lhs_stmt
, target_ssa
);
4503 update_stmt (lhs_stmt
);
4505 target_ssa
= new_lhs
;
4506 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
, lhs
,
4509 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4510 gsi_insert_after (&gsi
, mul_stmt
, GSI_NEW_STMT
);
4516 for (son
= first_dom_son (CDI_POST_DOMINATORS
, bb
);
4518 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
4519 reassociate_bb (son
);
void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
void debug_ops_vector (vec<operand_entry_t> ops);
/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
{
  operand_entry_t oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op, 0);
      fprintf (file, "\n");
    }
}
/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry_t> ops)
{
  dump_ops_vector (stderr, ops);
}
static void
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}
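/* For illustration (made-up GIMPLE): after break_up_subtract_bb,

       t_5 = a_1 - b_2;
       r_6 = t_5 + c_3;

   reaches reassociate_bb roughly as

       _7 = -b_2;
       t_5 = a_1 + _7;
       r_6 = t_5 + c_3;

   so the whole chain linearizes as a single PLUS_EXPR operand list.  */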
/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  operand_entry_pool = create_alloc_pool ("operand entry pool",
					  sizeof (struct operand_entry), 30);
  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
  operand_rank = pointer_map_create ();

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
	insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}
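/* Worked example of the rank numbering above (values illustrative):
   rank starts at 2, so with three default definitions (say two
   parameters and the static chain) they receive ranks 3, 4 and 5.  The
   first basic block in the reverse post order then gets
   bb_rank = 6 << 16 = 0x60000, the next 7 << 16 = 0x70000, and so on,
   which keeps every block-local rank strictly above the ranks of the
   default definitions.  */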
/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
			    reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
			    reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
			    reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
			    reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
			    reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
			    reassociate_stats.pows_created);

  pointer_map_destroy (operand_rank);
  free_alloc_pool (operand_entry_pool);
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}
/* Gate and execute functions for Reassociation.  */

static unsigned int
execute_reassoc (void)
{
  init_reassoc ();

  do_reassoc ();
  repropagate_negates ();

  fini_reassoc ();
  return 0;
}

static bool
gate_tree_ssa_reassoc (void)
{
  return flag_tree_reassoc != 0;
}
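/* The gate simply tests flag_tree_reassoc, so the whole pass can be
   switched off from the command line, e.g.

       gcc -O2 -fno-tree-reassoc foo.c

   and the TDF_DETAILS dumps used throughout this file can be requested
   with something like -fdump-tree-reassoc1-details (the exact dump
   suffix depends on the pass instance).  */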
namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_verify_ssa
    | TODO_update_ssa_only_virtuals
    | TODO_verify_flow ), /* todo_flags_finish */
};
class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  bool gate () { return gate_tree_ssa_reassoc (); }
  unsigned int execute () { return execute_reassoc (); }

}; // class pass_reassoc

} // anon namespace
gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}
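/* Note: reassociation runs more than once in the GCC optimization
   pipeline (which is why reassociate_bb checks first_pass_instance),
   and the clone () method above exists so the pass manager can create
   the additional instances.  A rough, illustrative sketch of the
   passes.def entries:

       NEXT_PASS (pass_reassoc);
       ...
       NEXT_PASS (pass_reassoc);
*/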