/* Reassociation for trees.
   Copyright (C) 2005-2014 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hash-table.h"
#include "stor-layout.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-inline.h"
#include "pointer-set.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "gimple-expr.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "alloc-pool.h"
#include "langhooks.h"
#include "diagnostic-core.h"
/*  This is a simple global reassociation pass.  It is, in part, based
    on the LLVM pass of the same name (They do some things more/less
    than we do, in different orders, etc).

    It consists of five steps:

    1. Breaking up subtract operations into addition + negate, where
    it would promote the reassociation of adds.

    2. Left linearization of the expression trees, so that (A+B)+(C+D)
    becomes (((A+B)+C)+D), which is easier for us to rewrite later.
    During linearization, we place the operands of the binary
    expressions into a vector of operand_entry_t

    3. Optimization of the operand lists, eliminating things like a +
    -a, a & a, etc.

    3a. Combine repeated factors with the same occurrence counts
    into a __builtin_powi call that will later be optimized into
    an optimal number of multiplies.

    4. Rewrite the expression trees we linearized and optimized so
    they are in proper rank order.

    5. Repropagate negates, as nothing else will clean it up ATM.

    A bit of theory on #4, since nobody seems to write anything down
    about why it makes sense to do it the way they do it:

    We could do this much nicer theoretically, but don't (for reasons
    explained after how to do it theoretically nice :P).

    In order to promote the most redundancy elimination, you want
    binary expressions whose operands are the same rank (or
    preferably, the same value) exposed to the redundancy eliminator,
    for possible elimination.

    So the way to do this if we really cared, is to build the new op
    tree from the leaves to the roots, merging as you go, and putting the
    new op on the end of the worklist, until you are left with one
    thing on the worklist.

    IE if you have to rewrite the following set of operands (listed with
    rank in parentheses), with opcode PLUS_EXPR:

    a (1),  b (1),  c (1),  d (2), e (2)

    We start with our merge worklist empty, and the ops list with all of
    those on it.

    You want to first merge all leaves of the same rank, as much as
    possible.

    So first build a binary op of

    mergetmp = a + b, and put "mergetmp" on the merge worklist.

    Because there is no three operand form of PLUS_EXPR, c is not going to
    be exposed to redundancy elimination as a rank 1 operand.

    So you might as well throw it on the merge worklist (you could also
    consider it to now be a rank two operand, and merge it with d and e,
    but in this case, you then have evicted e from a binary op.  So at
    least in this situation, you can't win.)

    Then build a binary op of d + e
    mergetmp2 = d + e

    and put mergetmp2 on the merge worklist.

    so merge worklist = {mergetmp, c, mergetmp2}

    Continue building binary ops of these operations until you have only
    one operation left on the worklist.

    mergetmp3 = mergetmp + c

    worklist = {mergetmp2, mergetmp3}

    mergetmp4 = mergetmp2 + mergetmp3

    worklist = {mergetmp4}

    because we have one operation left, we can now just set the original
    statement equal to the result of that operation.

    This will at least expose a + b  and d + e to redundancy elimination
    as binary operations.

    For extra points, you can reuse the old statements to build the
    mergetmps, since you shouldn't run out.

    So why don't we do this?

    Because it's expensive, and rarely will help.  Most trees we are
    reassociating have 3 or less ops.  If they have 2 ops, they already
    will be written into a nice single binary op.  If you have 3 ops, a
    single simple check suffices to tell you whether the first two are of the
    same rank.  If so, you know to order it

    mergetmp = op1 + op2
    newstmt = mergetmp + op3

    instead of
    mergetmp = op2 + op3
    newstmt = mergetmp + op1

    If all three are of the same rank, you can't expose them all in a
    single binary operator anyway, so the above is *still* the best you
    can do.

    Thus, this is what we do.  When we have three ops left, we check to see
    what order to put them in, and call it a day.  As a nod to vector sum
    reduction, we check if any of the ops are really a phi node that is a
    destructive update for the associating op, and keep the destructive
    update together for vector sum reduction recognition.  */
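/* As a concrete sketch (hypothetical GIMPLE, not taken from a real dump),
   reassociating  x = ((a + c) + b) + a  where a, b and c all have the
   same rank collects the operands {a, c, b, a}, sorts them so equal
   operands become adjacent, and re-emits roughly

     t1 = a + a;
     t2 = t1 + b;
     x  = t2 + c;

   which exposes the  a + a  pair to later redundancy elimination in a
   way the original association never would.  */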
/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rearranged_ops;
  int pows_encountered;
  int pows_created;
} reassociate_stats;

/* Operator, rank pair.  */
typedef struct operand_entry
{
  unsigned int rank;
  int id;
  tree op;
  unsigned int count;
} *operand_entry_t;

static alloc_pool operand_entry_pool;

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;

/* Operand->rank hashtable.  */
static struct pointer_map_t *operand_rank;

static long get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);

/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)
/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
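/* For instance (an illustrative sketch, not a real dump), in

     <bb 4>:
       sum_1 = PHI <sum_0 (3), sum_2 (4)>
       sum_2 = sum_1 + x_5;

   sum_1 is a loop-carried phi of an innermost loop with a single
   in-loop use, so it receives the block rank of the latch plus
   PHI_LOOP_BIAS rather than its plain block rank, which pushes it to
   the end of the rewritten chain.  */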
static long
phi_rank (gimple stmt)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple use_stmt;

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (arg))
        {
          gimple def_stmt = SSA_NAME_DEF_STMT (arg);
          if (gimple_bb (def_stmt)->loop_father == father)
            return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
        }
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}
/* If EXP is an SSA_NAME defined by a PHI statement that represents a
   loop-carried dependence of an innermost loop, return TRUE; else
   return FALSE.  */

static bool
loop_carried_phi (tree exp)
{
  gimple phi_stmt;
  long block_rank;

  if (TREE_CODE (exp) != SSA_NAME
      || SSA_NAME_IS_DEFAULT_DEF (exp))
    return false;

  phi_stmt = SSA_NAME_DEF_STMT (exp);

  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
    return false;

  /* Non-loop-carried phis have block rank.  Loop-carried phis have
     an additional bias added in.  If this phi doesn't have block rank,
     it's biased and should not be propagated.  */
  block_rank = bb_rank[gimple_bb (phi_stmt)->index];

  if (phi_rank (phi_stmt) != block_rank)
    return true;

  return false;
}
/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */

static long
propagate_rank (long rank, tree op)
{
  long op_rank;

  if (loop_carried_phi (op))
    return rank;

  op_rank = get_rank (op);

  return MAX (rank, op_rank);
}
/* Look up the operand rank structure for expression E.  */

static long
find_operand_rank (tree e)
{
  void **slot = pointer_map_contains (operand_rank, e);
  return slot ? (long) (intptr_t) *slot : -1;
}

/* Insert {E,RANK} into the operand rank hashtable.  */

static void
insert_operand_rank (tree e, long rank)
{
  void **slot;
  gcc_assert (rank > 0);
  slot = pointer_map_insert (operand_rank, e);
  gcc_assert (!*slot);
  *slot = (void *) (intptr_t) rank;
}
/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* Constants have rank 0.  */
  if (is_gimple_min_invariant (e))
    return 0;

  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      gimple stmt;
      long rank;
      int i, n;
      tree op;

      if (SSA_NAME_IS_DEFAULT_DEF (e))
        return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
        return phi_rank (stmt);

      if (!is_gimple_assign (stmt)
          || gimple_vdef (stmt))
        return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
        return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
         exception, remove the bias from loop-carried phis when propagating
         the rank so that dependent operations are not also biased.  */
      rank = 0;
      if (gimple_assign_single_p (stmt))
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          n = TREE_OPERAND_LENGTH (rhs);
          if (n == 0)
            rank = propagate_rank (rank, rhs);
          else
            {
              for (i = 0; i < n; i++)
                {
                  op = TREE_OPERAND (rhs, i);
                  if (op != NULL_TREE)
                    rank = propagate_rank (rank, op);
                }
            }
        }
      else
        {
          n = gimple_num_ops (stmt);
          for (i = 1; i < n; i++)
            {
              op = gimple_op (stmt, i);
              rank = propagate_rank (rank, op);
            }
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Rank for ");
          print_generic_expr (dump_file, e, 0);
          fprintf (dump_file, " is %ld\n", (rank + 1));
        }

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Globals, etc, are rank 0 */
  return 0;
}
/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */
static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    return FLOAT_CONST_TYPE;
  else
    return OTHER_CONST_TYPE;
}
/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry_t oea = *(const operand_entry_t *)pa;
  const operand_entry_t oeb = *(const operand_entry_t *)pb;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oeb->rank == 0 && oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
        return constant_type (oeb->op) - constant_type (oea->op);
      else
        /* To make sorting result stable, we use unique IDs to determine
           order.  */
        return oeb->id - oea->id;
    }

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if ((oeb->rank - oea->rank == 0)
      && TREE_CODE (oea->op) == SSA_NAME
      && TREE_CODE (oeb->op) == SSA_NAME)
    {
      /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
         versions of removed SSA_NAMEs, prefer if possible to sort
         based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.  */
      if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
          && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
          && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
        {
          gimple stmta = SSA_NAME_DEF_STMT (oea->op);
          gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
          basic_block bba = gimple_bb (stmta);
          basic_block bbb = gimple_bb (stmtb);
          if (bbb != bba)
            {
              if (bb_rank[bbb->index] != bb_rank[bba->index])
                return bb_rank[bbb->index] - bb_rank[bba->index];
            }
          else
            {
              bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
              bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
              if (da != db)
                return da ? 1 : -1;
            }
        }

      if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
        return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
      else
        return oeb->id - oea->id;
    }

  if (oeb->rank != oea->rank)
    return oeb->rank - oea->rank;
  else
    return oeb->id - oea->id;
}
/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
{
  operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  ops->safe_push (oe);
}

/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
                       HOST_WIDE_INT repeat)
{
  operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}
/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    return true;

  return false;
}
/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of the negate operation.  Otherwise, return NULL.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple stmt = SSA_NAME_DEF_STMT (name);

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}
/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */
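/* For example (illustrative only), with OPS = {a, b, b} this rewrites
   a & b & b  as  a & b,  and  a ^ b ^ b  as plain  a;  for the
   two-operand case  b ^ b  the whole list collapses to the constant 0.  */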
static bool
eliminate_duplicate_pair (enum tree_code opcode,
                          vec<operand_entry_t> *ops,
                          bool *all_done,
                          unsigned int i,
                          operand_entry_t curr,
                          operand_entry_t last)
{

  /* If we have two of the same op, and the opcode is & |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
        {
        case MAX_EXPR:
        case MIN_EXPR:
        case BIT_IOR_EXPR:
        case BIT_AND_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op, 0);
              fprintf (dump_file, " [&|minmax] ");
              print_generic_expr (dump_file, last->op, 0);
              fprintf (dump_file, " -> ");
              print_generic_stmt (dump_file, last->op, 0);
            }

          ops->ordered_remove (i);
          reassociate_stats.ops_eliminated ++;

          return true;

        case BIT_XOR_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op, 0);
              fprintf (dump_file, " ^ ");
              print_generic_expr (dump_file, last->op, 0);
              fprintf (dump_file, " -> nothing\n");
            }

          reassociate_stats.ops_eliminated += 2;

          if (ops->length () == 2)
            {
              add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
              *all_done = true;
            }
          else
            {
              ops->ordered_remove (i-1);
              ops->ordered_remove (i-1);
            }

          return true;

        default:
          break;
        }
    }
  return false;
}

static vec<tree> plus_negates;
/* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
   expression, look in OPS for a corresponding positive operation to cancel
   it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */
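/* For example (illustrative only), given OPS = {a, b, -b} for a
   PLUS_EXPR chain, the  b  and  -b  entries cancel and are replaced by
   a 0 constant, i.e.  a + b + (-b)  becomes  a + 0;  similarly
   a + b + ~b  becomes  a + (-1)  because  b + ~b  is all ones.  */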
static bool
eliminate_plus_minus_pair (enum tree_code opcode,
                           vec<operand_entry_t> *ops,
                           unsigned int currindex,
                           operand_entry_t curr)
{
  tree negateop, notop;
  unsigned int i;
  operand_entry_t oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1 ;
       i++)
    {
      if (oe->op == negateop)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, negateop, 0);
              fprintf (dump_file, " + -");
              print_generic_expr (dump_file, oe->op, 0);
              fprintf (dump_file, " -> 0\n");
            }

          ops->ordered_remove (i);
          add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
          ops->ordered_remove (currindex);
          reassociate_stats.ops_eliminated ++;

          return true;
        }
      else if (oe->op == notop)
        {
          tree op_type = TREE_TYPE (oe->op);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, notop, 0);
              fprintf (dump_file, " + ~");
              print_generic_expr (dump_file, oe->op, 0);
              fprintf (dump_file, " -> -1\n");
            }

          ops->ordered_remove (i);
          add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
          ops->ordered_remove (currindex);
          reassociate_stats.ops_eliminated ++;

          return true;
        }
    }

  /* CURR->OP is a negate expr in a plus expr: save it for later
     inspection in repropagate_negates().  */
  if (negateop != NULL_TREE)
    plus_negates.safe_push (curr->op);

  return false;
}
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0, and return true.  Otherwise, return
   false.  */
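/* For example (illustrative only),  a & b & ~b  collapses to 0 and
   a | b | ~b  collapses to -1 (all ones); in both cases the whole
   operand list is replaced by that single constant.  */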
static bool
eliminate_not_pairs (enum tree_code opcode,
                     vec<operand_entry_t> *ops,
                     unsigned int currindex,
                     operand_entry_t curr)
{
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, notop, 0);
              if (opcode == BIT_AND_EXPR)
                fprintf (dump_file, " & ~");
              else if (opcode == BIT_IOR_EXPR)
                fprintf (dump_file, " | ~");
              print_generic_expr (dump_file, oe->op, 0);
              if (opcode == BIT_AND_EXPR)
                fprintf (dump_file, " -> 0\n");
              else if (opcode == BIT_IOR_EXPR)
                fprintf (dump_file, " -> -1\n");
            }

          if (opcode == BIT_AND_EXPR)
            oe->op = build_zero_cst (TREE_TYPE (oe->op));
          else if (opcode == BIT_IOR_EXPR)
            oe->op = build_all_ones_cst (TREE_TYPE (oe->op));

          reassociate_stats.ops_eliminated += ops->length () - 1;
          ops->truncate (0);
          ops->quick_push (oe);
          return true;
        }
    }

  return false;
}
/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */
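/* For example (illustrative only): with OPS = {a, b, 0} an AND chain
   collapses to just 0 and an OR chain drops the 0, leaving {a, b};
   with OPS = {a, b, 1} a MULT chain drops the 1, and  a * b * 0
   becomes just 0 (for floats only when NaNs and signed zeros need not
   be honored).  */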
static void
eliminate_using_constants (enum tree_code opcode,
                           vec<operand_entry_t> *ops)
{
  operand_entry_t oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
        {
        case BIT_AND_EXPR:
          if (integer_zerop (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found & 0, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;

                  ops->truncate (0);
                  ops->quick_push (oelast);
                }
            }
          else if (integer_all_onesp (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found & -1, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        case BIT_IOR_EXPR:
          if (integer_all_onesp (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found | -1, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;

                  ops->truncate (0);
                  ops->quick_push (oelast);
                }
            }
          else if (integer_zerop (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found | 0, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        case MULT_EXPR:
          if (integer_zerop (oelast->op)
              || (FLOAT_TYPE_P (type)
                  && !HONOR_NANS (TYPE_MODE (type))
                  && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
                  && real_zerop (oelast->op)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found * 0, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;
                  ops->truncate (0);
                  ops->quick_push (oelast);
                }
            }
          else if (integer_onep (oelast->op)
                   || (FLOAT_TYPE_P (type)
                       && !HONOR_SNANS (TYPE_MODE (type))
                       && real_onep (oelast->op)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found * 1, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        case BIT_XOR_EXPR:
        case PLUS_EXPR:
        case MINUS_EXPR:
          if (integer_zerop (oelast->op)
              || (FLOAT_TYPE_P (type)
                  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
                  && fold_real_zero_addition_p (type, oelast->op,
                                                opcode == MINUS_EXPR)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found [|^+] 0, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        default:
          break;
        }
    }
}
static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
                                 bool, bool);

/* Structure for tracking and counting operands.  */
typedef struct oecount_s {
  int cnt;
  int id;
  enum tree_code oecode;
  tree op;
} oecount;


/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;
/* Oecount hashtable helpers.  */

struct oecount_hasher : typed_noop_remove <void>
{
  /* Note that this hash table stores integers, not pointers.
     So, observe the casting in the member functions.  */
  typedef void value_type;
  typedef void compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};
/* Hash function for oecount.  */

inline hashval_t
oecount_hasher::hash (const value_type *p)
{
  const oecount *c = &cvec[(size_t)p - 42];
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}

/* Comparison function for oecount.  */

inline bool
oecount_hasher::equal (const value_type *p1, const compare_type *p2)
{
  const oecount *c1 = &cvec[(size_t)p1 - 42];
  const oecount *c2 = &cvec[(size_t)p2 - 42];
  return (c1->oecode == c2->oecode
          && c1->op == c2->op);
}
/* Comparison function for qsort sorting oecount elements by count.  */

static int
oecount_cmp (const void *p1, const void *p2)
{
  const oecount *c1 = (const oecount *)p1;
  const oecount *c2 = (const oecount *)p2;
  if (c1->cnt != c2->cnt)
    return c1->cnt - c2->cnt;
  else
    /* If counts are identical, use unique IDs to stabilize qsort.  */
    return c1->id - c2->id;
}
/* Return TRUE iff STMT represents a builtin call that raises OP
   to some exponent.  */

static bool
stmt_is_power_of_op (gimple stmt, tree op)
{
  tree fndecl;

  if (!is_gimple_call (stmt))
    return false;

  fndecl = gimple_call_fndecl (stmt);

  if (!fndecl
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return false;

  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
    {
    CASE_FLT_FN (BUILT_IN_POW):
    CASE_FLT_FN (BUILT_IN_POWI):
      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));

    default:
      return false;
    }
}
/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */

static HOST_WIDE_INT
decrement_power (gimple stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
    {
    CASE_FLT_FN (BUILT_IN_POW):
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, 0, 0);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_FLT_FN (BUILT_IN_POWI):
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      gcc_unreachable ();
    }
}
/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple stmt, tree *def)
{
  tree lhs;
  gimple use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  gsi_remove (&gsi, true);
  release_defs (stmt);
}
/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */
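/* For example (illustrative only), if *DEF is  t3  defined by the
   chain  t1 = x * y;  t2 = t1 * z;  t3 = t2 * y;  then calling this
   with OPCODE = MULT_EXPR and OP = z removes the multiplication by z:
   t2's single use is fed t1 instead, leaving  t1 = x * y;  t3 = t1 * y.  */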
static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  gimple stmt = SSA_NAME_DEF_STMT (*def);

  do
    {
      tree name;

      if (opcode == MULT_EXPR
          && stmt_is_power_of_op (stmt, op))
        {
          if (decrement_power (stmt) == 1)
            propagate_op_to_single_use (op, stmt, def);
          return;
        }

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
         is ours simply propagate the other operand into the stmts
         single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
          && (name == op
              || gimple_assign_rhs2 (stmt) == op))
        {
          if (name == op)
            name = gimple_assign_rhs2 (stmt);
          propagate_op_to_single_use (name, stmt, def);
          return;
        }

      /* We might have a multiply of two __builtin_pow* calls, and
         the operand might be hiding in the rightmost one.  */
      if (opcode == MULT_EXPR
          && gimple_assign_rhs_code (stmt) == opcode
          && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
          && has_single_use (gimple_assign_rhs2 (stmt)))
        {
          gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
          if (stmt_is_power_of_op (stmt2, op))
            {
              if (decrement_power (stmt2) == 1)
                propagate_op_to_single_use (op, stmt2, def);
              return;
            }
        }

      /* Continue walking the chain.  */
      gcc_assert (name != op
                  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
    }
  while (1);
}
/* Returns true if statement S1 dominates statement S2.  Like
   stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */

static bool
reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     SSA_NAME.  Assume it lives at the beginning of function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 == bb2)
    {
      /* PHIs in the same basic block are assumed to be
         executed all in parallel, if only one stmt is a PHI,
         it dominates the other stmt in the same basic block.  */
      if (gimple_code (s1) == GIMPLE_PHI)
        return true;

      if (gimple_code (s2) == GIMPLE_PHI)
        return false;

      gcc_assert (gimple_uid (s1) && gimple_uid (s2));

      if (gimple_uid (s1) < gimple_uid (s2))
        return true;

      if (gimple_uid (s1) > gimple_uid (s2))
        return false;

      gimple_stmt_iterator gsi = gsi_for_stmt (s1);
      unsigned int uid = gimple_uid (s1);
      for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple s = gsi_stmt (gsi);
          if (gimple_uid (s) != uid)
            break;
          if (s == s2)
            return true;
        }

      return false;
    }

  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}
/* Insert STMT after INSERT_POINT.  */

static void
insert_stmt_after (gimple stmt, gimple insert_point)
{
  gimple_stmt_iterator gsi;
  basic_block bb;

  if (gimple_code (insert_point) == GIMPLE_PHI)
    bb = gimple_bb (insert_point);
  else if (!stmt_ends_bb_p (insert_point))
    {
      gsi = gsi_for_stmt (insert_point);
      gimple_set_uid (stmt, gimple_uid (insert_point));
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
      return;
    }
  else
    /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
       thus if it must end a basic block, it should be a call that can
       throw, or some assignment that can throw.  If it throws, the LHS
       of it will not be initialized though, so only valid places using
       the SSA_NAME should be dominated by the fallthru edge.  */
    bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
  gsi = gsi_after_labels (bb);
  if (gsi_end_p (gsi))
    {
      gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
      gimple_set_uid (stmt,
                      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
    }
  else
    gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}
/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
   the result.  Places the statement after the definition of either
   OP1 or OP2.  Returns the new statement.  */

static gimple
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
{
  gimple op1def = NULL, op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gimple sum;

  /* Create the addition statement.  */
  op = make_ssa_name (type, NULL);
  sum = gimple_build_assign_with_ops (opcode, op, op1, op2);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
      if (gsi_end_p (gsi))
        {
          gimple_stmt_iterator gsi2
            = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
          gimple_set_uid (sum,
                          gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
        }
      else
        gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else
    {
      gimple insert_point;
      if ((!op1def || gimple_nop_p (op1def))
          || (op2def && !gimple_nop_p (op2def)
              && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
        insert_point = op2def;
      else
        insert_point = op1def;
      insert_stmt_after (sum, insert_point);
    }
  update_stmt (sum);

  return sum;
}
/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
        second candidate bitmap noting all multiplication/division chains
        that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
        candidates with one (operand, code) operation removed from their
        multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
        multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
        processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b ) * x * y in one invocation of the reassociation pass.  */
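/* A short illustrative walk-through (hypothetical operands): for the
   addition chain  {a * x, b * x, c}  with OPCODE PLUS_EXPR, the
   candidates are the two multiplications, the (operand, code) pair
   (x, MULT_EXPR) occurs twice, so both candidates get one factor of x
   removed, a new chain  a + b  is built, and the first candidate is
   replaced by  (a + b) * x, giving  {(a + b) * x, 0, c}.  */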
static bool
undistribute_ops_list (enum tree_code opcode,
                       vec<operand_entry_t> *ops, struct loop *loop)
{
  unsigned int length = ops->length ();
  operand_entry_t oe1;
  unsigned i, j;
  sbitmap candidates, candidates2;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  vec<operand_entry_t> *subops;
  hash_table <oecount_hasher> ctable;
  bool changed = false;
  int next_oecount_id = 0;

  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process.  */
  candidates = sbitmap_alloc (length);
  bitmap_clear (candidates);
  nr_candidates = 0;
  FOR_EACH_VEC_ELT (*ops, i, oe1)
    {
      enum tree_code dcode;
      gimple oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
        continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
        continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
           && dcode != RDIV_EXPR)
          || !is_reassociable_op (oe1def, dcode, loop))
        continue;

      bitmap_set_bit (candidates, i);
      nr_candidates++;
    }

  if (nr_candidates < 2)
    {
      sbitmap_free (candidates);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
                          (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.  */
  ctable.create (15);

  /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to workaround that limitation.  */
  typedef vec<operand_entry_t> vec_operand_entry_t_heap;
  subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
    {
      gimple oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
      oecode = gimple_assign_rhs_code (oedef);
      linearize_expr_tree (&subops[i], oedef,
                           associative_tree_code (oecode), false);

      FOR_EACH_VEC_ELT (subops[i], j, oe1)
        {
          oecount c;
          void **slot;
          size_t idx;
          c.oecode = oecode;
          c.cnt = 1;
          c.id = next_oecount_id++;
          c.op = oe1->op;
          cvec.safe_push (c);
          idx = cvec.length () + 41;
          slot = ctable.find_slot ((void *)idx, INSERT);
          if (!*slot)
            {
              *slot = (void *)idx;
            }
          else
            {
              cvec.pop ();
              cvec[(size_t)*slot - 42].cnt++;
            }
        }
    }
  ctable.dispose ();

  /* Sort the counting table.  */
  cvec.qsort (oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      FOR_EACH_VEC_ELT (cvec, j, c)
        {
          fprintf (dump_file, " %u %s: ", c->cnt,
                   c->oecode == MULT_EXPR
                   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
          print_generic_expr (dump_file, c->op, 0);
          fprintf (dump_file, "\n");
        }
    }

  /* Process the (operand, code) pairs in order of most occurrence.  */
  candidates2 = sbitmap_alloc (length);
  while (!cvec.is_empty ())
    {
      oecount *c = &cvec.last ();
      if (c->cnt < 2)
        break;

      /* Now collect the operands in the outer chain that contain
         the common operand in their inner chain.  */
      bitmap_clear (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
        {
          gimple oedef;
          enum tree_code oecode;
          unsigned j;
          tree op = (*ops)[i]->op;

          /* If we undistributed in this chain already this may be
             a constant.  */
          if (TREE_CODE (op) != SSA_NAME)
            continue;

          oedef = SSA_NAME_DEF_STMT (op);
          oecode = gimple_assign_rhs_code (oedef);
          if (oecode != c->oecode)
            continue;

          FOR_EACH_VEC_ELT (subops[i], j, oe1)
            {
              if (oe1->op == c->op)
                {
                  bitmap_set_bit (candidates2, i);
                  ++nr_candidates2;
                  break;
                }
            }
        }

      if (nr_candidates2 >= 2)
        {
          operand_entry_t oe1, oe2;
          gimple prod;
          int first = bitmap_first_set_bit (candidates2);

          /* Build the new addition chain.  */
          oe1 = (*ops)[first];
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Building (");
              print_generic_expr (dump_file, oe1->op, 0);
            }
          zero_one_operation (&oe1->op, c->oecode, c->op);
          EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
            {
              gimple sum;
              oe2 = (*ops)[i];
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, " + ");
                  print_generic_expr (dump_file, oe2->op, 0);
                }
              zero_one_operation (&oe2->op, c->oecode, c->op);
              sum = build_and_add_sum (TREE_TYPE (oe1->op),
                                       oe1->op, oe2->op, opcode);
              oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
              oe1->op = gimple_get_lhs (sum);
            }

          /* Apply the multiplication/division.  */
          prod = build_and_add_sum (TREE_TYPE (oe1->op),
                                    oe1->op, c->op, c->oecode);
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
              print_generic_expr (dump_file, c->op, 0);
              fprintf (dump_file, "\n");
            }

          /* Record it in the addition chain and disable further
             undistribution with this op.  */
          oe1->op = gimple_assign_lhs (prod);
          oe1->rank = get_rank (oe1->op);
          subops[first].release ();

          changed = true;
        }

      cvec.pop ();
    }

  for (i = 0; i < ops->length (); ++i)
    subops[i].release ();
  free (subops);
  cvec.release ();

  sbitmap_free (candidates);
  sbitmap_free (candidates2);

  return changed;
}
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
   expression, examine the other OPS to see if any of them are comparisons
   of the same values, which we may be able to combine or eliminate.
   For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
static bool
eliminate_redundant_comparison (enum tree_code opcode,
                                vec<operand_entry_t> *ops,
                                unsigned int currindex,
                                operand_entry_t curr)
{
  tree op1, op2;
  enum tree_code lcode, rcode;
  gimple def1, def2;
  int i;
  operand_entry_t oe;

  if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
    return false;

  /* Check that CURR is a comparison.  */
  if (TREE_CODE (curr->op) != SSA_NAME)
    return false;
  def1 = SSA_NAME_DEF_STMT (curr->op);
  if (!is_gimple_assign (def1))
    return false;
  lcode = gimple_assign_rhs_code (def1);
  if (TREE_CODE_CLASS (lcode) != tcc_comparison)
    return false;
  op1 = gimple_assign_rhs1 (def1);
  op2 = gimple_assign_rhs2 (def1);

  /* Now look for a similar comparison in the remaining OPS.  */
  for (i = currindex + 1; ops->iterate (i, &oe); i++)
    {
      tree t;

      if (TREE_CODE (oe->op) != SSA_NAME)
        continue;
      def2 = SSA_NAME_DEF_STMT (oe->op);
      if (!is_gimple_assign (def2))
        continue;
      rcode = gimple_assign_rhs_code (def2);
      if (TREE_CODE_CLASS (rcode) != tcc_comparison)
        continue;

      /* If we got here, we have a match.  See if we can combine the
         two comparisons.  */
      if (opcode == BIT_IOR_EXPR)
        t = maybe_fold_or_comparisons (lcode, op1, op2,
                                       rcode, gimple_assign_rhs1 (def2),
                                       gimple_assign_rhs2 (def2));
      else
        t = maybe_fold_and_comparisons (lcode, op1, op2,
                                        rcode, gimple_assign_rhs1 (def2),
                                        gimple_assign_rhs2 (def2));
      if (!t)
        continue;

      /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
         always give us a boolean_type_node value back.  If the original
         BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
         we need to convert.  */
      if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
        t = fold_convert (TREE_TYPE (curr->op), t);

      if (TREE_CODE (t) != INTEGER_CST
          && !operand_equal_p (t, curr->op, 0))
        {
          enum tree_code subcode;
          tree newop1, newop2;
          if (!COMPARISON_CLASS_P (t))
            continue;
          extract_ops_from_tree (t, &subcode, &newop1, &newop2);
          STRIP_USELESS_TYPE_CONVERSION (newop1);
          STRIP_USELESS_TYPE_CONVERSION (newop2);
          if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
            continue;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Equivalence: ");
          print_generic_expr (dump_file, curr->op, 0);
          fprintf (dump_file, " %s ", op_symbol_code (opcode));
          print_generic_expr (dump_file, oe->op, 0);
          fprintf (dump_file, " -> ");
          print_generic_expr (dump_file, t, 0);
          fprintf (dump_file, "\n");
        }

      /* Now we can delete oe, as it has been subsumed by the new combined
         expression t.  */
      ops->ordered_remove (i);
      reassociate_stats.ops_eliminated ++;

      /* If t is the same as curr->op, we're done.  Otherwise we must
         replace curr->op with t.  Special case is if we got a constant
         back, in which case we add it to the end instead of in place of
         the current entry.  */
      if (TREE_CODE (t) == INTEGER_CST)
        {
          ops->ordered_remove (currindex);
          add_to_ops_vec (ops, t);
        }
      else if (!operand_equal_p (t, curr->op, 0))
        {
          gimple sum;
          enum tree_code subcode;
          tree newop1;
          tree newop2;
          gcc_assert (COMPARISON_CLASS_P (t));
          extract_ops_from_tree (t, &subcode, &newop1, &newop2);
          STRIP_USELESS_TYPE_CONVERSION (newop1);
          STRIP_USELESS_TYPE_CONVERSION (newop2);
          gcc_checking_assert (is_gimple_val (newop1)
                               && is_gimple_val (newop2));
          sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
          curr->op = gimple_get_lhs (sum);
        }
      return true;
    }

  return false;
}
/* Perform various identities and other optimizations on the list of
   operand entries, stored in OPS.  The tree code for the binary
   operation between all the operands is OPCODE.  */
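/* For example (illustrative only), for a PLUS_EXPR chain with
   OPS = {a, 3, 5} the two trailing constants are folded into 8,
   giving {a, 8}; the list is then re-scanned so follow-on identities
   such as  a + 0  or  x + -x  can fire on the shortened list.  */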
static void
optimize_ops_list (enum tree_code opcode,
                   vec<operand_entry_t> *ops)
{
  unsigned int length = ops->length ();
  unsigned int i;
  operand_entry_t oe;
  operand_entry_t oelast = NULL;
  bool iterate = false;

  if (length == 1)
    return;

  oelast = ops->last ();

  /* If the last two are constants, pop the constants off, merge them
     and try the next two.  */
  if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
    {
      operand_entry_t oelm1 = (*ops)[length - 2];

      if (oelm1->rank == 0
          && is_gimple_min_invariant (oelm1->op)
          && useless_type_conversion_p (TREE_TYPE (oelm1->op),
                                        TREE_TYPE (oelast->op)))
        {
          tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
                                     oelm1->op, oelast->op);

          if (folded && is_gimple_min_invariant (folded))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "Merging constants\n");

              ops->pop ();
              ops->pop ();

              add_to_ops_vec (ops, folded);
              reassociate_stats.constants_eliminated++;

              optimize_ops_list (opcode, ops);
              return;
            }
        }
    }

  eliminate_using_constants (opcode, ops);
  oelast = NULL;

  for (i = 0; ops->iterate (i, &oe);)
    {
      bool done = false;

      if (eliminate_not_pairs (opcode, ops, i, oe))
        return;
      if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
          || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
          || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
        {
          if (done)
            return;
          iterate = true;
          oelast = NULL;
          continue;
        }
      oelast = oe;
      i++;
    }

  length = ops->length ();
  oelast = ops->last ();

  if (iterate)
    optimize_ops_list (opcode, ops);
}
/* The following functions are subroutines to optimize_range_tests and allow
   it to try to change a logical combination of comparisons into a range
   test.

   For example, both
        X == 2 || X == 5 || X == 3 || X == 4
   and
        X >= 2 && X <= 5
   are converted to
        (unsigned) (X - 2) <= 3

   For more information see comments above fold_test_range in fold-const.c,
   this implementation is for GIMPLE.  */

struct range_entry
{
  tree exp;
  tree low;
  tree high;
  bool in_p;
  bool strict_overflow_p;
  unsigned int idx, next;
};
/* This is similar to make_range in fold-const.c, but on top of
   GIMPLE instead of trees.  If EXP is non-NULL, it should be
   an SSA_NAME and STMT argument is ignored, otherwise STMT
   argument should be a GIMPLE_COND.  */
static void
init_range_entry (struct range_entry *r, tree exp, gimple stmt)
{
  int in_p;
  tree low, high;
  bool is_bool, strict_overflow_p;

  r->exp = NULL_TREE;
  r->in_p = false;
  r->strict_overflow_p = false;
  r->low = NULL_TREE;
  r->high = NULL_TREE;
  if (exp != NULL_TREE
      && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
    return;

  /* Start with simply saying "EXP != 0" and then look at the code of EXP
     and see if we can refine the range.  Some of the cases below may not
     happen, but it doesn't seem worth worrying about this.  We "continue"
     the outer loop when we've changed something; otherwise we "break"
     the switch, which will "break" the while.  */
  low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
  high = low;
  in_p = 0;
  strict_overflow_p = false;
  is_bool = false;
  if (exp == NULL_TREE)
    is_bool = true;
  else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
    {
      if (TYPE_UNSIGNED (TREE_TYPE (exp)))
        is_bool = true;
      else
        return;
    }
  else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
    is_bool = true;

  while (1)
    {
      enum tree_code code;
      tree arg0, arg1, exp_type;
      tree nexp;
      location_t loc;

      if (exp != NULL_TREE)
        {
          if (TREE_CODE (exp) != SSA_NAME
              || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
            break;

          stmt = SSA_NAME_DEF_STMT (exp);
          if (!is_gimple_assign (stmt))
            break;

          code = gimple_assign_rhs_code (stmt);
          arg0 = gimple_assign_rhs1 (stmt);
          arg1 = gimple_assign_rhs2 (stmt);
          exp_type = TREE_TYPE (exp);
        }
      else
        {
          code = gimple_cond_code (stmt);
          arg0 = gimple_cond_lhs (stmt);
          arg1 = gimple_cond_rhs (stmt);
          exp_type = boolean_type_node;
        }

      if (TREE_CODE (arg0) != SSA_NAME)
        break;
      loc = gimple_location (stmt);
      switch (code)
        {
        case BIT_NOT_EXPR:
          if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
              /* Ensure the range is either +[-,0], +[0,0],
                 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
                 -[1,1].  If it is e.g. +[-,-] or -[-,-]
                 or similar expression of unconditional true or
                 false, it should not be negated.  */
              && ((high && integer_zerop (high))
                  || (low && integer_onep (low))))
            {
              in_p = !in_p;
              exp = arg0;
              continue;
            }
          break;
        case SSA_NAME:
          exp = arg0;
          continue;
        CASE_CONVERT:
          if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
            {
              if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
                is_bool = true;
              else
                return;
            }
          else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
            is_bool = true;
          goto do_default;
        case EQ_EXPR:
        case NE_EXPR:
        case LT_EXPR:
        case LE_EXPR:
        case GE_EXPR:
        case GT_EXPR:
          is_bool = true;
          /* FALLTHRU */
        default:
          if (!is_bool)
            return;
        do_default:
          nexp = make_range_step (loc, code, arg0, arg1, exp_type,
                                  &low, &high, &in_p,
                                  &strict_overflow_p);
          if (nexp != NULL_TREE)
            {
              exp = nexp;
              gcc_assert (TREE_CODE (exp) == SSA_NAME);
              continue;
            }
          break;
        }
      break;
    }

  r->exp = exp;
  r->in_p = in_p;
  r->low = low;
  r->high = high;
  r->strict_overflow_p = strict_overflow_p;
}
/* Comparison function for qsort.  Sort entries
   without SSA_NAME exp first, then with SSA_NAMEs sorted
   by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
   by increasing ->low and if ->low is the same, by increasing
   ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
   means maximum.  */
static int
range_entry_cmp (const void *a, const void *b)
{
  const struct range_entry *p = (const struct range_entry *) a;
  const struct range_entry *q = (const struct range_entry *) b;

  if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
    {
      if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
        {
          /* Group range_entries for the same SSA_NAME together.  */
          if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
            return -1;
          else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
            return 1;
          /* If ->low is different, NULL low goes first, then by
             ascending low.  */
          if (p->low != NULL_TREE)
            {
              if (q->low != NULL_TREE)
                {
                  tree tem = fold_binary (LT_EXPR, boolean_type_node,
                                          p->low, q->low);
                  if (tem && integer_onep (tem))
                    return -1;
                  tem = fold_binary (GT_EXPR, boolean_type_node,
                                     p->low, q->low);
                  if (tem && integer_onep (tem))
                    return 1;
                }
              else
                return 1;
            }
          else if (q->low != NULL_TREE)
            return -1;
          /* If ->high is different, NULL high goes last, before that by
             ascending high.  */
          if (p->high != NULL_TREE)
            {
              if (q->high != NULL_TREE)
                {
                  tree tem = fold_binary (LT_EXPR, boolean_type_node,
                                          p->high, q->high);
                  if (tem && integer_onep (tem))
                    return -1;
                  tem = fold_binary (GT_EXPR, boolean_type_node,
                                     p->high, q->high);
                  if (tem && integer_onep (tem))
                    return 1;
                }
              else
                return -1;
            }
          else if (p->high != NULL_TREE)
            return 1;
          /* If both ranges are the same, sort below by ascending idx.  */
        }
      else
        return 1;
    }
  else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
    return -1;

  if (p->idx < q->idx)
    return -1;
  else
    {
      gcc_checking_assert (p->idx > q->idx);
      return 1;
    }
}
/* Helper routine of optimize_range_test.
   [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
   RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
   OPCODE and OPS are arguments of optimize_range_tests.  Return
   true if the range merge has been successful.
   If OPCODE is ERROR_MARK, this is called from within
   maybe_optimize_range_tests and is performing inter-bb range optimization.
   In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
   oe->rank.  */
static bool
update_range_test (struct range_entry *range, struct range_entry *otherrange,
                   unsigned int count, enum tree_code opcode,
                   vec<operand_entry_t> *ops, tree exp, bool in_p,
                   tree low, tree high, bool strict_overflow_p)
{
  operand_entry_t oe = (*ops)[range->idx];
  tree op = oe->op;
  gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
    last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
  location_t loc = gimple_location (stmt);
  tree optype = op ? TREE_TYPE (op) : boolean_type_node;
  tree tem = build_range_check (loc, optype, exp, in_p, low, high);
  enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
  gimple_stmt_iterator gsi;

  if (tem == NULL_TREE)
    return false;

  if (strict_overflow_p && issue_strict_overflow_warning (wc))
    warning_at (loc, OPT_Wstrict_overflow,
                "assuming signed overflow does not occur "
                "when simplifying range test");

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct range_entry *r;
      fprintf (dump_file, "Optimizing range tests ");
      print_generic_expr (dump_file, range->exp, 0);
      fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
      print_generic_expr (dump_file, range->low, 0);
      fprintf (dump_file, ", ");
      print_generic_expr (dump_file, range->high, 0);
      fprintf (dump_file, "]");
      for (r = otherrange; r < otherrange + count; r++)
        {
          fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
          print_generic_expr (dump_file, r->low, 0);
          fprintf (dump_file, ", ");
          print_generic_expr (dump_file, r->high, 0);
          fprintf (dump_file, "]");
        }
      fprintf (dump_file, "\n into ");
      print_generic_expr (dump_file, tem, 0);
      fprintf (dump_file, "\n");
    }

  if (opcode == BIT_IOR_EXPR
      || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
    tem = invert_truthvalue_loc (loc, tem);

  tem = fold_convert_loc (loc, optype, tem);
  gsi = gsi_for_stmt (stmt);
  /* In rare cases range->exp can be equal to lhs of stmt.
     In that case we have to insert after the stmt rather then before
     it.  */
  if (op == range->exp)
    tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
                                    GSI_CONTINUE_LINKING);
  else
    {
      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
                                      GSI_SAME_STMT);
      gsi_prev (&gsi);
    }
  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
    if (gimple_uid (gsi_stmt (gsi)))
      break;
    else
      gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));

  oe->op = tem;
  range->exp = exp;
  range->low = low;
  range->high = high;
  range->in_p = in_p;
  range->strict_overflow_p = false;

  for (range = otherrange; range < otherrange + count; range++)
    {
      oe = (*ops)[range->idx];
      /* Now change all the other range test immediate uses, so that
         those tests will be optimized away.  */
      if (opcode == ERROR_MARK)
        {
          if (oe->op)
            oe->op = build_int_cst (TREE_TYPE (oe->op),
                                    oe->rank == BIT_IOR_EXPR ? 0 : 1);
          else
            oe->op = (oe->rank == BIT_IOR_EXPR
                      ? boolean_false_node : boolean_true_node);
        }
      else
        oe->op = error_mark_node;
      range->exp = NULL_TREE;
    }
  return true;
}
/* Optimize X == CST1 || X == CST2
   if popcount (CST1 ^ CST2) == 1 into
   (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
   Similarly for ranges.  E.g.
   X != 2 && X != 3 && X != 10 && X != 11
   will be transformed by the previous optimization into
   !((X - 2U) <= 1U || (X - 10U) <= 1U)
   and this loop can transform that into
   !(((X & ~8) - 2U) <= 1U).  */
static bool
optimize_range_tests_xor (enum tree_code opcode, tree type,
                          tree lowi, tree lowj, tree highi, tree highj,
                          vec<operand_entry_t> *ops,
                          struct range_entry *rangei,
                          struct range_entry *rangej)
{
  tree lowxor, highxor, tem, exp;
  /* Check highi ^ lowi == highj ^ lowj and
     popcount (highi ^ lowi) == 1.  */
  lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
  if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
    return false;
  if (tree_log2 (lowxor) < 0)
    return false;
  highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
  if (!tree_int_cst_equal (lowxor, highxor))
    return false;

  tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
  exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
  lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
  highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
  if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
                         rangei->in_p, lowj, highj,
                         rangei->strict_overflow_p
                         || rangej->strict_overflow_p))
    return true;
  return false;
}
/* Optimize X == CST1 || X == CST2
   if popcount (CST2 - CST1) == 1 into
   ((X - CST1) & ~(CST2 - CST1)) == 0.
   Similarly for ranges.  E.g.
   X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
   || X == 75 || X == 45
   will be transformed by the previous optimization into
   (X - 43U) <= 3U || (X - 75U) <= 3U
   and this loop can transform that into
   ((X - 43U) & ~(75U - 43U)) <= 3U.  */
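/* Worked numbers for the example above (illustrative only): the two
   sub-ranges start at 43 and 75, 75 - 43 = 32 which has exactly one
   bit set, so masking  X - 43U  with ~32 maps both sub-ranges onto the
   same one and a single  ((X - 43U) & ~32U) <= 3U  test remains.  */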
static bool
optimize_range_tests_diff (enum tree_code opcode, tree type,
                           tree lowi, tree lowj, tree highi, tree highj,
                           vec<operand_entry_t> *ops,
                           struct range_entry *rangei,
                           struct range_entry *rangej)
{
  tree tem1, tem2, mask;
  /* Check highi - lowi == highj - lowj.  */
  tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
    return false;
  tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
  if (!tree_int_cst_equal (tem1, tem2))
    return false;
  /* Check popcount (lowj - lowi) == 1.  */
  tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
    return false;
  if (tree_log2 (tem1) < 0)
    return false;

  mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
  tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
  tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
  lowj = build_int_cst (type, 0);
  if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
                         rangei->in_p, lowj, tem2,
                         rangei->strict_overflow_p
                         || rangej->strict_overflow_p))
    return true;
  return false;
}
/* It does some common checks for the functions optimize_range_tests_xor
   and optimize_range_tests_diff.
   If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
   Else it calls optimize_range_tests_diff.  */
static bool
optimize_range_tests_1 (enum tree_code opcode, int first, int length,
                        bool optimize_xor, vec<operand_entry_t> *ops,
                        struct range_entry *ranges)
{
  int i, j;
  bool any_changes = false;
  for (i = first; i < length; i++)
    {
      tree lowi, highi, lowj, highj, type, tem;

      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
        continue;
      type = TREE_TYPE (ranges[i].exp);
      if (!INTEGRAL_TYPE_P (type))
        continue;
      lowi = ranges[i].low;
      if (lowi == NULL_TREE)
        lowi = TYPE_MIN_VALUE (type);
      highi = ranges[i].high;
      if (highi == NULL_TREE)
        continue;
      for (j = i + 1; j < length && j < i + 64; j++)
        {
          bool changes;
          if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
            continue;
          lowj = ranges[j].low;
          if (lowj == NULL_TREE)
            continue;
          highj = ranges[j].high;
          if (highj == NULL_TREE)
            highj = TYPE_MAX_VALUE (type);
          /* Check lowj > highi.  */
          tem = fold_binary (GT_EXPR, boolean_type_node,
                             lowj, highi);
          if (tem == NULL_TREE || !integer_onep (tem))
            continue;
          if (optimize_xor)
            changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
                                                highi, highj, ops,
                                                ranges + i, ranges + j);
          else
            changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
                                                 highi, highj, ops,
                                                 ranges + i, ranges + j);
          if (changes)
            {
              any_changes = true;
              break;
            }
        }
    }
  return any_changes;
}
/* Optimize range tests, similarly how fold_range_test optimizes
   it on trees.  The tree code for the binary
   operation between all the operands is OPCODE.
   If OPCODE is ERROR_MARK, optimize_range_tests is called from within
   maybe_optimize_range_tests for inter-bb range optimization.
   In that case if oe->op is NULL, oe->id is bb->index whose
   GIMPLE_COND is && or ||ed into the test, and oe->rank says
   the actual opcode.  */
static bool
optimize_range_tests (enum tree_code opcode,
		      vec<operand_entry_t> *ops)
{
  unsigned int length = ops->length (), i, j, first;
  operand_entry_t oe;
  struct range_entry *ranges;
  bool any_changes = false;

  if (length == 1)
    return false;

  ranges = XNEWVEC (struct range_entry, length);
  for (i = 0; i < length; i++)
    {
      oe = (*ops)[i];
      init_range_entry (ranges + i, oe->op,
			oe->op ? NULL :
			  last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
      /* For | invert it now, we will invert it again before emitting
	 the optimized expression.  */
      if (opcode == BIT_IOR_EXPR
	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
	ranges[i].in_p = !ranges[i].in_p;
    }

  qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
  for (i = 0; i < length; i++)
    if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
      break;

  /* Try to merge ranges.  */
  for (first = i; i < length; i++)
    {
      tree low = ranges[i].low;
      tree high = ranges[i].high;
      int in_p = ranges[i].in_p;
      bool strict_overflow_p = ranges[i].strict_overflow_p;
      int update_fail_count = 0;

      for (j = i + 1; j < length; j++)
	{
	  if (ranges[i].exp != ranges[j].exp)
	    break;
	  if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
			     ranges[j].in_p, ranges[j].low, ranges[j].high))
	    break;
	  strict_overflow_p |= ranges[j].strict_overflow_p;
	}

      if (j == i + 1)
	continue;

      if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
			     ops, ranges[i].exp, in_p, low, high,
			     strict_overflow_p))
	{
	  i = j - 1;
	  any_changes = true;
	}
      /* Avoid quadratic complexity if all merge_ranges calls would succeed,
	 while update_range_test would fail.  */
      else if (update_fail_count == 64)
	i = j - 1;
      else
	++update_fail_count;
    }

  any_changes |= optimize_range_tests_1 (opcode, first, length, true,
					 ops, ranges);

  if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
    any_changes |= optimize_range_tests_1 (opcode, first, length, false,
					   ops, ranges);

  if (any_changes && opcode != ERROR_MARK)
    {
      j = 0;
      FOR_EACH_VEC_ELT (*ops, i, oe)
	{
	  if (oe->op == error_mark_node)
	    continue;
	  else if (i != j)
	    (*ops)[j] = oe;
	  j++;
	}
      ops->truncate (j);
    }

  XDELETEVEC (ranges);
  return any_changes;
}
/* Return true if STMT is a cast like:
   <bb N>:
   ...
   _123 = (int) _234;

   <bb M>:
   # _345 = PHI <_123(N), 1(...), 1(...)>
   where _234 has bool type, _123 has single use and
   bb N has a single successor M.  This is commonly used in
   the last block of a range test.  */
static bool
final_range_test_p (gimple stmt)
{
  basic_block bb, rhs_bb;
  edge e;
  tree lhs, rhs;
  use_operand_p use_p;
  gimple use_stmt;

  if (!gimple_assign_cast_p (stmt))
    return false;
  bb = gimple_bb (stmt);
  if (!single_succ_p (bb))
    return false;
  e = single_succ_edge (bb);
  if (e->flags & EDGE_COMPLEX)
    return false;

  lhs = gimple_assign_lhs (stmt);
  rhs = gimple_assign_rhs1 (stmt);
  if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
      || TREE_CODE (rhs) != SSA_NAME
      || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
    return false;

  /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return false;

  if (gimple_code (use_stmt) != GIMPLE_PHI
      || gimple_bb (use_stmt) != e->dest)
    return false;

  /* And that the rhs is defined in the same loop.  */
  rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
  if (rhs_bb == NULL
      || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
    return false;

  return true;
}
/* Return true if BB is suitable basic block for inter-bb range test
   optimization.  If BACKWARD is true, BB should be the only predecessor
   of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
   or compared with to find a common basic block to which all conditions
   branch to if true resp. false.  If BACKWARD is false, TEST_BB should
   be the only predecessor of BB.  */
static bool
suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
		  bool backward)
{
  edge_iterator ei, ei2;
  edge e, e2;
  gimple stmt;
  gimple_stmt_iterator gsi;
  bool other_edge_seen = false;
  bool is_cond;

  /* Check last stmt first.  */
  stmt = last_stmt (bb);
  if (stmt == NULL
      || (gimple_code (stmt) != GIMPLE_COND
	  && (backward || !final_range_test_p (stmt)))
      || gimple_visited_p (stmt)
      || stmt_could_throw_p (stmt)
      || *other_bb == bb)
    return false;
  is_cond = gimple_code (stmt) == GIMPLE_COND;
  if (is_cond)
    {
      /* If last stmt is GIMPLE_COND, verify that one of the succ edges
	 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
	 to *OTHER_BB (if not set yet, try to find it out).  */
      if (EDGE_COUNT (bb->succs) != 2)
	return false;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	    return false;
	  if (e->dest == test_bb)
	    {
	      if (backward)
		continue;
	      else
		return false;
	    }
	  if (e->dest == bb)
	    return false;
	  if (*other_bb == NULL)
	    {
	      FOR_EACH_EDGE (e2, ei2, test_bb->succs)
		if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
		  return false;
		else if (e->dest == e2->dest)
		  *other_bb = e->dest;
	      if (*other_bb == NULL)
		return false;
	    }
	  if (e->dest == *other_bb)
	    other_edge_seen = true;
	  else if (backward)
	    return false;
	}
      if (*other_bb == NULL || !other_edge_seen)
	return false;
    }
  else if (single_succ (bb) != *other_bb)
    return false;

  /* Now check all PHIs of *OTHER_BB.  */
  e = find_edge (bb, *other_bb);
  e2 = find_edge (test_bb, *other_bb);
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
	 corresponding to BB and TEST_BB predecessor must be the same.  */
      if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
			    gimple_phi_arg_def (phi, e2->dest_idx), 0))
	{
	  /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
	     one of the PHIs should have the lhs of the last stmt in
	     that block as PHI arg and that PHI should have 0 or 1
	     corresponding to it in all other range test basic blocks
	     considered.  */
	  if (!is_cond)
	    {
	      if (gimple_phi_arg_def (phi, e->dest_idx)
		  == gimple_assign_lhs (stmt)
		  && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
		      || integer_onep (gimple_phi_arg_def (phi,
							   e2->dest_idx))))
		continue;
	    }
	  else
	    {
	      gimple test_last = last_stmt (test_bb);
	      if (gimple_code (test_last) != GIMPLE_COND
		  && gimple_phi_arg_def (phi, e2->dest_idx)
		     == gimple_assign_lhs (test_last)
		  && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
		      || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
		continue;
	    }

	  return false;
	}
    }
  return true;
}
/* Return true if BB doesn't have side-effects that would disallow
   range test optimization, all SSA_NAMEs set in the bb are consumed
   in the bb and there are no PHIs.  */
static bool
no_side_effect_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  gimple last;

  if (!gimple_seq_empty_p (phi_nodes (bb)))
    return false;
  last = last_stmt (bb);
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      tree lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;

      if (is_gimple_debug (stmt))
	continue;
      if (gimple_has_side_effects (stmt))
	return false;
      if (stmt == last)
	return true;
      if (!is_gimple_assign (stmt))
	return false;
      lhs = gimple_assign_lhs (stmt);
      if (TREE_CODE (lhs) != SSA_NAME)
	return false;
      if (gimple_assign_rhs_could_trap_p (stmt))
	return false;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	{
	  gimple use_stmt = USE_STMT (use_p);
	  if (is_gimple_debug (use_stmt))
	    continue;
	  if (gimple_bb (use_stmt) != bb)
	    return false;
	}
    }
  return false;
}
/* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
   return true and fill in *OPS recursively.  */
static bool
get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
	 struct loop *loop)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);
  tree rhs[2];
  int i;

  if (!is_reassociable_op (stmt, code, loop))
    return false;

  rhs[0] = gimple_assign_rhs1 (stmt);
  rhs[1] = gimple_assign_rhs2 (stmt);
  gimple_set_visited (stmt, true);
  for (i = 0; i < 2; i++)
    if (TREE_CODE (rhs[i]) == SSA_NAME
	&& !get_ops (rhs[i], code, ops, loop)
	&& has_single_use (rhs[i]))
      {
	operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);

	oe->op = rhs[i];
	oe->rank = code;
	oe->id = 0;
	oe->count = 1;
	ops->safe_push (oe);
      }
  return true;
}
/* Find the ops that were added by get_ops starting from VAR, see if
   they were changed during update_range_test and if yes, create new
   stmts.  */

static tree
update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
	    unsigned int *pidx, struct loop *loop)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);
  tree rhs[4];
  int i;

  if (!is_reassociable_op (stmt, code, loop))
    return NULL;

  rhs[0] = gimple_assign_rhs1 (stmt);
  rhs[1] = gimple_assign_rhs2 (stmt);
  rhs[2] = rhs[0];
  rhs[3] = rhs[1];
  for (i = 0; i < 2; i++)
    if (TREE_CODE (rhs[i]) == SSA_NAME)
      {
	rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
	if (rhs[2 + i] == NULL_TREE)
	  {
	    if (has_single_use (rhs[i]))
	      rhs[2 + i] = ops[(*pidx)++]->op;
	    else
	      rhs[2 + i] = rhs[i];
	  }
      }
  if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
      && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      var = make_ssa_name (TREE_TYPE (var), NULL);
      gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
					       var, rhs[2], rhs[3]);
      gimple_set_uid (g, gimple_uid (stmt));
      gimple_set_visited (g, true);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
    }
  return var;
}
/* Structure to track the initial value passed to get_ops and
   the range in the ops vector for each basic block.  */

struct inter_bb_range_test_entry
{
  tree op;
  unsigned int first_idx, last_idx;
};
/* Inter-bb range test optimization.  */

static void
maybe_optimize_range_tests (gimple stmt)
{
  basic_block first_bb = gimple_bb (stmt);
  basic_block last_bb = first_bb;
  basic_block other_bb = NULL;
  basic_block bb;
  edge_iterator ei;
  edge e;
  auto_vec<operand_entry_t> ops;
  auto_vec<inter_bb_range_test_entry> bbinfo;
  bool any_changes = false;

  /* Consider only basic blocks that end with GIMPLE_COND or
     a cast statement satisfying final_range_test_p.  All
     but the last bb in the first_bb .. last_bb range
     should end with GIMPLE_COND.  */
  if (gimple_code (stmt) == GIMPLE_COND)
    {
      if (EDGE_COUNT (first_bb->succs) != 2)
	return;
    }
  else if (final_range_test_p (stmt))
    other_bb = single_succ (first_bb);
  else
    return;

  if (stmt_could_throw_p (stmt))
    return;

  /* As relative ordering of post-dominator sons isn't fixed,
     maybe_optimize_range_tests can be called first on any
     bb in the range we want to optimize.  So, start searching
     backwards, if first_bb can be set to a predecessor.  */
  while (single_pred_p (first_bb))
    {
      basic_block pred_bb = single_pred (first_bb);
      if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
	break;
      if (!no_side_effect_bb (first_bb))
	break;
      first_bb = pred_bb;
    }
  /* If first_bb is last_bb, other_bb hasn't been computed yet.
     Before starting forward search in last_bb successors, find
     out the other_bb.  */
  if (first_bb == last_bb)
    {
      other_bb = NULL;
      /* As non-GIMPLE_COND last stmt always terminates the range,
	 if forward search didn't discover anything, just give up.  */
      if (gimple_code (stmt) != GIMPLE_COND)
	return;
      /* Look at both successors.  Either it ends with a GIMPLE_COND
	 and satisfies suitable_cond_bb, or ends with a cast and
	 other_bb is that cast's successor.  */
      FOR_EACH_EDGE (e, ei, first_bb->succs)
	if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
	    || e->dest == first_bb)
	  return;
	else if (single_pred_p (e->dest))
	  {
	    stmt = last_stmt (e->dest);
	    if (stmt
		&& gimple_code (stmt) == GIMPLE_COND
		&& EDGE_COUNT (e->dest->succs) == 2)
	      {
		if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
		  break;
		else
		  other_bb = NULL;
	      }
	    else if (stmt
		     && final_range_test_p (stmt)
		     && find_edge (first_bb, single_succ (e->dest)))
	      {
		other_bb = single_succ (e->dest);
		if (other_bb == first_bb)
		  other_bb = NULL;
	      }
	  }
      if (other_bb == NULL)
	return;
    }
  /* Now do the forward search, moving last_bb to successor bbs
     that aren't other_bb.  */
  while (EDGE_COUNT (last_bb->succs) == 2)
    {
      FOR_EACH_EDGE (e, ei, last_bb->succs)
	if (e->dest != other_bb)
	  break;
      if (e == NULL)
	break;
      if (!single_pred_p (e->dest))
	break;
      if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
	break;
      if (!no_side_effect_bb (e->dest))
	break;
      last_bb = e->dest;
    }
  if (first_bb == last_bb)
    return;
  /* Here basic blocks first_bb through last_bb's predecessor
     end with GIMPLE_COND, all of them have one of the edges to
     other_bb and another to another block in the range,
     all blocks except first_bb don't have side-effects and
     last_bb ends with either GIMPLE_COND, or cast satisfying
     final_range_test_p.  */
  for (bb = last_bb; ; bb = single_pred (bb))
    {
      enum tree_code code;
      tree lhs, rhs;
      inter_bb_range_test_entry bb_ent;

      bb_ent.op = NULL_TREE;
      bb_ent.first_idx = ops.length ();
      bb_ent.last_idx = bb_ent.first_idx;
      e = find_edge (bb, other_bb);
      stmt = last_stmt (bb);
      gimple_set_visited (stmt, true);
      if (gimple_code (stmt) != GIMPLE_COND)
	{
	  use_operand_p use_p;
	  gimple phi;
	  edge e2;
	  unsigned int d;

	  lhs = gimple_assign_lhs (stmt);
	  rhs = gimple_assign_rhs1 (stmt);
	  gcc_assert (bb == last_bb);

	  /* stmt is
	     _123 = (int) _234;

	     followed by:
	     <bb M>:
	     # _345 = PHI <_123(N), 1(...), 1(...)>

	     or 0 instead of 1.  If it is 0, the _234
	     range test is anded together with all the
	     other range tests, if it is 1, it is ored with
	     them.  */
	  single_imm_use (lhs, &use_p, &phi);
	  gcc_assert (gimple_code (phi) == GIMPLE_PHI);
	  e2 = find_edge (first_bb, other_bb);
	  d = e2->dest_idx;
	  gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
	  if (integer_zerop (gimple_phi_arg_def (phi, d)))
	    code = BIT_AND_EXPR;
	  else
	    {
	      gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
	      code = BIT_IOR_EXPR;
	    }

	  /* If _234 SSA_NAME_DEF_STMT is
	     _234 = _567 | _789;
	     (or &, corresponding to 1/0 in the phi arguments,
	     push into ops the individual range test arguments
	     of the bitwise or resp. and, recursively.  */
	  if (!get_ops (rhs, code, &ops,
			loop_containing_stmt (stmt))
	      && has_single_use (rhs))
	    {
	      /* Otherwise, push the _234 range test itself.  */
	      operand_entry_t oe
		= (operand_entry_t) pool_alloc (operand_entry_pool);

	      oe->op = rhs;
	      oe->rank = code;
	      oe->id = 0;
	      oe->count = 1;
	      ops.safe_push (oe);
	      bb_ent.last_idx++;
	    }
	  else
	    bb_ent.last_idx = ops.length ();
	  bb_ent.op = rhs;
	  bbinfo.safe_push (bb_ent);
	  continue;
	}
      /* Otherwise stmt is GIMPLE_COND.  */
      code = gimple_cond_code (stmt);
      lhs = gimple_cond_lhs (stmt);
      rhs = gimple_cond_rhs (stmt);
      if (TREE_CODE (lhs) == SSA_NAME
	  && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	  && ((code != EQ_EXPR && code != NE_EXPR)
	      || rhs != boolean_false_node
		 /* Either push into ops the individual bitwise
		    or resp. and operands, depending on which
		    edge is other_bb.  */
	      || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
				 ^ (code == EQ_EXPR))
				? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
			   loop_containing_stmt (stmt))))
	{
	  /* Or push the GIMPLE_COND stmt itself.  */
	  operand_entry_t oe
	    = (operand_entry_t) pool_alloc (operand_entry_pool);

	  oe->op = NULL;
	  oe->rank = (e->flags & EDGE_TRUE_VALUE)
		     ? BIT_IOR_EXPR : BIT_AND_EXPR;
	  /* oe->op = NULL signs that there is no SSA_NAME
	     for the range test, and oe->id instead is the
	     basic block number, at whose end the GIMPLE_COND
	     is.  */
	  oe->id = bb->index;
	  oe->count = 1;
	  ops.safe_push (oe);
	  bb_ent.op = NULL;
	  bb_ent.last_idx++;
	}
      else if (ops.length () > bb_ent.first_idx)
	{
	  bb_ent.op = lhs;
	  bb_ent.last_idx = ops.length ();
	}
      bbinfo.safe_push (bb_ent);
      if (bb == first_bb)
	break;
    }
  if (ops.length () > 1)
    any_changes = optimize_range_tests (ERROR_MARK, &ops);
  if (any_changes)
    {
      unsigned int idx;
      /* update_ops relies on has_single_use predicates returning the
	 same values as it did during get_ops earlier.  Additionally it
	 never removes statements, only adds new ones and it should walk
	 from the single imm use and check the predicate already before
	 making those changes.
	 On the other side, the handling of GIMPLE_COND directly can turn
	 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
	 it needs to be done in a separate loop afterwards.  */
      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
	{
	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
	      && bbinfo[idx].op != NULL_TREE)
	    {
	      tree new_op;

	      stmt = last_stmt (bb);
	      new_op = update_ops (bbinfo[idx].op,
				   (enum tree_code)
				   ops[bbinfo[idx].first_idx]->rank,
				   ops, &bbinfo[idx].first_idx,
				   loop_containing_stmt (stmt));
	      if (new_op == NULL_TREE)
		{
		  gcc_assert (bb == last_bb);
		  new_op = ops[bbinfo[idx].first_idx++]->op;
		}
	      if (bbinfo[idx].op != new_op)
		{
		  imm_use_iterator iter;
		  use_operand_p use_p;
		  gimple use_stmt, cast_stmt = NULL;

		  FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
		    if (is_gimple_debug (use_stmt))
		      continue;
		    else if (gimple_code (use_stmt) == GIMPLE_COND
			     || gimple_code (use_stmt) == GIMPLE_PHI)
		      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
			SET_USE (use_p, new_op);
		    else if (gimple_assign_cast_p (use_stmt))
		      cast_stmt = use_stmt;
		    else
		      gcc_unreachable ();
		  if (cast_stmt)
		    {
		      gcc_assert (bb == last_bb);
		      tree lhs = gimple_assign_lhs (cast_stmt);
		      tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
		      enum tree_code rhs_code
			= gimple_assign_rhs_code (cast_stmt);
		      gimple g;
		      if (is_gimple_min_invariant (new_op))
			{
			  new_op = fold_convert (TREE_TYPE (lhs), new_op);
			  g = gimple_build_assign (new_lhs, new_op);
			}
		      else
			g = gimple_build_assign_with_ops (rhs_code, new_lhs,
							  new_op, NULL_TREE);
		      gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
		      gimple_set_uid (g, gimple_uid (cast_stmt));
		      gimple_set_visited (g, true);
		      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
		      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
			if (is_gimple_debug (use_stmt))
			  continue;
			else if (gimple_code (use_stmt) == GIMPLE_COND
				 || gimple_code (use_stmt) == GIMPLE_PHI)
			  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
			    SET_USE (use_p, new_lhs);
			else
			  gcc_unreachable ();
		    }
		}
	    }
	  if (bb == first_bb)
	    break;
	}
      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
	{
	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
	      && bbinfo[idx].op == NULL_TREE
	      && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
	    {
	      stmt = last_stmt (bb);
	      if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
		gimple_cond_make_false (stmt);
	      else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
		gimple_cond_make_true (stmt);
	      else
		{
		  gimple_cond_set_code (stmt, NE_EXPR);
		  gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
		  gimple_cond_set_rhs (stmt, boolean_false_node);
		}
	      update_stmt (stmt);
	    }
	  if (bb == first_bb)
	    break;
	}
    }
}
/* Return true if OPERAND is defined by a PHI node which uses the LHS
   of STMT in its operands.  This is also known as a "destructive
   update" operation.  */
static bool
is_phi_for_stmt (gimple stmt, tree operand)
{
  gimple def_stmt;
  tree lhs;
  use_operand_p arg_p;
  ssa_op_iter i;

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  lhs = gimple_assign_lhs (stmt);

  def_stmt = SSA_NAME_DEF_STMT (operand);
  if (gimple_code (def_stmt) != GIMPLE_PHI)
    return false;

  FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
    if (lhs == USE_FROM_PTR (arg_p))
      return true;
  return false;
}
/* Remove def stmt of VAR if VAR has zero uses and recurse
   on rhs1 operand if so.  */

static void
remove_visited_stmt_chain (tree var)
{
  gimple stmt;
  gimple_stmt_iterator gsi;

  while (1)
    {
      if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
	return;
      stmt = SSA_NAME_DEF_STMT (var);
      if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
	{
	  var = gimple_assign_rhs1 (stmt);
	  gsi = gsi_for_stmt (stmt);
	  gsi_remove (&gsi, true);
	  release_defs (stmt);
	}
      else
	return;
    }
}
/* This function checks three consecutive operands in
   passed operands vector OPS starting from OPINDEX and
   swaps two operands if it is profitable for binary operation
   consuming OPINDEX + 1 and OPINDEX + 2 operands.

   We pair ops with the same rank if possible.

   The alternative we try is to see if STMT is a destructive
   update style statement, which is like:
   b = phi (a, ...)
   a = c + b;
   In that case, we want to use the destructive update form to
   expose the possible vectorizer sum reduction opportunity.
   In that case, the third operand will be the phi node.  This
   check is not performed if STMT is null.

   We could, of course, try to be better as noted above, and do a
   lot of work to try to find these opportunities in >3 operand
   cases, but it is unlikely to be worth it.  */
static void
swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
			  unsigned int opindex, gimple stmt)
{
  operand_entry_t oe1, oe2, oe3;

  oe1 = ops[opindex];
  oe2 = ops[opindex + 1];
  oe3 = ops[opindex + 2];

  if ((oe1->rank == oe2->rank
       && oe2->rank != oe3->rank)
      || (stmt && is_phi_for_stmt (stmt, oe3->op)
	  && !is_phi_for_stmt (stmt, oe1->op)
	  && !is_phi_for_stmt (stmt, oe2->op)))
    {
      struct operand_entry temp = *oe3;
      oe3->op = oe1->op;
      oe3->rank = oe1->rank;
      oe1->op = temp.op;
      oe1->rank = temp.rank;
    }
  else if ((oe1->rank == oe3->rank
	    && oe2->rank != oe3->rank)
	   || (stmt && is_phi_for_stmt (stmt, oe2->op)
	       && !is_phi_for_stmt (stmt, oe1->op)
	       && !is_phi_for_stmt (stmt, oe3->op)))
    {
      struct operand_entry temp = *oe2;
      oe2->op = oe1->op;
      oe2->rank = oe1->rank;
      oe1->op = temp.op;
      oe1->rank = temp.rank;
    }
}
/* If definition of RHS1 or RHS2 dominates STMT, return the later of those
   two definitions, otherwise return STMT.  */

static inline gimple
find_insert_point (gimple stmt, tree rhs1, tree rhs2)
{
  if (TREE_CODE (rhs1) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
    stmt = SSA_NAME_DEF_STMT (rhs1);
  if (TREE_CODE (rhs2) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
    stmt = SSA_NAME_DEF_STMT (rhs2);
  return stmt;
}
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order.  Return new lhs.  */

static tree
rewrite_expr_tree (gimple stmt, unsigned int opindex,
		   vec<operand_entry_t> ops, bool changed)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree lhs = gimple_assign_lhs (stmt);
  operand_entry_t oe;

  /* The final recursion case for this function is that you have
     exactly two operations left.
     If we had exactly one op in the entire list to start with, we
     would have never called this function, and the tail recursion
     rewrites them one at a time.  */
  if (opindex + 2 == ops.length ())
    {
      operand_entry_t oe1, oe2;

      oe1 = ops[opindex];
      oe2 = ops[opindex + 1];

      if (rhs1 != oe1->op || rhs2 != oe2->op)
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  unsigned int uid = gimple_uid (stmt);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Transforming ");
	      print_gimple_stmt (dump_file, stmt, 0, 0);
	    }

	  if (changed)
	    {
	      gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
	      lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
	      stmt
		= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
						lhs, oe1->op, oe2->op);
	      gimple_set_uid (stmt, uid);
	      gimple_set_visited (stmt, true);
	      if (insert_point == gsi_stmt (gsi))
		gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	      else
		insert_stmt_after (stmt, insert_point);
	    }
	  else
	    {
	      gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
				   == stmt);
	      gimple_assign_set_rhs1 (stmt, oe1->op);
	      gimple_assign_set_rhs2 (stmt, oe2->op);
	      update_stmt (stmt);
	    }

	  if (rhs1 != oe1->op && rhs1 != oe2->op)
	    remove_visited_stmt_chain (rhs1);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " into ");
	      print_gimple_stmt (dump_file, stmt, 0, 0);
	    }
	}
      return lhs;
    }

  /* If we hit here, we should have 3 or more ops left.  */
  gcc_assert (opindex + 2 < ops.length ());

  /* Rewrite the next operator.  */
  oe = ops[opindex];

  /* Recurse on the LHS of the binary operator, which is guaranteed to
     be the non-leaf side.  */
  tree new_rhs1
    = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
			 changed || oe->op != rhs2);

  if (oe->op != rhs2 || new_rhs1 != rhs1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Transforming ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}

      /* If changed is false, this is either opindex == 0
	 or all outer rhs2's were equal to corresponding oe->op,
	 and powi_result is NULL.
	 That means lhs is equivalent before and after reassociation.
	 Otherwise ensure the old lhs SSA_NAME is not reused and
	 create a new stmt as well, so that any debug stmts will be
	 properly adjusted.  */
      if (changed)
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  unsigned int uid = gimple_uid (stmt);
	  gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);

	  lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
	  stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
					       lhs, new_rhs1, oe->op);
	  gimple_set_uid (stmt, uid);
	  gimple_set_visited (stmt, true);
	  if (insert_point == gsi_stmt (gsi))
	    gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	  else
	    insert_stmt_after (stmt, insert_point);
	}
      else
	{
	  gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
			       == stmt);
	  gimple_assign_set_rhs1 (stmt, new_rhs1);
	  gimple_assign_set_rhs2 (stmt, oe->op);
	  update_stmt (stmt);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " into ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}
    }
  return lhs;
}
/* Find out how many cycles we need to compute statements chain.
   OPS_NUM holds number of statements in a chain.  CPU_WIDTH is a
   maximum number of independent statements we may execute per cycle.  */

static int
get_required_cycles (int ops_num, int cpu_width)
{
  int res;
  int elog;
  unsigned int rest;

  /* While we have more than 2 * cpu_width operands
     we may reduce number of operands by cpu_width
     per cycle.  */
  res = ops_num / (2 * cpu_width);

  /* Remained operands count may be reduced twice per cycle
     until we have only one operand.  */
  rest = (unsigned)(ops_num - res * cpu_width);
  elog = exact_log2 (rest);
  if (elog >= 0)
    res += elog;
  else
    res += floor_log2 (rest) + 1;

  return res;
}
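
/* Hedged worked example (illustration only, values invented): for a chain
   of 12 operands on a machine issuing 2 independent operations per cycle,
   the model above gives 12 / (2 * 2) = 3 cycles of full-width reduction,
   leaving 12 - 3 * 2 = 6 operands; 6 is not a power of two, so another
   floor_log2 (6) + 1 = 3 cycles are needed, 6 cycles in total.  */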
/* Returns an optimal number of registers to use for computation of
   given statements.  */

static int
get_reassociation_width (int ops_num, enum tree_code opc,
			 enum machine_mode mode)
{
  int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
  int width;
  int width_min;
  int cycles_best;

  if (param_width > 0)
    width = param_width;
  else
    width = targetm.sched.reassociation_width (opc, mode);

  if (width == 1)
    return width;

  /* Get the minimal time required for sequence computation.  */
  cycles_best = get_required_cycles (ops_num, width);

  /* Check if we may use less width and still compute sequence for
     the same time.  It will allow us to reduce registers usage.
     get_required_cycles is monotonically increasing with lower width
     so we can perform a binary search for the minimal width that still
     results in the optimal cycle count.  */
  width_min = 1;
  while (width > width_min)
    {
      int width_mid = (width + width_min) / 2;

      if (get_required_cycles (ops_num, width_mid) == cycles_best)
	width = width_mid;
      else if (width_min < width_mid)
	width_min = width_mid;
      else
	break;
    }

  return width;
}
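
/* Hedged worked trace of the search above (illustration only, numbers
   invented): with ops_num = 5 and an initial width of 4,
   get_required_cycles (5, 4) = 3.  Probing width_mid = 2 also yields
   3 cycles, so width drops to 2; probing width_mid = 1 yields 4 cycles,
   so the search stops and width 2 is kept -- the same latency with half
   the parallelism and fewer live registers.  */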
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order and trying to allow operations to be executed in
   parallel.  */

static void
rewrite_expr_tree_parallel (gimple stmt, int width,
			    vec<operand_entry_t> ops)
{
  enum tree_code opcode = gimple_assign_rhs_code (stmt);
  int op_num = ops.length ();
  int stmt_num = op_num - 1;
  gimple *stmts = XALLOCAVEC (gimple, stmt_num);
  int op_index = op_num - 1;
  int stmt_index = 0;
  int ready_stmts_end = 0;
  int i = 0;
  tree last_rhs1 = gimple_assign_rhs1 (stmt);

  /* We start expression rewriting from the top statements.
     So, in this loop we create a full list of statements
     we will work with.  */
  stmts[stmt_num - 1] = stmt;
  for (i = stmt_num - 2; i >= 0; i--)
    stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));

  for (i = 0; i < stmt_num; i++)
    {
      tree op1, op2;

      /* Determine whether we should use results of
	 already handled statements or not.  */
      if (ready_stmts_end == 0
	  && (i - stmt_index >= width || op_index < 1))
	ready_stmts_end = i;

      /* Now we choose operands for the next statement.  Non zero
	 value in ready_stmts_end means here that we should use
	 the result of already generated statements as new operand.  */
      if (ready_stmts_end > 0)
	{
	  op1 = gimple_assign_lhs (stmts[stmt_index++]);
	  if (ready_stmts_end > stmt_index)
	    op2 = gimple_assign_lhs (stmts[stmt_index++]);
	  else if (op_index >= 0)
	    op2 = ops[op_index--]->op;
	  else
	    {
	      gcc_assert (stmt_index < i);
	      op2 = gimple_assign_lhs (stmts[stmt_index++]);
	    }

	  if (stmt_index >= ready_stmts_end)
	    ready_stmts_end = 0;
	}
      else
	{
	  if (op_index > 1)
	    swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
	  op2 = ops[op_index--]->op;
	  op1 = ops[op_index--]->op;
	}

      /* If we emit the last statement then we should put
	 operands into the last statement.  It will also
	 reuse the original statement's lhs.  */
      if (op_index < 0 && stmt_index == i)
	i = stmt_num - 1;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Transforming ");
	  print_gimple_stmt (dump_file, stmts[i], 0, 0);
	}

      /* We keep original statement only for the last one.  All
	 others are recreated.  */
      if (i == stmt_num - 1)
	{
	  gimple_assign_set_rhs1 (stmts[i], op1);
	  gimple_assign_set_rhs2 (stmts[i], op2);
	  update_stmt (stmts[i]);
	}
      else
	stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " into ");
	  print_gimple_stmt (dump_file, stmts[i], 0, 0);
	}
    }

  remove_visited_stmt_chain (last_rhs1);
}
/* Transform STMT, which is really (A + B) + (C + D) into the left
   linear form, ((A+B)+C)+D.
   Recurse on D if necessary.  */

static void
linearize_expr (gimple stmt)
{
  gimple_stmt_iterator gsi;
  gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
  gimple oldbinrhs = binrhs;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  gimple newbinrhs = NULL;
  struct loop *loop = loop_containing_stmt (stmt);
  tree lhs = gimple_assign_lhs (stmt);

  gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
	      && is_reassociable_op (binrhs, rhscode, loop));

  gsi = gsi_for_stmt (stmt);

  gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
  binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
					 make_ssa_name (TREE_TYPE (lhs), NULL),
					 gimple_assign_lhs (binlhs),
					 gimple_assign_rhs2 (binrhs));
  gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
  gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
  gimple_set_uid (binrhs, gimple_uid (stmt));

  if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
    newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Linearized: ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  reassociate_stats.linearized++;
  update_stmt (stmt);

  gsi = gsi_for_stmt (oldbinrhs);
  gsi_remove (&gsi, true);
  release_defs (oldbinrhs);

  gimple_set_visited (stmt, true);
  gimple_set_visited (binlhs, true);
  gimple_set_visited (binrhs, true);

  /* Tail recurse on the new rhs if it still needs reassociation.  */
  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
    /* ??? This should probably be linearize_expr (newbinrhs) but I don't
       want to change the algorithm while converting to tuples.  */
    linearize_expr (stmt);
}
/* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
   it.  Otherwise, return NULL.  */

static gimple
get_single_immediate_use (tree lhs)
{
  use_operand_p immuse;
  gimple immusestmt;

  if (TREE_CODE (lhs) == SSA_NAME
      && single_imm_use (lhs, &immuse, &immusestmt)
      && is_gimple_assign (immusestmt))
    return immusestmt;

  return NULL;
}
/* Recursively negate the value of TONEGATE, and return the SSA_NAME
   representing the negated value.  Insertions of any necessary
   instructions go before GSI.
   This function is recursive in that, if you hand it "a_5" as the
   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */

static tree
negate_value (tree tonegate, gimple_stmt_iterator *gsip)
{
  gimple negatedefstmt = NULL;
  tree resultofnegate;
  gimple_stmt_iterator gsi;
  unsigned int uid;

  /* If we are trying to negate a name, defined by an add, negate the
     add operands instead.  */
  if (TREE_CODE (tonegate) == SSA_NAME)
    negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
  if (TREE_CODE (tonegate) == SSA_NAME
      && is_gimple_assign (negatedefstmt)
      && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
      && has_single_use (gimple_assign_lhs (negatedefstmt))
      && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
    {
      tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
      tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
      tree lhs = gimple_assign_lhs (negatedefstmt);
      gimple g;

      gsi = gsi_for_stmt (negatedefstmt);
      rhs1 = negate_value (rhs1, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs2 = negate_value (rhs2, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
      gimple_set_visited (negatedefstmt, true);
      g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
      gimple_set_uid (g, gimple_uid (negatedefstmt));
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      return lhs;
    }

  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
  resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
					     NULL_TREE, true, GSI_SAME_STMT);
  gsi = *gsip;
  uid = gimple_uid (gsi_stmt (gsi));
  for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (gimple_uid (stmt) != 0)
	break;
      gimple_set_uid (stmt, uid);
    }
  return resultofnegate;
}
/* Return true if we should break up the subtract in STMT into an add
   with negate.  This is true when the subtract operands are really
   adds, or the subtract itself is used in an add expression.  In
   either case, breaking up the subtract into an add with negate
   exposes the adds to reassociation.  */

static bool
should_break_up_subtract (gimple stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple immusestmt;
  struct loop *loop = loop_containing_stmt (stmt);

  if (TREE_CODE (binlhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
    return true;

  if (TREE_CODE (binrhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
    return true;

  if (TREE_CODE (lhs) == SSA_NAME
      && (immusestmt = get_single_immediate_use (lhs))
      && is_gimple_assign (immusestmt)
      && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
	  || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
    return true;
  return false;
}
/* Transform STMT from A - B into A + -B.  */

static void
break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Breaking up subtract ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  rhs2 = negate_value (rhs2, gsip);
  gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
  update_stmt (stmt);
}
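
/* Hedged illustration (names invented for the example): at the source level
   the predicate and transformation above turn

     t = a - b;
     r = t + c;

   into

     n = -b;
     t = a + n;
     r = t + c;

   so the whole computation becomes a single PLUS_EXPR chain that the
   linearization and rank-ordering steps can reassociate freely.  */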
/* Determine whether STMT is a builtin call that raises an SSA name
   to an integer power and has only one use.  If so, and this is early
   reassociation and unsafe math optimizations are permitted, place
   the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
   If any of these conditions does not hold, return FALSE.  */

static bool
acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
{
  tree fndecl, arg1;
  REAL_VALUE_TYPE c, cint;

  if (!first_pass_instance
      || !flag_unsafe_math_optimizations
      || !is_gimple_call (stmt)
      || !has_single_use (gimple_call_lhs (stmt)))
    return false;

  fndecl = gimple_call_fndecl (stmt);

  if (!fndecl
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return false;

  switch (DECL_FUNCTION_CODE (fndecl))
    {
    CASE_FLT_FN (BUILT_IN_POW):
      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (TREE_CODE (arg1) != REAL_CST)
	return false;

      c = TREE_REAL_CST (arg1);

      if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
	return false;

      *exponent = real_to_integer (&c);
      real_from_integer (&cint, VOIDmode, *exponent,
			 *exponent < 0 ? -1 : 0, 0);
      if (!real_identical (&c, &cint))
	return false;

      break;

    CASE_FLT_FN (BUILT_IN_POWI):
      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (!tree_fits_shwi_p (arg1))
	return false;

      *exponent = tree_to_shwi (arg1);
      break;

    default:
      return false;
    }

  /* Expanding negative exponents is generally unproductive, so we don't
     complicate matters with those.  Exponents of zero and one should
     have been handled by expression folding.  */
  if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
    return false;

  return true;
}
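
/* Hedged examples of what the predicate above accepts and rejects
   (illustration only), assuming -funsafe-math-optimizations and a single
   use of each result:

     t1 = __builtin_pow (x, 3.0);    base x, exponent 3     -> true
     t2 = __builtin_powi (x, 5);     base x, exponent 5     -> true
     t3 = __builtin_pow (x, 2.5);    non-integral exponent  -> false
     t4 = __builtin_powi (x, 1);     exponent < 2           -> false  */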
/* Recursively linearize a binary expression that is the RHS of STMT.
   Place the operands of the expression tree in the vector named OPS.  */

static void
linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
		     bool is_associative, bool set_visited)
{
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple binlhsdef = NULL, binrhsdef = NULL;
  bool binlhsisreassoc = false;
  bool binrhsisreassoc = false;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  struct loop *loop = loop_containing_stmt (stmt);
  tree base = NULL_TREE;
  HOST_WIDE_INT exponent = 0;

  if (set_visited)
    gimple_set_visited (stmt, true);

  if (TREE_CODE (binlhs) == SSA_NAME)
    {
      binlhsdef = SSA_NAME_DEF_STMT (binlhs);
      binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
			 && !stmt_could_throw_p (binlhsdef));
    }

  if (TREE_CODE (binrhs) == SSA_NAME)
    {
      binrhsdef = SSA_NAME_DEF_STMT (binrhs);
      binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
			 && !stmt_could_throw_p (binrhsdef));
    }

  /* If the LHS is not reassociable, but the RHS is, we need to swap
     them.  If neither is reassociable, there is nothing we can do, so
     just put them in the ops vector.  If the LHS is reassociable,
     linearize it.  If both are reassociable, then linearize the RHS
     and the LHS.  */

  if (!binlhsisreassoc)
    {
      tree temp;

      /* If this is not an associative operation like division, give up.  */
      if (!is_associative)
	{
	  add_to_ops_vec (ops, binrhs);
	  return;
	}

      if (!binrhsisreassoc)
	{
	  if (rhscode == MULT_EXPR
	      && TREE_CODE (binrhs) == SSA_NAME
	      && acceptable_pow_call (binrhsdef, &base, &exponent))
	    {
	      add_repeat_to_ops_vec (ops, base, exponent);
	      gimple_set_visited (binrhsdef, true);
	    }
	  else
	    add_to_ops_vec (ops, binrhs);

	  if (rhscode == MULT_EXPR
	      && TREE_CODE (binlhs) == SSA_NAME
	      && acceptable_pow_call (binlhsdef, &base, &exponent))
	    {
	      add_repeat_to_ops_vec (ops, base, exponent);
	      gimple_set_visited (binlhsdef, true);
	    }
	  else
	    add_to_ops_vec (ops, binlhs);

	  return;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "swapping operands of ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}

      swap_ssa_operands (stmt,
			 gimple_assign_rhs1_ptr (stmt),
			 gimple_assign_rhs2_ptr (stmt));
      update_stmt (stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " is now ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}

      /* We want to make it so the lhs is always the reassociative op,
	 so swap.  */
      temp = binlhs;
      binlhs = binrhs;
      binrhs = temp;
    }
  else if (binrhsisreassoc)
    {
      linearize_expr (stmt);
      binlhs = gimple_assign_rhs1 (stmt);
      binrhs = gimple_assign_rhs2 (stmt);
    }

  gcc_assert (TREE_CODE (binrhs) != SSA_NAME
	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
				      rhscode, loop));
  linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
		       is_associative, set_visited);

  if (rhscode == MULT_EXPR
      && TREE_CODE (binrhs) == SSA_NAME
      && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
    {
      add_repeat_to_ops_vec (ops, base, exponent);
      gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
    }
  else
    add_to_ops_vec (ops, binrhs);
}
/* Repropagate the negates back into subtracts, since no other pass
   currently does it.  */

static void
repropagate_negates (void)
{
  unsigned int i = 0;
  tree negate;

  FOR_EACH_VEC_ELT (plus_negates, i, negate)
    {
      gimple user = get_single_immediate_use (negate);

      if (!user || !is_gimple_assign (user))
	continue;

      /* The negate operand can be either operand of a PLUS_EXPR
	 (it can be the LHS if the RHS is a constant for example).

	 Force the negate operand to the RHS of the PLUS_EXPR, then
	 transform the PLUS_EXPR into a MINUS_EXPR.  */
      if (gimple_assign_rhs_code (user) == PLUS_EXPR)
	{
	  /* If the negated operand appears on the LHS of the
	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
	     to force the negated operand to the RHS of the PLUS_EXPR.  */
	  if (gimple_assign_rhs1 (user) == negate)
	    {
	      swap_ssa_operands (user,
				 gimple_assign_rhs1_ptr (user),
				 gimple_assign_rhs2_ptr (user));
	    }

	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
	  if (gimple_assign_rhs2 (user) == negate)
	    {
	      tree rhs1 = gimple_assign_rhs1 (user);
	      tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
	      update_stmt (user);
	    }
	}
      else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
	{
	  if (gimple_assign_rhs1 (user) == negate)
	    {
	      /* We have
		   x = -a
		   y = x - b
		 which we transform into
		   x = a + b
		   y = -x .
		 This pushes down the negate which we possibly can merge
		 into some other operation, hence insert it into the
		 plus_negates vector.  */
	      gimple feed = SSA_NAME_DEF_STMT (negate);
	      tree a = gimple_assign_rhs1 (feed);
	      tree b = gimple_assign_rhs2 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (feed);
	      gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
	      tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
	      gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
	      user = gsi_stmt (gsi2);
	      update_stmt (user);
	      gsi_remove (&gsi, true);
	      release_defs (feed);
	      plus_negates.safe_push (gimple_assign_lhs (user));
	    }
	  else
	    {
	      /* Transform "x = -a; y = b - x" into "y = b + a", getting
		 rid of one operation.  */
	      gimple feed = SSA_NAME_DEF_STMT (negate);
	      tree a = gimple_assign_rhs1 (feed);
	      tree rhs1 = gimple_assign_rhs1 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
	      update_stmt (gsi_stmt (gsi));
	    }
	}
    }
}
/* Returns true if OP is of a type for which we can do reassociation.
   That is for integral or non-saturating fixed-point types, and for
   floating point type when associative-math is enabled.  */

static bool
can_reassociate_p (tree op)
{
  tree type = TREE_TYPE (op);
  if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
      || NON_SAT_FIXED_POINT_TYPE_P (type)
      || (flag_associative_math && FLOAT_TYPE_P (type)))
    return true;
  return false;
}
/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   IE given
   d = f + g
   c = a + e
   b = c - d
   q = b - r
   k = t - q

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement
   and set UIDs within each basic block.  */

static void
break_up_subtract_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  unsigned int uid = 1;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_visited (stmt, false);
      gimple_set_uid (stmt, uid++);

      if (!is_gimple_assign (stmt)
	  || !can_reassociate_p (gimple_assign_lhs (stmt)))
	continue;

      /* Look for simple gimple subtract operations.  */
      if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
	{
	  if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
	      || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
	    continue;

	  /* Check for a subtract used only in an addition.  If this
	     is the case, transform it into add of a negate for better
	     reassociation.  IE transform C = A-B into C = A + -B if C
	     is only used in an addition.  */
	  if (should_break_up_subtract (stmt))
	    break_up_subtract (stmt, &gsi);
	}
      else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
	       && can_reassociate_p (gimple_assign_rhs1 (stmt)))
	plus_negates.safe_push (gimple_assign_lhs (stmt));
    }
  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    break_up_subtract_bb (son);
}
/* Used for repeated factor analysis.  */
struct repeat_factor_d
{
  /* An SSA name that occurs in a multiply chain.  */
  tree factor;

  /* Cached rank of the factor.  */
  unsigned rank;

  /* Number of occurrences of the factor in the chain.  */
  HOST_WIDE_INT count;

  /* An SSA name representing the product of this factor and
     all factors appearing later in the repeated factor vector.  */
  tree repr;
};

typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
typedef const struct repeat_factor_d *const_repeat_factor_t;


static vec<repeat_factor> repeat_factor_vec;

/* Used for sorting the repeat factor vector.  Sort primarily by
   ascending occurrence count, secondarily by descending rank.  */

static int
compare_repeat_factors (const void *x1, const void *x2)
{
  const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
  const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;

  if (rf1->count != rf2->count)
    return rf1->count - rf2->count;

  return rf2->rank - rf1->rank;
}
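
/* Hedged worked example (illustration only): for the multiply chain
   x * x * y * y * y * z * z * z * z the occurrence counts are x:2, y:3,
   z:4, so after sorting with the comparator above the vector is ordered
   x (2), y (3), z (4) -- lowest count first, and among equal counts the
   higher-ranked factor first.  attempt_builtin_powi below builds its
   cached partial products from the tail of this order.  */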
/* Look for repeated operands in OPS in the multiply tree rooted at
   STMT.  Replace them with an optimal sequence of multiplies and powi
   builtin calls, and remove the used operands from OPS.  Return an
   SSA name representing the value of the replacement sequence.  */

static tree
attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
{
  unsigned i, j, vec_len;
  int ii;
  operand_entry_t oe;
  repeat_factor_t rf1, rf2;
  repeat_factor rfnew;
  tree result = NULL_TREE;
  tree target_ssa, iter_result;
  tree type = TREE_TYPE (gimple_get_lhs (stmt));
  tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gimple mul_stmt, pow_stmt;

  /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
     target.  */
  if (!powi_fndecl)
    return NULL_TREE;

  /* Allocate the repeated factor vector.  */
  repeat_factor_vec.create (10);

  /* Scan the OPS vector for all SSA names in the product and build
     up a vector of occurrence counts for each factor.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
	{
	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	    {
	      if (rf1->factor == oe->op)
		{
		  rf1->count += oe->count;
		  break;
		}
	    }

	  if (j >= repeat_factor_vec.length ())
	    {
	      rfnew.factor = oe->op;
	      rfnew.rank = oe->rank;
	      rfnew.count = oe->count;
	      rfnew.repr = NULL_TREE;
	      repeat_factor_vec.safe_push (rfnew);
	    }
	}
    }

  /* Sort the repeated factor vector by (a) increasing occurrence count,
     and (b) decreasing rank.  */
  repeat_factor_vec.qsort (compare_repeat_factors);

  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_pow opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions:

	t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = t3 * t1
	result = t4 * z

     If we instead ignored the cached y * z and first multiplied by
     the __builtin_pow opportunity z * z, we would get the inferior:

	t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = z * z
	t5 = t3 * t4
	result = t5 * y  */

  vec_len = repeat_factor_vec.length ();

  /* Repeatedly look for opportunities to create a builtin_powi call.  */
  while (true)
    {
      HOST_WIDE_INT power;

      /* First look for the largest cached product of factors from
	 preceding iterations.  If found, create a builtin_powi for
	 it if the minimum occurrence count for its factors is at
	 least 2, or just use this cached product as our next
	 multiplicand if the minimum occurrence count is 1.  */
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	{
	  if (rf1->repr && rf1->count > 0)
	    break;
	}

      if (j < vec_len)
	{
	  power = rf1->count;

	  if (power == 1)
	    {
	      iter_result = rf1->repr;

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor_t rf;
		  fputs ("Multiplying by cached product ", dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = &repeat_factor_vec[elt];
		      print_generic_expr (dump_file, rf->factor, 0);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fputs ("\n", dump_file);
		}
	    }
	  else
	    {
	      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
					    build_int_cst (integer_type_node,
							   power));
	      gimple_call_set_lhs (pow_stmt, iter_result);
	      gimple_set_location (pow_stmt, gimple_location (stmt));
	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor_t rf;
		  fputs ("Building __builtin_pow call for cached product (",
			 dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = &repeat_factor_vec[elt];
		      print_generic_expr (dump_file, rf->factor, 0);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
			   power);
		}
	    }
	}
      else
	{
	  /* Otherwise, find the first factor in the repeated factor
	     vector whose occurrence count is at least 2.  If no such
	     factor exists, there are no builtin_powi opportunities
	     remaining.  */
	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	    {
	      if (rf1->count >= 2)
		break;
	    }

	  if (j >= vec_len)
	    break;

	  power = rf1->count;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      unsigned elt;
	      repeat_factor_t rf;
	      fputs ("Building __builtin_pow call for (", dump_file);
	      for (elt = j; elt < vec_len; elt++)
		{
		  rf = &repeat_factor_vec[elt];
		  print_generic_expr (dump_file, rf->factor, 0);
		  if (elt < vec_len - 1)
		    fputs (" * ", dump_file);
		}
	      fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
	    }

	  reassociate_stats.pows_created++;

	  /* Visit each element of the vector in reverse order (so that
	     high-occurrence elements are visited first, and within the
	     same occurrence count, lower-ranked elements are visited
	     first).  Form a linear product of all elements in this order
	     whose occurrence count is at least that of element J.
	     Record the SSA name representing the product of each element
	     with all subsequent elements in the vector.  */
	  if (j == vec_len - 1)
	    rf1->repr = rf1->factor;
	  else
	    {
	      for (ii = vec_len - 2; ii >= (int)j; ii--)
		{
		  tree op1, op2;

		  rf1 = &repeat_factor_vec[ii];
		  rf2 = &repeat_factor_vec[ii + 1];

		  /* Init the last factor's representative to be itself.  */
		  if (!rf2->repr)
		    rf2->repr = rf2->factor;

		  op1 = rf1->factor;
		  op2 = rf2->repr;

		  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
		  mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
							   target_ssa,
							   op1, op2);
		  gimple_set_location (mul_stmt, gimple_location (stmt));
		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
		  rf1->repr = target_ssa;

		  /* Don't reprocess the multiply we just introduced.  */
		  gimple_set_visited (mul_stmt, true);
		}
	    }

	  /* Form a call to __builtin_powi for the maximum product
	     just formed, raised to the power obtained earlier.  */
	  rf1 = &repeat_factor_vec[j];
	  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
	  pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
					build_int_cst (integer_type_node,
						       power));
	  gimple_call_set_lhs (pow_stmt, iter_result);
	  gimple_set_location (pow_stmt, gimple_location (stmt));
	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
	}

      /* If we previously formed at least one other builtin_powi call,
	 form the product of this one and those others.  */
      if (result)
	{
	  tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
	  mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
						   result, iter_result);
	  gimple_set_location (mul_stmt, gimple_location (stmt));
	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
	  gimple_set_visited (mul_stmt, true);
	  result = new_result;
	}
      else
	result = iter_result;

      /* Decrement the occurrence count of each element in the product
	 by the count found above, and remove this many copies of each
	 factor from OPS.  */
      for (i = j; i < vec_len; i++)
	{
	  unsigned k = power;
	  unsigned n;

	  rf1 = &repeat_factor_vec[i];
	  rf1->count -= power;

	  FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
	    {
	      if (oe->op == rf1->factor)
		{
		  if (oe->count <= k)
		    {
		      ops->ordered_remove (n);
		      k -= oe->count;

		      if (k == 0)
			break;
		    }
		  else
		    {
		      oe->count -= k;
		      break;
		    }
		}
	    }
	}
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}
/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */

static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
{
  tree rhs1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (stmt);
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}
/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
			    tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}
/* Reassociate expressions in basic block BB and its post-dominator as
   children.  */

static void
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple stmt = last_stmt (bb);

  if (stmt && !gimple_visited_p (stmt))
    maybe_optimize_range_tests (stmt);

  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
          && !stmt_could_throw_p (stmt))
        {
          tree lhs, rhs1, rhs2;
          enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

          /* If this is not a gimple binary expression, there is
             nothing for us to do with it.  */
          if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
            continue;

          /* If this was part of an already processed statement,
             we don't need to touch it again.  */
          if (gimple_visited_p (stmt))
            {
              /* This statement might have become dead because of previous
                 reassociations.  */
              if (has_zero_uses (gimple_get_lhs (stmt)))
                {
                  gsi_remove (&gsi, true);
                  release_defs (stmt);
                  /* We might end up removing the last stmt above which
                     places the iterator to the end of the sequence.
                     Reset it to the last stmt in this case which might
                     be the end of the sequence as well if we removed
                     the last statement of the sequence.  In which case
                     we need to bail out.  */
                  if (gsi_end_p (gsi))
                    {
                      gsi = gsi_last_bb (bb);
                      if (gsi_end_p (gsi))
                        break;
                    }
                }
              continue;
            }

          lhs = gimple_assign_lhs (stmt);
          rhs1 = gimple_assign_rhs1 (stmt);
          rhs2 = gimple_assign_rhs2 (stmt);

          /* For non-bit or min/max operations we can't associate
             all types.  Verify that here.  */
          if (rhs_code != BIT_IOR_EXPR
              && rhs_code != BIT_AND_EXPR
              && rhs_code != BIT_XOR_EXPR
              && rhs_code != MIN_EXPR
              && rhs_code != MAX_EXPR
              && (!can_reassociate_p (lhs)
                  || !can_reassociate_p (rhs1)
                  || !can_reassociate_p (rhs2)))
            continue;

          if (associative_tree_code (rhs_code))
            {
              auto_vec<operand_entry_t> ops;
              tree powi_result = NULL_TREE;

              /* There may be no immediate uses left by the time we
                 get here because we may have eliminated them all.  */
              if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
                continue;

              gimple_set_visited (stmt, true);
              linearize_expr_tree (&ops, stmt, true, true);
              ops.qsort (sort_by_operand_rank);
              optimize_ops_list (rhs_code, &ops);
              if (undistribute_ops_list (rhs_code, &ops,
                                         loop_containing_stmt (stmt)))
                {
                  ops.qsort (sort_by_operand_rank);
                  optimize_ops_list (rhs_code, &ops);
                }

              if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
                optimize_range_tests (rhs_code, &ops);

              if (first_pass_instance
                  && rhs_code == MULT_EXPR
                  && flag_unsafe_math_optimizations)
                powi_result = attempt_builtin_powi (stmt, &ops);

              /* If the operand vector is now empty, all operands were
                 consumed by the __builtin_powi optimization.  */
              if (ops.length () == 0)
                transform_stmt_to_copy (&gsi, stmt, powi_result);
              else if (ops.length () == 1)
                {
                  tree last_op = ops.last ()->op;

                  if (powi_result)
                    transform_stmt_to_multiply (&gsi, stmt, last_op,
                                                powi_result);
                  else
                    transform_stmt_to_copy (&gsi, stmt, last_op);
                }
              else
                {
                  enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
                  int ops_num = ops.length ();
                  int width = get_reassociation_width (ops_num, rhs_code, mode);
                  tree new_lhs = lhs;

                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file,
                             "Width = %d was chosen for reassociation\n", width);

                  if (width > 1
                      && ops.length () > 3)
                    rewrite_expr_tree_parallel (stmt, width, ops);
                  else
                    {
                      /* When there are three operands left, we want
                         to make sure the ones that get the double
                         binary op are chosen wisely.  */
                      int len = ops.length ();
                      if (len >= 3)
                        swap_ops_for_binary_stmt (ops, len - 3, stmt);

                      new_lhs = rewrite_expr_tree (stmt, 0, ops,
                                                   powi_result != NULL);
                    }

                  /* If we combined some repeated factors into a
                     __builtin_powi call, multiply that result by the
                     reassociated operands.  */
                  if (powi_result)
                    {
                      gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
                      tree type = TREE_TYPE (lhs);
                      tree target_ssa = make_temp_ssa_name (type, NULL,
                                                            "reassocpow");
                      gimple_set_lhs (lhs_stmt, target_ssa);
                      update_stmt (lhs_stmt);
                      if (lhs != new_lhs)
                        target_ssa = new_lhs;
                      mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
                                                               powi_result,
                                                               target_ssa);
                      gimple_set_location (mul_stmt, gimple_location (stmt));
                      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
                    }
                }
            }
        }
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    reassociate_bb (son);
}
void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
void debug_ops_vector (vec<operand_entry_t> ops);
/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
{
  operand_entry_t oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op, 0);
    }
}
/* Dump the operand entry vector OPS to STDERR.  */

void
debug_ops_vector (vec<operand_entry_t> ops)
{
  dump_ops_vector (stderr, ops);
}
/* Walk the CFG, breaking up subtracts and reassociating expressions.  */

static void
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}
/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  operand_entry_pool = create_alloc_pool ("operand entry pool",
                                          sizeof (struct operand_entry), 30);
  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
  operand_rank = pointer_map_create ();

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
        insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB.  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}
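
/* Example of the numbering produced above (values illustrative): if rank
   is 5 when a block is reached in the reverse-RPO walk, that block gets
   bb_rank[bb] = 6 << 16 = 0x60000.  The high sixteen bits therefore
   order whole blocks, leaving the low bits available for the
   finer-grained operand ranks that the rest of the pass builds on top
   of these block ranks (an assumption about how they are consumed).  */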
/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
                            reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
                            reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
                            reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
                            reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
                            reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
                            reassociate_stats.pows_created);

  pointer_map_destroy (operand_rank);
  free_alloc_pool (operand_entry_pool);

  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}
/* Gate and execute functions for Reassociation.  */

static unsigned int
execute_reassoc (void)
{
  init_reassoc ();

  do_reassoc ();
  repropagate_negates ();

  fini_reassoc ();
  return 0;
}

static bool
gate_tree_ssa_reassoc (void)
{
  return flag_tree_reassoc != 0;
}
const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  true, /* has_gate */
  true, /* has_execute */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_verify_ssa
    | TODO_update_ssa_only_virtuals
    | TODO_verify_flow ), /* todo_flags_finish */
};
class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  bool gate () { return gate_tree_ssa_reassoc (); }
  unsigned int execute () { return execute_reassoc (); }

}; // class pass_reassoc

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}