1 /* Reassociation for trees.
2 Copyright (C) 2005-2014 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
24 #include "hash-table.h"
29 #include "stor-layout.h"
35 #include "hard-reg-set.h"
38 #include "dominance.h"
41 #include "basic-block.h"
42 #include "gimple-pretty-print.h"
43 #include "tree-inline.h"
45 #include "tree-ssa-alias.h"
46 #include "internal-fn.h"
47 #include "gimple-fold.h"
49 #include "gimple-expr.h"
52 #include "gimple-iterator.h"
53 #include "gimplify-me.h"
54 #include "gimple-ssa.h"
56 #include "tree-phinodes.h"
57 #include "ssa-iterators.h"
58 #include "stringpool.h"
59 #include "tree-ssanames.h"
60 #include "tree-ssa-loop-niter.h"
61 #include "tree-ssa-loop.h"
65 #include "tree-iterator.h"
66 #include "tree-pass.h"
67 #include "alloc-pool.h"
68 #include "langhooks.h"
73 #include "diagnostic-core.h"
78 /* This is a simple global reassociation pass. It is, in part, based
79 on the LLVM pass of the same name (They do some things more/less
80 than we do, in different orders, etc).
82 It consists of five steps:
84 1. Breaking up subtract operations into addition + negate, where
85 it would promote the reassociation of adds.
87 2. Left linearization of the expression trees, so that (A+B)+(C+D)
88 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
89 During linearization, we place the operands of the binary
90 expressions into a vector of operand_entry_t
92 3. Optimization of the operand lists, eliminating things like a +
95 3a. Combine repeated factors with the same occurrence counts
96 into a __builtin_powi call that will later be optimized into
97 an optimal number of multiplies.
99 4. Rewrite the expression trees we linearized and optimized so
100 they are in proper rank order.
102 5. Repropagate negates, as nothing else will clean it up ATM.
104 A bit of theory on #4, since nobody seems to write anything down
105 about why it makes sense to do it the way they do it:
107 We could do this much nicer theoretically, but don't (for reasons
108 explained after how to do it theoretically nice :P).
110 In order to promote the most redundancy elimination, you want
111 binary expressions whose operands are the same rank (or
112 preferably, the same value) exposed to the redundancy eliminator,
113 for possible elimination.
115 So the way to do this if we really cared, is to build the new op
116 tree from the leaves to the roots, merging as you go, and putting the
117 new op on the end of the worklist, until you are left with one
118 thing on the worklist.
120 IE if you have to rewrite the following set of operands (listed with
121 rank in parentheses), with opcode PLUS_EXPR:
123 a (1), b (1), c (1), d (2), e (2)
126 We start with our merge worklist empty, and the ops list with all of
129 You want to first merge all leaves of the same rank, as much as
132 So first build a binary op of
134 mergetmp = a + b, and put "mergetmp" on the merge worklist.
136 Because there is no three operand form of PLUS_EXPR, c is not going to
137 be exposed to redundancy elimination as a rank 1 operand.
139 So you might as well throw it on the merge worklist (you could also
140 consider it to now be a rank two operand, and merge it with d and e,
141 but in this case, you then have evicted e from a binary op. So at
142 least in this situation, you can't win.)
144 Then build a binary op of d + e
147 and put mergetmp2 on the merge worklist.
149 so merge worklist = {mergetmp, c, mergetmp2}
151 Continue building binary ops of these operations until you have only
152 one operation left on the worklist.
157 mergetmp3 = mergetmp + c
159 worklist = {mergetmp2, mergetmp3}
161 mergetmp4 = mergetmp2 + mergetmp3
163 worklist = {mergetmp4}
165 because we have one operation left, we can now just set the original
166 statement equal to the result of that operation.
168 This will at least expose a + b and d + e to redundancy elimination
169 as binary operations.
171 For extra points, you can reuse the old statements to build the
172 mergetmps, since you shouldn't run out.
174 So why don't we do this?
176 Because it's expensive, and rarely will help. Most trees we are
177 reassociating have 3 or less ops. If they have 2 ops, they already
178 will be written into a nice single binary op. If you have 3 ops, a
179 single simple check suffices to tell you whether the first two are of the
180 same rank. If so, you know to order it
183 newstmt = mergetmp + op3
187 newstmt = mergetmp + op1
189 If all three are of the same rank, you can't expose them all in a
190 single binary operator anyway, so the above is *still* the best you
193 Thus, this is what we do. When we have three ops left, we check to see
194 what order to put them in, and call it a day. As a nod to vector sum
195 reduction, we check if any of the ops are really a phi node that is a
196 destructive update for the associating op, and keep the destructive
197 update together for vector sum reduction recognition. */
204 int constants_eliminated
;
207 int pows_encountered
;
211 /* Operator, rank pair. */
212 typedef struct operand_entry
220 static alloc_pool operand_entry_pool
;
222 /* This is used to assign a unique ID to each struct operand_entry
223 so that qsort results are identical on different hosts. */
224 static int next_operand_entry_id
;
226 /* Starting rank number for a given basic block, so that we can rank
227 operations using unmovable instructions in that BB based on the bb
229 static long *bb_rank
;
231 /* Operand->rank hashtable. */
232 static hash_map
<tree
, long> *operand_rank
;
234 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
235 all basic blocks the CFG should be adjusted - basic blocks
236 split right after that SSA_NAME's definition statement and before
237 the only use, which must be a bit ior. */
238 static vec
<tree
> reassoc_branch_fixups
;
241 static long get_rank (tree
);
242 static bool reassoc_stmt_dominates_stmt_p (gimple
, gimple
);
244 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
245 possibly added by gsi_remove. */
248 reassoc_remove_stmt (gimple_stmt_iterator
*gsi
)
250 gimple stmt
= gsi_stmt (*gsi
);
252 if (!MAY_HAVE_DEBUG_STMTS
|| gimple_code (stmt
) == GIMPLE_PHI
)
253 return gsi_remove (gsi
, true);
255 gimple_stmt_iterator prev
= *gsi
;
257 unsigned uid
= gimple_uid (stmt
);
258 basic_block bb
= gimple_bb (stmt
);
259 bool ret
= gsi_remove (gsi
, true);
260 if (!gsi_end_p (prev
))
263 prev
= gsi_start_bb (bb
);
264 gimple end_stmt
= gsi_stmt (*gsi
);
265 while ((stmt
= gsi_stmt (prev
)) != end_stmt
)
267 gcc_assert (stmt
&& is_gimple_debug (stmt
) && gimple_uid (stmt
) == 0);
268 gimple_set_uid (stmt
, uid
);
274 /* Bias amount for loop-carried phis. We want this to be larger than
275 the depth of any reassociation tree we can see, but not larger than
276 the rank difference between two blocks. */
277 #define PHI_LOOP_BIAS (1 << 15)
279 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
280 an innermost loop, and the phi has only a single use which is inside
281 the loop, then the rank is the block rank of the loop latch plus an
282 extra bias for the loop-carried dependence. This causes expressions
283 calculated into an accumulator variable to be independent for each
284 iteration of the loop. If STMT is some other phi, the rank is the
285 block rank of its containing block. */
287 phi_rank (gimple stmt
)
289 basic_block bb
= gimple_bb (stmt
);
290 struct loop
*father
= bb
->loop_father
;
296 /* We only care about real loops (those with a latch). */
298 return bb_rank
[bb
->index
];
300 /* Interesting phis must be in headers of innermost loops. */
301 if (bb
!= father
->header
303 return bb_rank
[bb
->index
];
305 /* Ignore virtual SSA_NAMEs. */
306 res
= gimple_phi_result (stmt
);
307 if (virtual_operand_p (res
))
308 return bb_rank
[bb
->index
];
310 /* The phi definition must have a single use, and that use must be
311 within the loop. Otherwise this isn't an accumulator pattern. */
312 if (!single_imm_use (res
, &use
, &use_stmt
)
313 || gimple_bb (use_stmt
)->loop_father
!= father
)
314 return bb_rank
[bb
->index
];
316 /* Look for phi arguments from within the loop. If found, bias this phi. */
317 for (i
= 0; i
< gimple_phi_num_args (stmt
); i
++)
319 tree arg
= gimple_phi_arg_def (stmt
, i
);
320 if (TREE_CODE (arg
) == SSA_NAME
321 && !SSA_NAME_IS_DEFAULT_DEF (arg
))
323 gimple def_stmt
= SSA_NAME_DEF_STMT (arg
);
324 if (gimple_bb (def_stmt
)->loop_father
== father
)
325 return bb_rank
[father
->latch
->index
] + PHI_LOOP_BIAS
;
329 /* Must be an uninteresting phi. */
330 return bb_rank
[bb
->index
];
333 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
334 loop-carried dependence of an innermost loop, return TRUE; else
337 loop_carried_phi (tree exp
)
342 if (TREE_CODE (exp
) != SSA_NAME
343 || SSA_NAME_IS_DEFAULT_DEF (exp
))
346 phi_stmt
= SSA_NAME_DEF_STMT (exp
);
348 if (gimple_code (SSA_NAME_DEF_STMT (exp
)) != GIMPLE_PHI
)
351 /* Non-loop-carried phis have block rank. Loop-carried phis have
352 an additional bias added in. If this phi doesn't have block rank,
353 it's biased and should not be propagated. */
354 block_rank
= bb_rank
[gimple_bb (phi_stmt
)->index
];
356 if (phi_rank (phi_stmt
) != block_rank
)
362 /* Return the maximum of RANK and the rank that should be propagated
363 from expression OP. For most operands, this is just the rank of OP.
364 For loop-carried phis, the value is zero to avoid undoing the bias
365 in favor of the phi. */
367 propagate_rank (long rank
, tree op
)
371 if (loop_carried_phi (op
))
374 op_rank
= get_rank (op
);
376 return MAX (rank
, op_rank
);
379 /* Look up the operand rank structure for expression E. */
382 find_operand_rank (tree e
)
384 long *slot
= operand_rank
->get (e
);
385 return slot
? *slot
: -1;
388 /* Insert {E,RANK} into the operand rank hashtable. */
391 insert_operand_rank (tree e
, long rank
)
393 gcc_assert (rank
> 0);
394 gcc_assert (!operand_rank
->put (e
, rank
));
397 /* Given an expression E, return the rank of the expression. */
402 /* Constants have rank 0. */
403 if (is_gimple_min_invariant (e
))
406 /* SSA_NAME's have the rank of the expression they are the result
408 For globals and uninitialized values, the rank is 0.
409 For function arguments, use the pre-setup rank.
410 For PHI nodes, stores, asm statements, etc, we use the rank of
412 For simple operations, the rank is the maximum rank of any of
413 its operands, or the bb_rank, whichever is less.
414 I make no claims that this is optimal, however, it gives good
417 /* We make an exception to the normal ranking system to break
418 dependences of accumulator variables in loops. Suppose we
419 have a simple one-block loop containing:
426 As shown, each iteration of the calculation into x is fully
427 dependent upon the iteration before it. We would prefer to
428 see this in the form:
435 If the loop is unrolled, the calculations of b and c from
436 different iterations can be interleaved.
438 To obtain this result during reassociation, we bias the rank
439 of the phi definition x_1 upward, when it is recognized as an
440 accumulator pattern. The artificial rank causes it to be
441 added last, providing the desired independence. */
443 if (TREE_CODE (e
) == SSA_NAME
)
450 if (SSA_NAME_IS_DEFAULT_DEF (e
))
451 return find_operand_rank (e
);
453 stmt
= SSA_NAME_DEF_STMT (e
);
454 if (gimple_code (stmt
) == GIMPLE_PHI
)
455 return phi_rank (stmt
);
457 if (!is_gimple_assign (stmt
)
458 || gimple_vdef (stmt
))
459 return bb_rank
[gimple_bb (stmt
)->index
];
461 /* If we already have a rank for this expression, use that. */
462 rank
= find_operand_rank (e
);
466 /* Otherwise, find the maximum rank for the operands. As an
467 exception, remove the bias from loop-carried phis when propagating
468 the rank so that dependent operations are not also biased. */
470 if (gimple_assign_single_p (stmt
))
472 tree rhs
= gimple_assign_rhs1 (stmt
);
473 n
= TREE_OPERAND_LENGTH (rhs
);
475 rank
= propagate_rank (rank
, rhs
);
478 for (i
= 0; i
< n
; i
++)
480 op
= TREE_OPERAND (rhs
, i
);
483 rank
= propagate_rank (rank
, op
);
489 n
= gimple_num_ops (stmt
);
490 for (i
= 1; i
< n
; i
++)
492 op
= gimple_op (stmt
, i
);
494 rank
= propagate_rank (rank
, op
);
498 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
500 fprintf (dump_file
, "Rank for ");
501 print_generic_expr (dump_file
, e
, 0);
502 fprintf (dump_file
, " is %ld\n", (rank
+ 1));
505 /* Note the rank in the hashtable so we don't recompute it. */
506 insert_operand_rank (e
, (rank
+ 1));
510 /* Globals, etc, are rank 0 */
515 /* We want integer ones to end up last no matter what, since they are
516 the ones we can do the most with. */
517 #define INTEGER_CONST_TYPE 1 << 3
518 #define FLOAT_CONST_TYPE 1 << 2
519 #define OTHER_CONST_TYPE 1 << 1
521 /* Classify an invariant tree into integer, float, or other, so that
522 we can sort them to be near other constants of the same type. */
524 constant_type (tree t
)
526 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
527 return INTEGER_CONST_TYPE
;
528 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
529 return FLOAT_CONST_TYPE
;
531 return OTHER_CONST_TYPE
;
534 /* qsort comparison function to sort operand entries PA and PB by rank
535 so that the sorted array is ordered by rank in decreasing order. */
537 sort_by_operand_rank (const void *pa
, const void *pb
)
539 const operand_entry_t oea
= *(const operand_entry_t
*)pa
;
540 const operand_entry_t oeb
= *(const operand_entry_t
*)pb
;
542 /* It's nicer for optimize_expression if constants that are likely
543 to fold when added/multiplied//whatever are put next to each
544 other. Since all constants have rank 0, order them by type. */
545 if (oeb
->rank
== 0 && oea
->rank
== 0)
547 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
548 return constant_type (oeb
->op
) - constant_type (oea
->op
);
550 /* To make sorting result stable, we use unique IDs to determine
552 return oeb
->id
- oea
->id
;
555 /* Lastly, make sure the versions that are the same go next to each
557 if ((oeb
->rank
- oea
->rank
== 0)
558 && TREE_CODE (oea
->op
) == SSA_NAME
559 && TREE_CODE (oeb
->op
) == SSA_NAME
)
561 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
562 versions of removed SSA_NAMEs, so if possible, prefer to sort
563 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
565 if (!SSA_NAME_IS_DEFAULT_DEF (oea
->op
)
566 && !SSA_NAME_IS_DEFAULT_DEF (oeb
->op
)
567 && SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
569 gimple stmta
= SSA_NAME_DEF_STMT (oea
->op
);
570 gimple stmtb
= SSA_NAME_DEF_STMT (oeb
->op
);
571 basic_block bba
= gimple_bb (stmta
);
572 basic_block bbb
= gimple_bb (stmtb
);
575 if (bb_rank
[bbb
->index
] != bb_rank
[bba
->index
])
576 return bb_rank
[bbb
->index
] - bb_rank
[bba
->index
];
580 bool da
= reassoc_stmt_dominates_stmt_p (stmta
, stmtb
);
581 bool db
= reassoc_stmt_dominates_stmt_p (stmtb
, stmta
);
587 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
588 return SSA_NAME_VERSION (oeb
->op
) - SSA_NAME_VERSION (oea
->op
);
590 return oeb
->id
- oea
->id
;
593 if (oeb
->rank
!= oea
->rank
)
594 return oeb
->rank
- oea
->rank
;
596 return oeb
->id
- oea
->id
;
599 /* Add an operand entry to *OPS for the tree operand OP. */
602 add_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
)
604 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
607 oe
->rank
= get_rank (op
);
608 oe
->id
= next_operand_entry_id
++;
613 /* Add an operand entry to *OPS for the tree operand OP with repeat
617 add_repeat_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
,
618 HOST_WIDE_INT repeat
)
620 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
623 oe
->rank
= get_rank (op
);
624 oe
->id
= next_operand_entry_id
++;
628 reassociate_stats
.pows_encountered
++;
631 /* Return true if STMT is reassociable operation containing a binary
632 operation with tree code CODE, and is inside LOOP. */
635 is_reassociable_op (gimple stmt
, enum tree_code code
, struct loop
*loop
)
637 basic_block bb
= gimple_bb (stmt
);
639 if (gimple_bb (stmt
) == NULL
)
642 if (!flow_bb_inside_loop_p (loop
, bb
))
645 if (is_gimple_assign (stmt
)
646 && gimple_assign_rhs_code (stmt
) == code
647 && has_single_use (gimple_assign_lhs (stmt
)))
654 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
655 operand of the negate operation. Otherwise, return NULL. */
658 get_unary_op (tree name
, enum tree_code opcode
)
660 gimple stmt
= SSA_NAME_DEF_STMT (name
);
662 if (!is_gimple_assign (stmt
))
665 if (gimple_assign_rhs_code (stmt
) == opcode
)
666 return gimple_assign_rhs1 (stmt
);
670 /* If CURR and LAST are a pair of ops that OPCODE allows us to
671 eliminate through equivalences, do so, remove them from OPS, and
672 return true. Otherwise, return false. */
675 eliminate_duplicate_pair (enum tree_code opcode
,
676 vec
<operand_entry_t
> *ops
,
679 operand_entry_t curr
,
680 operand_entry_t last
)
683 /* If we have two of the same op, and the opcode is & |, min, or max,
684 we can eliminate one of them.
685 If we have two of the same op, and the opcode is ^, we can
686 eliminate both of them. */
688 if (last
&& last
->op
== curr
->op
)
696 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
698 fprintf (dump_file
, "Equivalence: ");
699 print_generic_expr (dump_file
, curr
->op
, 0);
700 fprintf (dump_file
, " [&|minmax] ");
701 print_generic_expr (dump_file
, last
->op
, 0);
702 fprintf (dump_file
, " -> ");
703 print_generic_stmt (dump_file
, last
->op
, 0);
706 ops
->ordered_remove (i
);
707 reassociate_stats
.ops_eliminated
++;
712 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
714 fprintf (dump_file
, "Equivalence: ");
715 print_generic_expr (dump_file
, curr
->op
, 0);
716 fprintf (dump_file
, " ^ ");
717 print_generic_expr (dump_file
, last
->op
, 0);
718 fprintf (dump_file
, " -> nothing\n");
721 reassociate_stats
.ops_eliminated
+= 2;
723 if (ops
->length () == 2)
726 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
731 ops
->ordered_remove (i
-1);
732 ops
->ordered_remove (i
-1);
744 static vec
<tree
> plus_negates
;
746 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
747 expression, look in OPS for a corresponding positive operation to cancel
748 it out. If we find one, remove the other from OPS, replace
749 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
753 eliminate_plus_minus_pair (enum tree_code opcode
,
754 vec
<operand_entry_t
> *ops
,
755 unsigned int currindex
,
756 operand_entry_t curr
)
763 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
766 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
767 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
768 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
771 /* Any non-negated version will have a rank that is one less than
772 the current rank. So once we hit those ranks, if we don't find
775 for (i
= currindex
+ 1;
776 ops
->iterate (i
, &oe
)
777 && oe
->rank
>= curr
->rank
- 1 ;
780 if (oe
->op
== negateop
)
783 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
785 fprintf (dump_file
, "Equivalence: ");
786 print_generic_expr (dump_file
, negateop
, 0);
787 fprintf (dump_file
, " + -");
788 print_generic_expr (dump_file
, oe
->op
, 0);
789 fprintf (dump_file
, " -> 0\n");
792 ops
->ordered_remove (i
);
793 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
794 ops
->ordered_remove (currindex
);
795 reassociate_stats
.ops_eliminated
++;
799 else if (oe
->op
== notop
)
801 tree op_type
= TREE_TYPE (oe
->op
);
803 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
805 fprintf (dump_file
, "Equivalence: ");
806 print_generic_expr (dump_file
, notop
, 0);
807 fprintf (dump_file
, " + ~");
808 print_generic_expr (dump_file
, oe
->op
, 0);
809 fprintf (dump_file
, " -> -1\n");
812 ops
->ordered_remove (i
);
813 add_to_ops_vec (ops
, build_int_cst_type (op_type
, -1));
814 ops
->ordered_remove (currindex
);
815 reassociate_stats
.ops_eliminated
++;
821 /* CURR->OP is a negate expr in a plus expr: save it for later
822 inspection in repropagate_negates(). */
823 if (negateop
!= NULL_TREE
)
824 plus_negates
.safe_push (curr
->op
);
829 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
830 bitwise not expression, look in OPS for a corresponding operand to
831 cancel it out. If we find one, remove the other from OPS, replace
832 OPS[CURRINDEX] with 0, and return true. Otherwise, return
836 eliminate_not_pairs (enum tree_code opcode
,
837 vec
<operand_entry_t
> *ops
,
838 unsigned int currindex
,
839 operand_entry_t curr
)
845 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
846 || TREE_CODE (curr
->op
) != SSA_NAME
)
849 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
850 if (notop
== NULL_TREE
)
853 /* Any non-not version will have a rank that is one less than
854 the current rank. So once we hit those ranks, if we don't find
857 for (i
= currindex
+ 1;
858 ops
->iterate (i
, &oe
)
859 && oe
->rank
>= curr
->rank
- 1;
864 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
866 fprintf (dump_file
, "Equivalence: ");
867 print_generic_expr (dump_file
, notop
, 0);
868 if (opcode
== BIT_AND_EXPR
)
869 fprintf (dump_file
, " & ~");
870 else if (opcode
== BIT_IOR_EXPR
)
871 fprintf (dump_file
, " | ~");
872 print_generic_expr (dump_file
, oe
->op
, 0);
873 if (opcode
== BIT_AND_EXPR
)
874 fprintf (dump_file
, " -> 0\n");
875 else if (opcode
== BIT_IOR_EXPR
)
876 fprintf (dump_file
, " -> -1\n");
879 if (opcode
== BIT_AND_EXPR
)
880 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
881 else if (opcode
== BIT_IOR_EXPR
)
882 oe
->op
= build_all_ones_cst (TREE_TYPE (oe
->op
));
884 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
886 ops
->quick_push (oe
);
894 /* Use constant value that may be present in OPS to try to eliminate
895 operands. Note that this function is only really used when we've
896 eliminated ops for other reasons, or merged constants. Across
897 single statements, fold already does all of this, plus more. There
898 is little point in duplicating logic, so I've only included the
899 identities that I could ever construct testcases to trigger. */
902 eliminate_using_constants (enum tree_code opcode
,
903 vec
<operand_entry_t
> *ops
)
905 operand_entry_t oelast
= ops
->last ();
906 tree type
= TREE_TYPE (oelast
->op
);
908 if (oelast
->rank
== 0
909 && (INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
914 if (integer_zerop (oelast
->op
))
916 if (ops
->length () != 1)
918 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
919 fprintf (dump_file
, "Found & 0, removing all other ops\n");
921 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
924 ops
->quick_push (oelast
);
928 else if (integer_all_onesp (oelast
->op
))
930 if (ops
->length () != 1)
932 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
933 fprintf (dump_file
, "Found & -1, removing\n");
935 reassociate_stats
.ops_eliminated
++;
940 if (integer_all_onesp (oelast
->op
))
942 if (ops
->length () != 1)
944 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
945 fprintf (dump_file
, "Found | -1, removing all other ops\n");
947 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
950 ops
->quick_push (oelast
);
954 else if (integer_zerop (oelast
->op
))
956 if (ops
->length () != 1)
958 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
959 fprintf (dump_file
, "Found | 0, removing\n");
961 reassociate_stats
.ops_eliminated
++;
966 if (integer_zerop (oelast
->op
)
967 || (FLOAT_TYPE_P (type
)
968 && !HONOR_NANS (TYPE_MODE (type
))
969 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type
))
970 && real_zerop (oelast
->op
)))
972 if (ops
->length () != 1)
974 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
975 fprintf (dump_file
, "Found * 0, removing all other ops\n");
977 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
979 ops
->quick_push (oelast
);
983 else if (integer_onep (oelast
->op
)
984 || (FLOAT_TYPE_P (type
)
985 && !HONOR_SNANS (TYPE_MODE (type
))
986 && real_onep (oelast
->op
)))
988 if (ops
->length () != 1)
990 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
991 fprintf (dump_file
, "Found * 1, removing\n");
993 reassociate_stats
.ops_eliminated
++;
1001 if (integer_zerop (oelast
->op
)
1002 || (FLOAT_TYPE_P (type
)
1003 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
1004 && fold_real_zero_addition_p (type
, oelast
->op
,
1005 opcode
== MINUS_EXPR
)))
1007 if (ops
->length () != 1)
1009 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1010 fprintf (dump_file
, "Found [|^+] 0, removing\n");
1012 reassociate_stats
.ops_eliminated
++;
1024 static void linearize_expr_tree (vec
<operand_entry_t
> *, gimple
,
1027 /* Structure for tracking and counting operands. */
1028 typedef struct oecount_s
{
1031 enum tree_code oecode
;
1036 /* The heap for the oecount hashtable and the sorted list of operands. */
1037 static vec
<oecount
> cvec
;
1040 /* Oecount hashtable helpers. */
1042 struct oecount_hasher
1044 typedef int value_type
;
1045 typedef int compare_type
;
1046 typedef int store_values_directly
;
1047 static inline hashval_t
hash (const value_type
&);
1048 static inline bool equal (const value_type
&, const compare_type
&);
1049 static bool is_deleted (int &v
) { return v
== 1; }
1050 static void mark_deleted (int &e
) { e
= 1; }
1051 static bool is_empty (int &v
) { return v
== 0; }
1052 static void mark_empty (int &e
) { e
= 0; }
1053 static void remove (int &) {}
1056 /* Hash function for oecount. */
1059 oecount_hasher::hash (const value_type
&p
)
1061 const oecount
*c
= &cvec
[p
- 42];
1062 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
1065 /* Comparison function for oecount. */
1068 oecount_hasher::equal (const value_type
&p1
, const compare_type
&p2
)
1070 const oecount
*c1
= &cvec
[p1
- 42];
1071 const oecount
*c2
= &cvec
[p2
- 42];
1072 return (c1
->oecode
== c2
->oecode
1073 && c1
->op
== c2
->op
);
1076 /* Comparison function for qsort sorting oecount elements by count. */
1079 oecount_cmp (const void *p1
, const void *p2
)
1081 const oecount
*c1
= (const oecount
*)p1
;
1082 const oecount
*c2
= (const oecount
*)p2
;
1083 if (c1
->cnt
!= c2
->cnt
)
1084 return c1
->cnt
- c2
->cnt
;
1086 /* If counts are identical, use unique IDs to stabilize qsort. */
1087 return c1
->id
- c2
->id
;
1090 /* Return TRUE iff STMT represents a builtin call that raises OP
1091 to some exponent. */
1094 stmt_is_power_of_op (gimple stmt
, tree op
)
1098 if (!is_gimple_call (stmt
))
1101 fndecl
= gimple_call_fndecl (stmt
);
1104 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
1107 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1109 CASE_FLT_FN (BUILT_IN_POW
):
1110 CASE_FLT_FN (BUILT_IN_POWI
):
1111 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1118 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1119 in place and return the result. Assumes that stmt_is_power_of_op
1120 was previously called for STMT and returned TRUE. */
1122 static HOST_WIDE_INT
1123 decrement_power (gimple stmt
)
1125 REAL_VALUE_TYPE c
, cint
;
1126 HOST_WIDE_INT power
;
1129 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1131 CASE_FLT_FN (BUILT_IN_POW
):
1132 arg1
= gimple_call_arg (stmt
, 1);
1133 c
= TREE_REAL_CST (arg1
);
1134 power
= real_to_integer (&c
) - 1;
1135 real_from_integer (&cint
, VOIDmode
, power
, SIGNED
);
1136 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1139 CASE_FLT_FN (BUILT_IN_POWI
):
1140 arg1
= gimple_call_arg (stmt
, 1);
1141 power
= TREE_INT_CST_LOW (arg1
) - 1;
1142 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1150 /* Find the single immediate use of STMT's LHS, and replace it
1151 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1152 replace *DEF with OP as well. */
1155 propagate_op_to_single_use (tree op
, gimple stmt
, tree
*def
)
1160 gimple_stmt_iterator gsi
;
1162 if (is_gimple_call (stmt
))
1163 lhs
= gimple_call_lhs (stmt
);
1165 lhs
= gimple_assign_lhs (stmt
);
1167 gcc_assert (has_single_use (lhs
));
1168 single_imm_use (lhs
, &use
, &use_stmt
);
1172 if (TREE_CODE (op
) != SSA_NAME
)
1173 update_stmt (use_stmt
);
1174 gsi
= gsi_for_stmt (stmt
);
1175 unlink_stmt_vdef (stmt
);
1176 reassoc_remove_stmt (&gsi
);
1177 release_defs (stmt
);
1180 /* Walks the linear chain with result *DEF searching for an operation
1181 with operand OP and code OPCODE removing that from the chain. *DEF
1182 is updated if there is only one operand but no operation left. */
1185 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1187 gimple stmt
= SSA_NAME_DEF_STMT (*def
);
1193 if (opcode
== MULT_EXPR
1194 && stmt_is_power_of_op (stmt
, op
))
1196 if (decrement_power (stmt
) == 1)
1197 propagate_op_to_single_use (op
, stmt
, def
);
1201 name
= gimple_assign_rhs1 (stmt
);
1203 /* If this is the operation we look for and one of the operands
1204 is ours simply propagate the other operand into the stmts
1206 if (gimple_assign_rhs_code (stmt
) == opcode
1208 || gimple_assign_rhs2 (stmt
) == op
))
1211 name
= gimple_assign_rhs2 (stmt
);
1212 propagate_op_to_single_use (name
, stmt
, def
);
1216 /* We might have a multiply of two __builtin_pow* calls, and
1217 the operand might be hiding in the rightmost one. */
1218 if (opcode
== MULT_EXPR
1219 && gimple_assign_rhs_code (stmt
) == opcode
1220 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1221 && has_single_use (gimple_assign_rhs2 (stmt
)))
1223 gimple stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1224 if (stmt_is_power_of_op (stmt2
, op
))
1226 if (decrement_power (stmt2
) == 1)
1227 propagate_op_to_single_use (op
, stmt2
, def
);
1232 /* Continue walking the chain. */
1233 gcc_assert (name
!= op
1234 && TREE_CODE (name
) == SSA_NAME
);
1235 stmt
= SSA_NAME_DEF_STMT (name
);
1240 /* Returns true if statement S1 dominates statement S2. Like
1241 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1244 reassoc_stmt_dominates_stmt_p (gimple s1
, gimple s2
)
1246 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1248 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1249 SSA_NAME. Assume it lives at the beginning of function and
1250 thus dominates everything. */
1251 if (!bb1
|| s1
== s2
)
1254 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1260 /* PHIs in the same basic block are assumed to be
1261 executed all in parallel, if only one stmt is a PHI,
1262 it dominates the other stmt in the same basic block. */
1263 if (gimple_code (s1
) == GIMPLE_PHI
)
1266 if (gimple_code (s2
) == GIMPLE_PHI
)
1269 gcc_assert (gimple_uid (s1
) && gimple_uid (s2
));
1271 if (gimple_uid (s1
) < gimple_uid (s2
))
1274 if (gimple_uid (s1
) > gimple_uid (s2
))
1277 gimple_stmt_iterator gsi
= gsi_for_stmt (s1
);
1278 unsigned int uid
= gimple_uid (s1
);
1279 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1281 gimple s
= gsi_stmt (gsi
);
1282 if (gimple_uid (s
) != uid
)
1291 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
1294 /* Insert STMT after INSERT_POINT. */
1297 insert_stmt_after (gimple stmt
, gimple insert_point
)
1299 gimple_stmt_iterator gsi
;
1302 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1303 bb
= gimple_bb (insert_point
);
1304 else if (!stmt_ends_bb_p (insert_point
))
1306 gsi
= gsi_for_stmt (insert_point
);
1307 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1308 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1312 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1313 thus if it must end a basic block, it should be a call that can
1314 throw, or some assignment that can throw. If it throws, the LHS
1315 of it will not be initialized though, so only valid places using
1316 the SSA_NAME should be dominated by the fallthru edge. */
1317 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
1318 gsi
= gsi_after_labels (bb
);
1319 if (gsi_end_p (gsi
))
1321 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1322 gimple_set_uid (stmt
,
1323 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1326 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1327 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1330 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1331 the result. Places the statement after the definition of either
1332 OP1 or OP2. Returns the new statement. */
1335 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
1337 gimple op1def
= NULL
, op2def
= NULL
;
1338 gimple_stmt_iterator gsi
;
1342 /* Create the addition statement. */
1343 op
= make_ssa_name (type
, NULL
);
1344 sum
= gimple_build_assign_with_ops (opcode
, op
, op1
, op2
);
1346 /* Find an insertion place and insert. */
1347 if (TREE_CODE (op1
) == SSA_NAME
)
1348 op1def
= SSA_NAME_DEF_STMT (op1
);
1349 if (TREE_CODE (op2
) == SSA_NAME
)
1350 op2def
= SSA_NAME_DEF_STMT (op2
);
1351 if ((!op1def
|| gimple_nop_p (op1def
))
1352 && (!op2def
|| gimple_nop_p (op2def
)))
1354 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1355 if (gsi_end_p (gsi
))
1357 gimple_stmt_iterator gsi2
1358 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1359 gimple_set_uid (sum
,
1360 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1363 gimple_set_uid (sum
, gimple_uid (gsi_stmt (gsi
)));
1364 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1368 gimple insert_point
;
1369 if ((!op1def
|| gimple_nop_p (op1def
))
1370 || (op2def
&& !gimple_nop_p (op2def
)
1371 && reassoc_stmt_dominates_stmt_p (op1def
, op2def
)))
1372 insert_point
= op2def
;
1374 insert_point
= op1def
;
1375 insert_stmt_after (sum
, insert_point
);
1382 /* Perform un-distribution of divisions and multiplications.
1383 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1384 to (A + B) / X for real X.
1386 The algorithm is organized as follows.
1388 - First we walk the addition chain *OPS looking for summands that
1389 are defined by a multiplication or a real division. This results
1390 in the candidates bitmap with relevant indices into *OPS.
1392 - Second we build the chains of multiplications or divisions for
1393 these candidates, counting the number of occurrences of (operand, code)
1394 pairs in all of the candidates chains.
1396 - Third we sort the (operand, code) pairs by number of occurrence and
1397 process them starting with the pair with the most uses.
1399 * For each such pair we walk the candidates again to build a
1400 second candidate bitmap noting all multiplication/division chains
1401 that have at least one occurrence of (operand, code).
1403 * We build an alternate addition chain only covering these
1404 candidates with one (operand, code) operation removed from their
1405 multiplication/division chain.
1407 * The first candidate gets replaced by the alternate addition chain
1408 multiplied/divided by the operand.
1410 * All candidate chains get disabled for further processing and
1411 processing of (operand, code) pairs continues.
1413 The alternate addition chains built are re-processed by the main
1414 reassociation algorithm which allows optimizing a * x * y + b * y * x
1415 to (a + b ) * x * y in one invocation of the reassociation pass. */
1418 undistribute_ops_list (enum tree_code opcode
,
1419 vec
<operand_entry_t
> *ops
, struct loop
*loop
)
1421 unsigned int length
= ops
->length ();
1422 operand_entry_t oe1
;
1424 sbitmap candidates
, candidates2
;
1425 unsigned nr_candidates
, nr_candidates2
;
1426 sbitmap_iterator sbi0
;
1427 vec
<operand_entry_t
> *subops
;
1428 bool changed
= false;
1429 int next_oecount_id
= 0;
1432 || opcode
!= PLUS_EXPR
)
1435 /* Build a list of candidates to process. */
1436 candidates
= sbitmap_alloc (length
);
1437 bitmap_clear (candidates
);
1439 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1441 enum tree_code dcode
;
1444 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1446 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1447 if (!is_gimple_assign (oe1def
))
1449 dcode
= gimple_assign_rhs_code (oe1def
);
1450 if ((dcode
!= MULT_EXPR
1451 && dcode
!= RDIV_EXPR
)
1452 || !is_reassociable_op (oe1def
, dcode
, loop
))
1455 bitmap_set_bit (candidates
, i
);
1459 if (nr_candidates
< 2)
1461 sbitmap_free (candidates
);
1465 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1467 fprintf (dump_file
, "searching for un-distribute opportunities ");
1468 print_generic_expr (dump_file
,
1469 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, 0);
1470 fprintf (dump_file
, " %d\n", nr_candidates
);
1473 /* Build linearized sub-operand lists and the counting table. */
1476 hash_table
<oecount_hasher
> ctable (15);
1478 /* ??? Macro arguments cannot have multi-argument template types in
1479 them. This typedef is needed to workaround that limitation. */
1480 typedef vec
<operand_entry_t
> vec_operand_entry_t_heap
;
1481 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1482 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1485 enum tree_code oecode
;
1488 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1489 oecode
= gimple_assign_rhs_code (oedef
);
1490 linearize_expr_tree (&subops
[i
], oedef
,
1491 associative_tree_code (oecode
), false);
1493 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1500 c
.id
= next_oecount_id
++;
1503 idx
= cvec
.length () + 41;
1504 slot
= ctable
.find_slot (idx
, INSERT
);
1512 cvec
[*slot
- 42].cnt
++;
1517 /* Sort the counting table. */
1518 cvec
.qsort (oecount_cmp
);
1520 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1523 fprintf (dump_file
, "Candidates:\n");
1524 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1526 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1527 c
->oecode
== MULT_EXPR
1528 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1529 print_generic_expr (dump_file
, c
->op
, 0);
1530 fprintf (dump_file
, "\n");
1534 /* Process the (operand, code) pairs in order of most occurrence. */
1535 candidates2
= sbitmap_alloc (length
);
1536 while (!cvec
.is_empty ())
1538 oecount
*c
= &cvec
.last ();
1542 /* Now collect the operands in the outer chain that contain
1543 the common operand in their inner chain. */
1544 bitmap_clear (candidates2
);
1546 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1549 enum tree_code oecode
;
1551 tree op
= (*ops
)[i
]->op
;
1553 /* If we undistributed in this chain already this may be
1555 if (TREE_CODE (op
) != SSA_NAME
)
1558 oedef
= SSA_NAME_DEF_STMT (op
);
1559 oecode
= gimple_assign_rhs_code (oedef
);
1560 if (oecode
!= c
->oecode
)
1563 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1565 if (oe1
->op
== c
->op
)
1567 bitmap_set_bit (candidates2
, i
);
1574 if (nr_candidates2
>= 2)
1576 operand_entry_t oe1
, oe2
;
1578 int first
= bitmap_first_set_bit (candidates2
);
1580 /* Build the new addition chain. */
1581 oe1
= (*ops
)[first
];
1582 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1584 fprintf (dump_file
, "Building (");
1585 print_generic_expr (dump_file
, oe1
->op
, 0);
1587 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1588 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1592 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1594 fprintf (dump_file
, " + ");
1595 print_generic_expr (dump_file
, oe2
->op
, 0);
1597 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1598 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1599 oe1
->op
, oe2
->op
, opcode
);
1600 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1602 oe1
->op
= gimple_get_lhs (sum
);
1605 /* Apply the multiplication/division. */
1606 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1607 oe1
->op
, c
->op
, c
->oecode
);
1608 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1610 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1611 print_generic_expr (dump_file
, c
->op
, 0);
1612 fprintf (dump_file
, "\n");
1615 /* Record it in the addition chain and disable further
1616 undistribution with this op. */
1617 oe1
->op
= gimple_assign_lhs (prod
);
1618 oe1
->rank
= get_rank (oe1
->op
);
1619 subops
[first
].release ();
1627 for (i
= 0; i
< ops
->length (); ++i
)
1628 subops
[i
].release ();
1631 sbitmap_free (candidates
);
1632 sbitmap_free (candidates2
);
1637 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1638 expression, examine the other OPS to see if any of them are comparisons
1639 of the same values, which we may be able to combine or eliminate.
1640 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1643 eliminate_redundant_comparison (enum tree_code opcode
,
1644 vec
<operand_entry_t
> *ops
,
1645 unsigned int currindex
,
1646 operand_entry_t curr
)
1649 enum tree_code lcode
, rcode
;
1654 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
1657 /* Check that CURR is a comparison. */
1658 if (TREE_CODE (curr
->op
) != SSA_NAME
)
1660 def1
= SSA_NAME_DEF_STMT (curr
->op
);
1661 if (!is_gimple_assign (def1
))
1663 lcode
= gimple_assign_rhs_code (def1
);
1664 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
1666 op1
= gimple_assign_rhs1 (def1
);
1667 op2
= gimple_assign_rhs2 (def1
);
1669 /* Now look for a similar comparison in the remaining OPS. */
1670 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
1674 if (TREE_CODE (oe
->op
) != SSA_NAME
)
1676 def2
= SSA_NAME_DEF_STMT (oe
->op
);
1677 if (!is_gimple_assign (def2
))
1679 rcode
= gimple_assign_rhs_code (def2
);
1680 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
1683 /* If we got here, we have a match. See if we can combine the
1685 if (opcode
== BIT_IOR_EXPR
)
1686 t
= maybe_fold_or_comparisons (lcode
, op1
, op2
,
1687 rcode
, gimple_assign_rhs1 (def2
),
1688 gimple_assign_rhs2 (def2
));
1690 t
= maybe_fold_and_comparisons (lcode
, op1
, op2
,
1691 rcode
, gimple_assign_rhs1 (def2
),
1692 gimple_assign_rhs2 (def2
));
1696 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1697 always give us a boolean_type_node value back. If the original
1698 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1699 we need to convert. */
1700 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
1701 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
1703 if (TREE_CODE (t
) != INTEGER_CST
1704 && !operand_equal_p (t
, curr
->op
, 0))
1706 enum tree_code subcode
;
1707 tree newop1
, newop2
;
1708 if (!COMPARISON_CLASS_P (t
))
1710 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1711 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1712 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1713 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
1717 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1719 fprintf (dump_file
, "Equivalence: ");
1720 print_generic_expr (dump_file
, curr
->op
, 0);
1721 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
1722 print_generic_expr (dump_file
, oe
->op
, 0);
1723 fprintf (dump_file
, " -> ");
1724 print_generic_expr (dump_file
, t
, 0);
1725 fprintf (dump_file
, "\n");
1728 /* Now we can delete oe, as it has been subsumed by the new combined
1730 ops
->ordered_remove (i
);
1731 reassociate_stats
.ops_eliminated
++;
1733 /* If t is the same as curr->op, we're done. Otherwise we must
1734 replace curr->op with t. Special case is if we got a constant
1735 back, in which case we add it to the end instead of in place of
1736 the current entry. */
1737 if (TREE_CODE (t
) == INTEGER_CST
)
1739 ops
->ordered_remove (currindex
);
1740 add_to_ops_vec (ops
, t
);
1742 else if (!operand_equal_p (t
, curr
->op
, 0))
1745 enum tree_code subcode
;
1748 gcc_assert (COMPARISON_CLASS_P (t
));
1749 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1750 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1751 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1752 gcc_checking_assert (is_gimple_val (newop1
)
1753 && is_gimple_val (newop2
));
1754 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
1755 curr
->op
= gimple_get_lhs (sum
);
1763 /* Perform various identities and other optimizations on the list of
1764 operand entries, stored in OPS. The tree code for the binary
1765 operation between all the operands is OPCODE. */
1768 optimize_ops_list (enum tree_code opcode
,
1769 vec
<operand_entry_t
> *ops
)
1771 unsigned int length
= ops
->length ();
1774 operand_entry_t oelast
= NULL
;
1775 bool iterate
= false;
1780 oelast
= ops
->last ();
1782 /* If the last two are constants, pop the constants off, merge them
1783 and try the next two. */
1784 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
1786 operand_entry_t oelm1
= (*ops
)[length
- 2];
1788 if (oelm1
->rank
== 0
1789 && is_gimple_min_invariant (oelm1
->op
)
1790 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
1791 TREE_TYPE (oelast
->op
)))
1793 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
1794 oelm1
->op
, oelast
->op
);
1796 if (folded
&& is_gimple_min_invariant (folded
))
1798 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1799 fprintf (dump_file
, "Merging constants\n");
1804 add_to_ops_vec (ops
, folded
);
1805 reassociate_stats
.constants_eliminated
++;
1807 optimize_ops_list (opcode
, ops
);
1813 eliminate_using_constants (opcode
, ops
);
1816 for (i
= 0; ops
->iterate (i
, &oe
);)
1820 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
1822 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
1823 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
1824 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
1836 length
= ops
->length ();
1837 oelast
= ops
->last ();
1840 optimize_ops_list (opcode
, ops
);
1843 /* The following functions are subroutines to optimize_range_tests and allow
1844 it to try to change a logical combination of comparisons into a range
1848 X == 2 || X == 5 || X == 3 || X == 4
1852 (unsigned) (X - 2) <= 3
1854 For more information see comments above fold_test_range in fold-const.c,
1855 this implementation is for GIMPLE. */
1863 bool strict_overflow_p
;
1864 unsigned int idx
, next
;
1867 /* This is similar to make_range in fold-const.c, but on top of
1868 GIMPLE instead of trees. If EXP is non-NULL, it should be
1869 an SSA_NAME and STMT argument is ignored, otherwise STMT
1870 argument should be a GIMPLE_COND. */
1873 init_range_entry (struct range_entry
*r
, tree exp
, gimple stmt
)
1877 bool is_bool
, strict_overflow_p
;
1881 r
->strict_overflow_p
= false;
1883 r
->high
= NULL_TREE
;
1884 if (exp
!= NULL_TREE
1885 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
1888 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1889 and see if we can refine the range. Some of the cases below may not
1890 happen, but it doesn't seem worth worrying about this. We "continue"
1891 the outer loop when we've changed something; otherwise we "break"
1892 the switch, which will "break" the while. */
1893 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
1896 strict_overflow_p
= false;
1898 if (exp
== NULL_TREE
)
1900 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
1902 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
1907 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
1912 enum tree_code code
;
1913 tree arg0
, arg1
, exp_type
;
1917 if (exp
!= NULL_TREE
)
1919 if (TREE_CODE (exp
) != SSA_NAME
1920 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
))
1923 stmt
= SSA_NAME_DEF_STMT (exp
);
1924 if (!is_gimple_assign (stmt
))
1927 code
= gimple_assign_rhs_code (stmt
);
1928 arg0
= gimple_assign_rhs1 (stmt
);
1929 arg1
= gimple_assign_rhs2 (stmt
);
1930 exp_type
= TREE_TYPE (exp
);
1934 code
= gimple_cond_code (stmt
);
1935 arg0
= gimple_cond_lhs (stmt
);
1936 arg1
= gimple_cond_rhs (stmt
);
1937 exp_type
= boolean_type_node
;
1940 if (TREE_CODE (arg0
) != SSA_NAME
)
1942 loc
= gimple_location (stmt
);
1946 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
1947 /* Ensure the range is either +[-,0], +[0,0],
1948 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1949 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1950 or similar expression of unconditional true or
1951 false, it should not be negated. */
1952 && ((high
&& integer_zerop (high
))
1953 || (low
&& integer_onep (low
))))
1966 if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
1968 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
1973 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
1988 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
1990 &strict_overflow_p
);
1991 if (nexp
!= NULL_TREE
)
1994 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2007 r
->strict_overflow_p
= strict_overflow_p
;
2011 /* Comparison function for qsort. Sort entries
2012 without SSA_NAME exp first, then with SSA_NAMEs sorted
2013 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2014 by increasing ->low and if ->low is the same, by increasing
2015 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2019 range_entry_cmp (const void *a
, const void *b
)
2021 const struct range_entry
*p
= (const struct range_entry
*) a
;
2022 const struct range_entry
*q
= (const struct range_entry
*) b
;
2024 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
2026 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2028 /* Group range_entries for the same SSA_NAME together. */
2029 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
2031 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
2033 /* If ->low is different, NULL low goes first, then by
2035 if (p
->low
!= NULL_TREE
)
2037 if (q
->low
!= NULL_TREE
)
2039 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2041 if (tem
&& integer_onep (tem
))
2043 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2045 if (tem
&& integer_onep (tem
))
2051 else if (q
->low
!= NULL_TREE
)
2053 /* If ->high is different, NULL high goes last, before that by
2055 if (p
->high
!= NULL_TREE
)
2057 if (q
->high
!= NULL_TREE
)
2059 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2061 if (tem
&& integer_onep (tem
))
2063 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2065 if (tem
&& integer_onep (tem
))
2071 else if (p
->high
!= NULL_TREE
)
2073 /* If both ranges are the same, sort below by ascending idx. */
2078 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2081 if (p
->idx
< q
->idx
)
2085 gcc_checking_assert (p
->idx
> q
->idx
);
2090 /* Helper routine of optimize_range_test.
2091 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2092 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2093 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2094 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2095 an array of COUNT pointers to other ranges. Return
2096 true if the range merge has been successful.
2097 If OPCODE is ERROR_MARK, this is called from within
2098 maybe_optimize_range_tests and is performing inter-bb range optimization.
2099 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2103 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2104 struct range_entry
**otherrangep
,
2105 unsigned int count
, enum tree_code opcode
,
2106 vec
<operand_entry_t
> *ops
, tree exp
, gimple_seq seq
,
2107 bool in_p
, tree low
, tree high
, bool strict_overflow_p
)
2109 operand_entry_t oe
= (*ops
)[range
->idx
];
2111 gimple stmt
= op
? SSA_NAME_DEF_STMT (op
) :
2112 last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2113 location_t loc
= gimple_location (stmt
);
2114 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2115 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2117 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2118 gimple_stmt_iterator gsi
;
2121 if (tem
== NULL_TREE
)
2124 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2125 warning_at (loc
, OPT_Wstrict_overflow
,
2126 "assuming signed overflow does not occur "
2127 "when simplifying range test");
2129 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2131 struct range_entry
*r
;
2132 fprintf (dump_file
, "Optimizing range tests ");
2133 print_generic_expr (dump_file
, range
->exp
, 0);
2134 fprintf (dump_file
, " %c[", range
->in_p
? '+' : '-');
2135 print_generic_expr (dump_file
, range
->low
, 0);
2136 fprintf (dump_file
, ", ");
2137 print_generic_expr (dump_file
, range
->high
, 0);
2138 fprintf (dump_file
, "]");
2139 for (i
= 0; i
< count
; i
++)
2145 fprintf (dump_file
, " and %c[", r
->in_p
? '+' : '-');
2146 print_generic_expr (dump_file
, r
->low
, 0);
2147 fprintf (dump_file
, ", ");
2148 print_generic_expr (dump_file
, r
->high
, 0);
2149 fprintf (dump_file
, "]");
2151 fprintf (dump_file
, "\n into ");
2152 print_generic_expr (dump_file
, tem
, 0);
2153 fprintf (dump_file
, "\n");
2156 if (opcode
== BIT_IOR_EXPR
2157 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2158 tem
= invert_truthvalue_loc (loc
, tem
);
2160 tem
= fold_convert_loc (loc
, optype
, tem
);
2161 gsi
= gsi_for_stmt (stmt
);
2162 /* In rare cases range->exp can be equal to lhs of stmt.
2163 In that case we have to insert after the stmt rather then before
2165 if (op
== range
->exp
)
2167 gsi_insert_seq_after (&gsi
, seq
, GSI_CONTINUE_LINKING
);
2168 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, false,
2169 GSI_CONTINUE_LINKING
);
2173 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2174 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
2178 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2179 if (gimple_uid (gsi_stmt (gsi
)))
2182 gimple_set_uid (gsi_stmt (gsi
), gimple_uid (stmt
));
2189 range
->strict_overflow_p
= false;
2191 for (i
= 0; i
< count
; i
++)
2194 range
= otherrange
+ i
;
2196 range
= otherrangep
[i
];
2197 oe
= (*ops
)[range
->idx
];
2198 /* Now change all the other range test immediate uses, so that
2199 those tests will be optimized away. */
2200 if (opcode
== ERROR_MARK
)
2203 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
2204 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
2206 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
2207 ? boolean_false_node
: boolean_true_node
);
2210 oe
->op
= error_mark_node
;
2211 range
->exp
= NULL_TREE
;
2216 /* Optimize X == CST1 || X == CST2
2217 if popcount (CST1 ^ CST2) == 1 into
2218 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2219 Similarly for ranges. E.g.
2220 X != 2 && X != 3 && X != 10 && X != 11
2221 will be transformed by the previous optimization into
2222 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2223 and this loop can transform that into
2224 !(((X & ~8) - 2U) <= 1U). */
2227 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
2228 tree lowi
, tree lowj
, tree highi
, tree highj
,
2229 vec
<operand_entry_t
> *ops
,
2230 struct range_entry
*rangei
,
2231 struct range_entry
*rangej
)
2233 tree lowxor
, highxor
, tem
, exp
;
2234 /* Check lowi ^ lowj == highi ^ highj and
2235 popcount (lowi ^ lowj) == 1. */
2236 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
2237 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
2239 if (!integer_pow2p (lowxor
))
2241 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
2242 if (!tree_int_cst_equal (lowxor
, highxor
))
2245 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
2246 exp
= fold_build2 (BIT_AND_EXPR
, type
, rangei
->exp
, tem
);
2247 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
2248 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
2249 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, exp
,
2250 NULL
, rangei
->in_p
, lowj
, highj
,
2251 rangei
->strict_overflow_p
2252 || rangej
->strict_overflow_p
))
2257 /* Optimize X == CST1 || X == CST2
2258 if popcount (CST2 - CST1) == 1 into
2259 ((X - CST1) & ~(CST2 - CST1)) == 0.
2260 Similarly for ranges. E.g.
2261 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2262 || X == 75 || X == 45
2263 will be transformed by the previous optimization into
2264 (X - 43U) <= 3U || (X - 75U) <= 3U
2265 and this loop can transform that into
2266 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2268 optimize_range_tests_diff (enum tree_code opcode
, tree type
,
2269 tree lowi
, tree lowj
, tree highi
, tree highj
,
2270 vec
<operand_entry_t
> *ops
,
2271 struct range_entry
*rangei
,
2272 struct range_entry
*rangej
)
2274 tree tem1
, tem2
, mask
;
2275 /* Check highi - lowi == highj - lowj. */
2276 tem1
= fold_binary (MINUS_EXPR
, type
, highi
, lowi
);
2277 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2279 tem2
= fold_binary (MINUS_EXPR
, type
, highj
, lowj
);
2280 if (!tree_int_cst_equal (tem1
, tem2
))
2282 /* Check popcount (lowj - lowi) == 1. */
2283 tem1
= fold_binary (MINUS_EXPR
, type
, lowj
, lowi
);
2284 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2286 if (!integer_pow2p (tem1
))
2289 type
= unsigned_type_for (type
);
2290 tem1
= fold_convert (type
, tem1
);
2291 tem2
= fold_convert (type
, tem2
);
2292 lowi
= fold_convert (type
, lowi
);
2293 mask
= fold_build1 (BIT_NOT_EXPR
, type
, tem1
);
2294 tem1
= fold_binary (MINUS_EXPR
, type
,
2295 fold_convert (type
, rangei
->exp
), lowi
);
2296 tem1
= fold_build2 (BIT_AND_EXPR
, type
, tem1
, mask
);
2297 lowj
= build_int_cst (type
, 0);
2298 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, tem1
,
2299 NULL
, rangei
->in_p
, lowj
, tem2
,
2300 rangei
->strict_overflow_p
2301 || rangej
->strict_overflow_p
))
2306 /* It does some common checks for function optimize_range_tests_xor and
2307 optimize_range_tests_diff.
2308 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2309 Else it calls optimize_range_tests_diff. */
2312 optimize_range_tests_1 (enum tree_code opcode
, int first
, int length
,
2313 bool optimize_xor
, vec
<operand_entry_t
> *ops
,
2314 struct range_entry
*ranges
)
2317 bool any_changes
= false;
2318 for (i
= first
; i
< length
; i
++)
2320 tree lowi
, highi
, lowj
, highj
, type
, tem
;
2322 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2324 type
= TREE_TYPE (ranges
[i
].exp
);
2325 if (!INTEGRAL_TYPE_P (type
))
2327 lowi
= ranges
[i
].low
;
2328 if (lowi
== NULL_TREE
)
2329 lowi
= TYPE_MIN_VALUE (type
);
2330 highi
= ranges
[i
].high
;
2331 if (highi
== NULL_TREE
)
2333 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
2336 if (ranges
[i
].exp
!= ranges
[j
].exp
|| ranges
[j
].in_p
)
2338 lowj
= ranges
[j
].low
;
2339 if (lowj
== NULL_TREE
)
2341 highj
= ranges
[j
].high
;
2342 if (highj
== NULL_TREE
)
2343 highj
= TYPE_MAX_VALUE (type
);
2344 /* Check lowj > highi. */
2345 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2347 if (tem
== NULL_TREE
|| !integer_onep (tem
))
2350 changes
= optimize_range_tests_xor (opcode
, type
, lowi
, lowj
,
2352 ranges
+ i
, ranges
+ j
);
2354 changes
= optimize_range_tests_diff (opcode
, type
, lowi
, lowj
,
2356 ranges
+ i
, ranges
+ j
);
2367 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2368 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2369 EXP on success, NULL otherwise. */
2372 extract_bit_test_mask (tree exp
, int prec
, tree totallow
, tree low
, tree high
,
2373 wide_int
*mask
, tree
*totallowp
)
2375 tree tem
= int_const_binop (MINUS_EXPR
, high
, low
);
2376 if (tem
== NULL_TREE
2377 || TREE_CODE (tem
) != INTEGER_CST
2378 || TREE_OVERFLOW (tem
)
2379 || tree_int_cst_sgn (tem
) == -1
2380 || compare_tree_int (tem
, prec
) != -1)
2383 unsigned HOST_WIDE_INT max
= tree_to_uhwi (tem
) + 1;
2384 *mask
= wi::shifted_mask (0, max
, false, prec
);
2385 if (TREE_CODE (exp
) == BIT_AND_EXPR
2386 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
2388 widest_int msk
= wi::to_widest (TREE_OPERAND (exp
, 1));
2389 msk
= wi::zext (~msk
, TYPE_PRECISION (TREE_TYPE (exp
)));
2390 if (wi::popcount (msk
) == 1
2391 && wi::ltu_p (msk
, prec
- max
))
2393 *mask
|= wi::shifted_mask (msk
.to_uhwi (), max
, false, prec
);
2394 max
+= msk
.to_uhwi ();
2395 exp
= TREE_OPERAND (exp
, 0);
2396 if (integer_zerop (low
)
2397 && TREE_CODE (exp
) == PLUS_EXPR
2398 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
2401 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp
, 1)),
2402 TYPE_PRECISION (TREE_TYPE (low
))));
2403 tree tbias
= wide_int_to_tree (TREE_TYPE (low
), bias
);
2407 exp
= TREE_OPERAND (exp
, 0);
2411 else if (!tree_int_cst_lt (totallow
, tbias
))
2413 bias
-= wi::to_widest (totallow
);
2414 if (wi::ges_p (bias
, 0) && wi::lts_p (bias
, prec
- max
))
2416 *mask
= wi::lshift (*mask
, bias
);
2417 exp
= TREE_OPERAND (exp
, 0);
2426 if (!tree_int_cst_lt (totallow
, low
))
2428 tem
= int_const_binop (MINUS_EXPR
, low
, totallow
);
2429 if (tem
== NULL_TREE
2430 || TREE_CODE (tem
) != INTEGER_CST
2431 || TREE_OVERFLOW (tem
)
2432 || compare_tree_int (tem
, prec
- max
) == 1)
2435 *mask
= wi::lshift (*mask
, wi::to_widest (tem
));
2439 /* Attempt to optimize small range tests using bit test.
2441 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2442 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2443 has been by earlier optimizations optimized into:
2444 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2445 As all the 43 through 82 range is less than 64 numbers,
2446 for 64-bit word targets optimize that into:
2447 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
2450 optimize_range_tests_to_bit_test (enum tree_code opcode
, int first
, int length
,
2451 vec
<operand_entry_t
> *ops
,
2452 struct range_entry
*ranges
)
2455 bool any_changes
= false;
2456 int prec
= GET_MODE_BITSIZE (word_mode
);
2457 auto_vec
<struct range_entry
*, 64> candidates
;
2459 for (i
= first
; i
< length
- 2; i
++)
2461 tree lowi
, highi
, lowj
, highj
, type
;
2463 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2465 type
= TREE_TYPE (ranges
[i
].exp
);
2466 if (!INTEGRAL_TYPE_P (type
))
2468 lowi
= ranges
[i
].low
;
2469 if (lowi
== NULL_TREE
)
2470 lowi
= TYPE_MIN_VALUE (type
);
2471 highi
= ranges
[i
].high
;
2472 if (highi
== NULL_TREE
)
2475 tree exp
= extract_bit_test_mask (ranges
[i
].exp
, prec
, lowi
, lowi
,
2476 highi
, &mask
, &lowi
);
2477 if (exp
== NULL_TREE
)
2479 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2480 candidates
.truncate (0);
2481 int end
= MIN (i
+ 64, length
);
2482 for (j
= i
+ 1; j
< end
; j
++)
2485 if (ranges
[j
].exp
== NULL_TREE
|| ranges
[j
].in_p
)
2487 if (ranges
[j
].exp
== exp
)
2489 else if (TREE_CODE (ranges
[j
].exp
) == BIT_AND_EXPR
)
2491 exp2
= TREE_OPERAND (ranges
[j
].exp
, 0);
2494 else if (TREE_CODE (exp2
) == PLUS_EXPR
)
2496 exp2
= TREE_OPERAND (exp2
, 0);
2506 lowj
= ranges
[j
].low
;
2507 if (lowj
== NULL_TREE
)
2509 highj
= ranges
[j
].high
;
2510 if (highj
== NULL_TREE
)
2511 highj
= TYPE_MAX_VALUE (type
);
2513 exp2
= extract_bit_test_mask (ranges
[j
].exp
, prec
, lowi
, lowj
,
2514 highj
, &mask2
, NULL
);
2518 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2519 candidates
.safe_push (&ranges
[j
]);
2522 /* If we need otherwise 3 or more comparisons, use a bit test. */
2523 if (candidates
.length () >= 2)
2525 tree high
= wide_int_to_tree (TREE_TYPE (lowi
),
2526 wi::to_widest (lowi
)
2527 + prec
- 1 - wi::clz (mask
));
2528 operand_entry_t oe
= (*ops
)[ranges
[i
].idx
];
2530 gimple stmt
= op
? SSA_NAME_DEF_STMT (op
)
2531 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2532 location_t loc
= gimple_location (stmt
);
2533 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2535 /* See if it isn't cheaper to pretend the minimum value of the
2536 range is 0, if maximum value is small enough.
2537 We can avoid then subtraction of the minimum value, but the
2538 mask constant could be perhaps more expensive. */
2539 if (compare_tree_int (lowi
, 0) > 0
2540 && compare_tree_int (high
, prec
) < 0)
2543 HOST_WIDE_INT m
= tree_to_uhwi (lowi
);
2544 rtx reg
= gen_raw_REG (word_mode
, 10000);
2545 bool speed_p
= optimize_bb_for_speed_p (gimple_bb (stmt
));
2546 cost_diff
= set_rtx_cost (gen_rtx_PLUS (word_mode
, reg
,
2547 GEN_INT (-m
)), speed_p
);
2548 rtx r
= immed_wide_int_const (mask
, word_mode
);
2549 cost_diff
+= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
2551 r
= immed_wide_int_const (wi::lshift (mask
, m
), word_mode
);
2552 cost_diff
-= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
2556 mask
= wi::lshift (mask
, m
);
2557 lowi
= build_zero_cst (TREE_TYPE (lowi
));
2561 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2563 if (tem
== NULL_TREE
|| is_gimple_val (tem
))
2565 tree etype
= unsigned_type_for (TREE_TYPE (exp
));
2566 exp
= fold_build2_loc (loc
, MINUS_EXPR
, etype
,
2567 fold_convert_loc (loc
, etype
, exp
),
2568 fold_convert_loc (loc
, etype
, lowi
));
2569 exp
= fold_convert_loc (loc
, integer_type_node
, exp
);
2570 tree word_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
2571 exp
= fold_build2_loc (loc
, LSHIFT_EXPR
, word_type
,
2572 build_int_cst (word_type
, 1), exp
);
2573 exp
= fold_build2_loc (loc
, BIT_AND_EXPR
, word_type
, exp
,
2574 wide_int_to_tree (word_type
, mask
));
2575 exp
= fold_build2_loc (loc
, EQ_EXPR
, optype
, exp
,
2576 build_zero_cst (word_type
));
2577 if (is_gimple_val (exp
))
2580 /* The shift might have undefined behavior if TEM is true,
2581 but reassociate_bb isn't prepared to have basic blocks
2582 split when it is running. So, temporarily emit a code
2583 with BIT_IOR_EXPR instead of &&, and fix it up in
2586 tem
= force_gimple_operand (tem
, &seq
, true, NULL_TREE
);
2587 gcc_assert (TREE_CODE (tem
) == SSA_NAME
);
2588 gimple_set_visited (SSA_NAME_DEF_STMT (tem
), true);
2590 exp
= force_gimple_operand (exp
, &seq2
, true, NULL_TREE
);
2591 gimple_seq_add_seq_without_update (&seq
, seq2
);
2592 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2593 gimple_set_visited (SSA_NAME_DEF_STMT (exp
), true);
2595 = gimple_build_assign_with_ops (BIT_IOR_EXPR
,
2596 make_ssa_name (optype
, NULL
),
2598 gimple_set_location (g
, loc
);
2599 gimple_seq_add_stmt_without_update (&seq
, g
);
2600 exp
= gimple_assign_lhs (g
);
2601 tree val
= build_zero_cst (optype
);
2602 if (update_range_test (&ranges
[i
], NULL
, candidates
.address (),
2603 candidates
.length (), opcode
, ops
, exp
,
2604 seq
, false, val
, val
, strict_overflow_p
))
2607 reassoc_branch_fixups
.safe_push (tem
);
2610 gimple_seq_discard (seq
);
2616 /* Optimize range tests, similarly how fold_range_test optimizes
2617 it on trees. The tree code for the binary
2618 operation between all the operands is OPCODE.
2619 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2620 maybe_optimize_range_tests for inter-bb range optimization.
2621 In that case if oe->op is NULL, oe->id is bb->index whose
2622 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2623 the actual opcode. */
2626 optimize_range_tests (enum tree_code opcode
,
2627 vec
<operand_entry_t
> *ops
)
2629 unsigned int length
= ops
->length (), i
, j
, first
;
2631 struct range_entry
*ranges
;
2632 bool any_changes
= false;
2637 ranges
= XNEWVEC (struct range_entry
, length
);
2638 for (i
= 0; i
< length
; i
++)
2642 init_range_entry (ranges
+ i
, oe
->op
,
2644 last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
)));
2645 /* For | invert it now, we will invert it again before emitting
2646 the optimized expression. */
2647 if (opcode
== BIT_IOR_EXPR
2648 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2649 ranges
[i
].in_p
= !ranges
[i
].in_p
;
2652 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
2653 for (i
= 0; i
< length
; i
++)
2654 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
2657 /* Try to merge ranges. */
2658 for (first
= i
; i
< length
; i
++)
2660 tree low
= ranges
[i
].low
;
2661 tree high
= ranges
[i
].high
;
2662 int in_p
= ranges
[i
].in_p
;
2663 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2664 int update_fail_count
= 0;
2666 for (j
= i
+ 1; j
< length
; j
++)
2668 if (ranges
[i
].exp
!= ranges
[j
].exp
)
2670 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
2671 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
2673 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2679 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, NULL
, j
- i
- 1,
2680 opcode
, ops
, ranges
[i
].exp
, NULL
, in_p
,
2681 low
, high
, strict_overflow_p
))
2686 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2687 while update_range_test would fail. */
2688 else if (update_fail_count
== 64)
2691 ++update_fail_count
;
2694 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
2697 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
2698 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
2700 if (lshift_cheap_p (optimize_function_for_speed_p (cfun
)))
2701 any_changes
|= optimize_range_tests_to_bit_test (opcode
, first
, length
,
2704 if (any_changes
&& opcode
!= ERROR_MARK
)
2707 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
2709 if (oe
->op
== error_mark_node
)
2718 XDELETEVEC (ranges
);
2722 /* Return true if STMT is a cast like:
2728 # _345 = PHI <_123(N), 1(...), 1(...)>
2729 where _234 has bool type, _123 has single use and
2730 bb N has a single successor M. This is commonly used in
2731 the last block of a range test. */
2734 final_range_test_p (gimple stmt
)
2736 basic_block bb
, rhs_bb
;
2739 use_operand_p use_p
;
2742 if (!gimple_assign_cast_p (stmt
))
2744 bb
= gimple_bb (stmt
);
2745 if (!single_succ_p (bb
))
2747 e
= single_succ_edge (bb
);
2748 if (e
->flags
& EDGE_COMPLEX
)
2751 lhs
= gimple_assign_lhs (stmt
);
2752 rhs
= gimple_assign_rhs1 (stmt
);
2753 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2754 || TREE_CODE (rhs
) != SSA_NAME
2755 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
)
2758 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2759 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
2762 if (gimple_code (use_stmt
) != GIMPLE_PHI
2763 || gimple_bb (use_stmt
) != e
->dest
)
2766 /* And that the rhs is defined in the same loop. */
2767 rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
));
2769 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
2775 /* Return true if BB is suitable basic block for inter-bb range test
2776 optimization. If BACKWARD is true, BB should be the only predecessor
2777 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2778 or compared with to find a common basic block to which all conditions
2779 branch to if true resp. false. If BACKWARD is false, TEST_BB should
2780 be the only predecessor of BB. */
2783 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
2786 edge_iterator ei
, ei2
;
2789 gimple_stmt_iterator gsi
;
2790 bool other_edge_seen
= false;
2795 /* Check last stmt first. */
2796 stmt
= last_stmt (bb
);
2798 || (gimple_code (stmt
) != GIMPLE_COND
2799 && (backward
|| !final_range_test_p (stmt
)))
2800 || gimple_visited_p (stmt
)
2801 || stmt_could_throw_p (stmt
)
2804 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
2807 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2808 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2809 to *OTHER_BB (if not set yet, try to find it out). */
2810 if (EDGE_COUNT (bb
->succs
) != 2)
2812 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2814 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2816 if (e
->dest
== test_bb
)
2825 if (*other_bb
== NULL
)
2827 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
2828 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2830 else if (e
->dest
== e2
->dest
)
2831 *other_bb
= e
->dest
;
2832 if (*other_bb
== NULL
)
2835 if (e
->dest
== *other_bb
)
2836 other_edge_seen
= true;
2840 if (*other_bb
== NULL
|| !other_edge_seen
)
2843 else if (single_succ (bb
) != *other_bb
)
2846 /* Now check all PHIs of *OTHER_BB. */
2847 e
= find_edge (bb
, *other_bb
);
2848 e2
= find_edge (test_bb
, *other_bb
);
2849 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2851 gimple phi
= gsi_stmt (gsi
);
2852 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2853 corresponding to BB and TEST_BB predecessor must be the same. */
2854 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
2855 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
2857 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2858 one of the PHIs should have the lhs of the last stmt in
2859 that block as PHI arg and that PHI should have 0 or 1
2860 corresponding to it in all other range test basic blocks
2864 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
2865 == gimple_assign_lhs (stmt
)
2866 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
2867 || integer_onep (gimple_phi_arg_def (phi
,
2873 gimple test_last
= last_stmt (test_bb
);
2874 if (gimple_code (test_last
) != GIMPLE_COND
2875 && gimple_phi_arg_def (phi
, e2
->dest_idx
)
2876 == gimple_assign_lhs (test_last
)
2877 && (integer_zerop (gimple_phi_arg_def (phi
, e
->dest_idx
))
2878 || integer_onep (gimple_phi_arg_def (phi
, e
->dest_idx
))))
2888 /* Return true if BB doesn't have side-effects that would disallow
2889 range test optimization, all SSA_NAMEs set in the bb are consumed
2890 in the bb and there are no PHIs. */
2893 no_side_effect_bb (basic_block bb
)
2895 gimple_stmt_iterator gsi
;
2898 if (!gimple_seq_empty_p (phi_nodes (bb
)))
2900 last
= last_stmt (bb
);
2901 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2903 gimple stmt
= gsi_stmt (gsi
);
2905 imm_use_iterator imm_iter
;
2906 use_operand_p use_p
;
2908 if (is_gimple_debug (stmt
))
2910 if (gimple_has_side_effects (stmt
))
2914 if (!is_gimple_assign (stmt
))
2916 lhs
= gimple_assign_lhs (stmt
);
2917 if (TREE_CODE (lhs
) != SSA_NAME
)
2919 if (gimple_assign_rhs_could_trap_p (stmt
))
2921 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
2923 gimple use_stmt
= USE_STMT (use_p
);
2924 if (is_gimple_debug (use_stmt
))
2926 if (gimple_bb (use_stmt
) != bb
)
2933 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2934 return true and fill in *OPS recursively. */
2937 get_ops (tree var
, enum tree_code code
, vec
<operand_entry_t
> *ops
,
2940 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2944 if (!is_reassociable_op (stmt
, code
, loop
))
2947 rhs
[0] = gimple_assign_rhs1 (stmt
);
2948 rhs
[1] = gimple_assign_rhs2 (stmt
);
2949 gimple_set_visited (stmt
, true);
2950 for (i
= 0; i
< 2; i
++)
2951 if (TREE_CODE (rhs
[i
]) == SSA_NAME
2952 && !get_ops (rhs
[i
], code
, ops
, loop
)
2953 && has_single_use (rhs
[i
]))
2955 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
2961 ops
->safe_push (oe
);
2966 /* Find the ops that were added by get_ops starting from VAR, see if
2967 they were changed during update_range_test and if yes, create new
2971 update_ops (tree var
, enum tree_code code
, vec
<operand_entry_t
> ops
,
2972 unsigned int *pidx
, struct loop
*loop
)
2974 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2978 if (!is_reassociable_op (stmt
, code
, loop
))
2981 rhs
[0] = gimple_assign_rhs1 (stmt
);
2982 rhs
[1] = gimple_assign_rhs2 (stmt
);
2985 for (i
= 0; i
< 2; i
++)
2986 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
2988 rhs
[2 + i
] = update_ops (rhs
[i
], code
, ops
, pidx
, loop
);
2989 if (rhs
[2 + i
] == NULL_TREE
)
2991 if (has_single_use (rhs
[i
]))
2992 rhs
[2 + i
] = ops
[(*pidx
)++]->op
;
2994 rhs
[2 + i
] = rhs
[i
];
2997 if ((rhs
[2] != rhs
[0] || rhs
[3] != rhs
[1])
2998 && (rhs
[2] != rhs
[1] || rhs
[3] != rhs
[0]))
3000 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3001 var
= make_ssa_name (TREE_TYPE (var
), NULL
);
3002 gimple g
= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt
),
3003 var
, rhs
[2], rhs
[3]);
3004 gimple_set_uid (g
, gimple_uid (stmt
));
3005 gimple_set_visited (g
, true);
3006 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3011 /* Structure to track the initial value passed to get_ops and
3012 the range in the ops vector for each basic block. */
3014 struct inter_bb_range_test_entry
3017 unsigned int first_idx
, last_idx
;
3020 /* Inter-bb range test optimization. */
3023 maybe_optimize_range_tests (gimple stmt
)
3025 basic_block first_bb
= gimple_bb (stmt
);
3026 basic_block last_bb
= first_bb
;
3027 basic_block other_bb
= NULL
;
3031 auto_vec
<operand_entry_t
> ops
;
3032 auto_vec
<inter_bb_range_test_entry
> bbinfo
;
3033 bool any_changes
= false;
3035 /* Consider only basic blocks that end with GIMPLE_COND or
3036 a cast statement satisfying final_range_test_p. All
3037 but the last bb in the first_bb .. last_bb range
3038 should end with GIMPLE_COND. */
3039 if (gimple_code (stmt
) == GIMPLE_COND
)
3041 if (EDGE_COUNT (first_bb
->succs
) != 2)
3044 else if (final_range_test_p (stmt
))
3045 other_bb
= single_succ (first_bb
);
3049 if (stmt_could_throw_p (stmt
))
3052 /* As relative ordering of post-dominator sons isn't fixed,
3053 maybe_optimize_range_tests can be called first on any
3054 bb in the range we want to optimize. So, start searching
3055 backwards, if first_bb can be set to a predecessor. */
3056 while (single_pred_p (first_bb
))
3058 basic_block pred_bb
= single_pred (first_bb
);
3059 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, true))
3061 if (!no_side_effect_bb (first_bb
))
3065 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3066 Before starting forward search in last_bb successors, find
3067 out the other_bb. */
3068 if (first_bb
== last_bb
)
3071 /* As non-GIMPLE_COND last stmt always terminates the range,
3072 if forward search didn't discover anything, just give up. */
3073 if (gimple_code (stmt
) != GIMPLE_COND
)
3075 /* Look at both successors. Either it ends with a GIMPLE_COND
3076 and satisfies suitable_cond_bb, or ends with a cast and
3077 other_bb is that cast's successor. */
3078 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
3079 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
3080 || e
->dest
== first_bb
)
3082 else if (single_pred_p (e
->dest
))
3084 stmt
= last_stmt (e
->dest
);
3086 && gimple_code (stmt
) == GIMPLE_COND
3087 && EDGE_COUNT (e
->dest
->succs
) == 2)
3089 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
, true))
3095 && final_range_test_p (stmt
)
3096 && find_edge (first_bb
, single_succ (e
->dest
)))
3098 other_bb
= single_succ (e
->dest
);
3099 if (other_bb
== first_bb
)
3103 if (other_bb
== NULL
)
3106 /* Now do the forward search, moving last_bb to successor bbs
3107 that aren't other_bb. */
3108 while (EDGE_COUNT (last_bb
->succs
) == 2)
3110 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
3111 if (e
->dest
!= other_bb
)
3115 if (!single_pred_p (e
->dest
))
3117 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, false))
3119 if (!no_side_effect_bb (e
->dest
))
3123 if (first_bb
== last_bb
)
3125 /* Here basic blocks first_bb through last_bb's predecessor
3126 end with GIMPLE_COND, all of them have one of the edges to
3127 other_bb and another to another block in the range,
3128 all blocks except first_bb don't have side-effects and
3129 last_bb ends with either GIMPLE_COND, or cast satisfying
3130 final_range_test_p. */
3131 for (bb
= last_bb
; ; bb
= single_pred (bb
))
3133 enum tree_code code
;
3135 inter_bb_range_test_entry bb_ent
;
3137 bb_ent
.op
= NULL_TREE
;
3138 bb_ent
.first_idx
= ops
.length ();
3139 bb_ent
.last_idx
= bb_ent
.first_idx
;
3140 e
= find_edge (bb
, other_bb
);
3141 stmt
= last_stmt (bb
);
3142 gimple_set_visited (stmt
, true);
3143 if (gimple_code (stmt
) != GIMPLE_COND
)
3145 use_operand_p use_p
;
3150 lhs
= gimple_assign_lhs (stmt
);
3151 rhs
= gimple_assign_rhs1 (stmt
);
3152 gcc_assert (bb
== last_bb
);
3159 # _345 = PHI <_123(N), 1(...), 1(...)>
3161 or 0 instead of 1. If it is 0, the _234
3162 range test is anded together with all the
3163 other range tests, if it is 1, it is ored with
3165 single_imm_use (lhs
, &use_p
, &phi
);
3166 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
3167 e2
= find_edge (first_bb
, other_bb
);
3169 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
3170 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
3171 code
= BIT_AND_EXPR
;
3174 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
3175 code
= BIT_IOR_EXPR
;
3178 /* If _234 SSA_NAME_DEF_STMT is
3180 (or &, corresponding to 1/0 in the phi arguments,
3181 push into ops the individual range test arguments
3182 of the bitwise or resp. and, recursively. */
3183 if (!get_ops (rhs
, code
, &ops
,
3184 loop_containing_stmt (stmt
))
3185 && has_single_use (rhs
))
3187 /* Otherwise, push the _234 range test itself. */
3189 = (operand_entry_t
) pool_alloc (operand_entry_pool
);
3199 bb_ent
.last_idx
= ops
.length ();
3201 bbinfo
.safe_push (bb_ent
);
3204 /* Otherwise stmt is GIMPLE_COND. */
3205 code
= gimple_cond_code (stmt
);
3206 lhs
= gimple_cond_lhs (stmt
);
3207 rhs
= gimple_cond_rhs (stmt
);
3208 if (TREE_CODE (lhs
) == SSA_NAME
3209 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3210 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
3211 || rhs
!= boolean_false_node
3212 /* Either push into ops the individual bitwise
3213 or resp. and operands, depending on which
3214 edge is other_bb. */
3215 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
3216 ^ (code
== EQ_EXPR
))
3217 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
3218 loop_containing_stmt (stmt
))))
3220 /* Or push the GIMPLE_COND stmt itself. */
3222 = (operand_entry_t
) pool_alloc (operand_entry_pool
);
3225 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
3226 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
3227 /* oe->op = NULL signs that there is no SSA_NAME
3228 for the range test, and oe->id instead is the
3229 basic block number, at which's end the GIMPLE_COND
3237 else if (ops
.length () > bb_ent
.first_idx
)
3240 bb_ent
.last_idx
= ops
.length ();
3242 bbinfo
.safe_push (bb_ent
);
3246 if (ops
.length () > 1)
3247 any_changes
= optimize_range_tests (ERROR_MARK
, &ops
);
3251 /* update_ops relies on has_single_use predicates returning the
3252 same values as it did during get_ops earlier. Additionally it
3253 never removes statements, only adds new ones and it should walk
3254 from the single imm use and check the predicate already before
3255 making those changes.
3256 On the other side, the handling of GIMPLE_COND directly can turn
3257 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3258 it needs to be done in a separate loop afterwards. */
3259 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
3261 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
3262 && bbinfo
[idx
].op
!= NULL_TREE
)
3266 stmt
= last_stmt (bb
);
3267 new_op
= update_ops (bbinfo
[idx
].op
,
3269 ops
[bbinfo
[idx
].first_idx
]->rank
,
3270 ops
, &bbinfo
[idx
].first_idx
,
3271 loop_containing_stmt (stmt
));
3272 if (new_op
== NULL_TREE
)
3274 gcc_assert (bb
== last_bb
);
3275 new_op
= ops
[bbinfo
[idx
].first_idx
++]->op
;
3277 if (bbinfo
[idx
].op
!= new_op
)
3279 imm_use_iterator iter
;
3280 use_operand_p use_p
;
3281 gimple use_stmt
, cast_stmt
= NULL
;
3283 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, bbinfo
[idx
].op
)
3284 if (is_gimple_debug (use_stmt
))
3286 else if (gimple_code (use_stmt
) == GIMPLE_COND
3287 || gimple_code (use_stmt
) == GIMPLE_PHI
)
3288 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
3289 SET_USE (use_p
, new_op
);
3290 else if (gimple_assign_cast_p (use_stmt
))
3291 cast_stmt
= use_stmt
;
3296 gcc_assert (bb
== last_bb
);
3297 tree lhs
= gimple_assign_lhs (cast_stmt
);
3298 tree new_lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
3299 enum tree_code rhs_code
3300 = gimple_assign_rhs_code (cast_stmt
);
3302 if (is_gimple_min_invariant (new_op
))
3304 new_op
= fold_convert (TREE_TYPE (lhs
), new_op
);
3305 g
= gimple_build_assign (new_lhs
, new_op
);
3308 g
= gimple_build_assign_with_ops (rhs_code
, new_lhs
,
3310 gimple_stmt_iterator gsi
= gsi_for_stmt (cast_stmt
);
3311 gimple_set_uid (g
, gimple_uid (cast_stmt
));
3312 gimple_set_visited (g
, true);
3313 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3314 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
3315 if (is_gimple_debug (use_stmt
))
3317 else if (gimple_code (use_stmt
) == GIMPLE_COND
3318 || gimple_code (use_stmt
) == GIMPLE_PHI
)
3319 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
3320 SET_USE (use_p
, new_lhs
);
3329 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
3331 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
3332 && bbinfo
[idx
].op
== NULL_TREE
3333 && ops
[bbinfo
[idx
].first_idx
]->op
!= NULL_TREE
)
3335 stmt
= last_stmt (bb
);
3336 if (integer_zerop (ops
[bbinfo
[idx
].first_idx
]->op
))
3337 gimple_cond_make_false (stmt
);
3338 else if (integer_onep (ops
[bbinfo
[idx
].first_idx
]->op
))
3339 gimple_cond_make_true (stmt
);
3342 gimple_cond_set_code (stmt
, NE_EXPR
);
3343 gimple_cond_set_lhs (stmt
, ops
[bbinfo
[idx
].first_idx
]->op
);
3344 gimple_cond_set_rhs (stmt
, boolean_false_node
);
3354 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3355 of STMT in its operands. This is also known as a "destructive
3356 update" operation. */
3359 is_phi_for_stmt (gimple stmt
, tree operand
)
3363 use_operand_p arg_p
;
3366 if (TREE_CODE (operand
) != SSA_NAME
)
3369 lhs
= gimple_assign_lhs (stmt
);
3371 def_stmt
= SSA_NAME_DEF_STMT (operand
);
3372 if (gimple_code (def_stmt
) != GIMPLE_PHI
)
3375 FOR_EACH_PHI_ARG (arg_p
, def_stmt
, i
, SSA_OP_USE
)
3376 if (lhs
== USE_FROM_PTR (arg_p
))
3381 /* Remove def stmt of VAR if VAR has zero uses and recurse
3382 on rhs1 operand if so. */
3385 remove_visited_stmt_chain (tree var
)
3388 gimple_stmt_iterator gsi
;
3392 if (TREE_CODE (var
) != SSA_NAME
|| !has_zero_uses (var
))
3394 stmt
= SSA_NAME_DEF_STMT (var
);
3395 if (is_gimple_assign (stmt
) && gimple_visited_p (stmt
))
3397 var
= gimple_assign_rhs1 (stmt
);
3398 gsi
= gsi_for_stmt (stmt
);
3399 reassoc_remove_stmt (&gsi
);
3400 release_defs (stmt
);
3407 /* This function checks three consecutive operands in
3408 passed operands vector OPS starting from OPINDEX and
3409 swaps two operands if it is profitable for binary operation
3410 consuming OPINDEX + 1 and OPINDEX + 2 operands.
3412 We pair ops with the same rank if possible.
3414 The alternative we try is to see if STMT is a destructive
3415 update style statement, which is like:
3418 In that case, we want to use the destructive update form to
3419 expose the possible vectorizer sum reduction opportunity.
3420 In that case, the third operand will be the phi node. This
3421 check is not performed if STMT is null.
3423 We could, of course, try to be better as noted above, and do a
3424 lot of work to try to find these opportunities in >3 operand
3425 cases, but it is unlikely to be worth it. */
3428 swap_ops_for_binary_stmt (vec
<operand_entry_t
> ops
,
3429 unsigned int opindex
, gimple stmt
)
3431 operand_entry_t oe1
, oe2
, oe3
;
3434 oe2
= ops
[opindex
+ 1];
3435 oe3
= ops
[opindex
+ 2];
3437 if ((oe1
->rank
== oe2
->rank
3438 && oe2
->rank
!= oe3
->rank
)
3439 || (stmt
&& is_phi_for_stmt (stmt
, oe3
->op
)
3440 && !is_phi_for_stmt (stmt
, oe1
->op
)
3441 && !is_phi_for_stmt (stmt
, oe2
->op
)))
3443 struct operand_entry temp
= *oe3
;
3445 oe3
->rank
= oe1
->rank
;
3447 oe1
->rank
= temp
.rank
;
3449 else if ((oe1
->rank
== oe3
->rank
3450 && oe2
->rank
!= oe3
->rank
)
3451 || (stmt
&& is_phi_for_stmt (stmt
, oe2
->op
)
3452 && !is_phi_for_stmt (stmt
, oe1
->op
)
3453 && !is_phi_for_stmt (stmt
, oe3
->op
)))
3455 struct operand_entry temp
= *oe2
;
3457 oe2
->rank
= oe1
->rank
;
3459 oe1
->rank
= temp
.rank
;
3463 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3464 two definitions, otherwise return STMT. */
3466 static inline gimple
3467 find_insert_point (gimple stmt
, tree rhs1
, tree rhs2
)
3469 if (TREE_CODE (rhs1
) == SSA_NAME
3470 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs1
)))
3471 stmt
= SSA_NAME_DEF_STMT (rhs1
);
3472 if (TREE_CODE (rhs2
) == SSA_NAME
3473 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs2
)))
3474 stmt
= SSA_NAME_DEF_STMT (rhs2
);
3478 /* Recursively rewrite our linearized statements so that the operators
3479 match those in OPS[OPINDEX], putting the computation in rank
3480 order. Return new lhs. */
3483 rewrite_expr_tree (gimple stmt
, unsigned int opindex
,
3484 vec
<operand_entry_t
> ops
, bool changed
)
3486 tree rhs1
= gimple_assign_rhs1 (stmt
);
3487 tree rhs2
= gimple_assign_rhs2 (stmt
);
3488 tree lhs
= gimple_assign_lhs (stmt
);
3491 /* The final recursion case for this function is that you have
3492 exactly two operations left.
3493 If we had exactly one op in the entire list to start with, we
3494 would have never called this function, and the tail recursion
3495 rewrites them one at a time. */
3496 if (opindex
+ 2 == ops
.length ())
3498 operand_entry_t oe1
, oe2
;
3501 oe2
= ops
[opindex
+ 1];
3503 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
3505 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3506 unsigned int uid
= gimple_uid (stmt
);
3508 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3510 fprintf (dump_file
, "Transforming ");
3511 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3516 gimple insert_point
= find_insert_point (stmt
, oe1
->op
, oe2
->op
);
3517 lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
3519 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt
),
3520 lhs
, oe1
->op
, oe2
->op
);
3521 gimple_set_uid (stmt
, uid
);
3522 gimple_set_visited (stmt
, true);
3523 if (insert_point
== gsi_stmt (gsi
))
3524 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3526 insert_stmt_after (stmt
, insert_point
);
3530 gcc_checking_assert (find_insert_point (stmt
, oe1
->op
, oe2
->op
)
3532 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
3533 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
3537 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
3538 remove_visited_stmt_chain (rhs1
);
3540 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3542 fprintf (dump_file
, " into ");
3543 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3549 /* If we hit here, we should have 3 or more ops left. */
3550 gcc_assert (opindex
+ 2 < ops
.length ());
3552 /* Rewrite the next operator. */
3555 /* Recurse on the LHS of the binary operator, which is guaranteed to
3556 be the non-leaf side. */
3558 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), opindex
+ 1, ops
,
3559 changed
|| oe
->op
!= rhs2
);
3561 if (oe
->op
!= rhs2
|| new_rhs1
!= rhs1
)
3563 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3565 fprintf (dump_file
, "Transforming ");
3566 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3569 /* If changed is false, this is either opindex == 0
3570 or all outer rhs2's were equal to corresponding oe->op,
3571 and powi_result is NULL.
3572 That means lhs is equivalent before and after reassociation.
3573 Otherwise ensure the old lhs SSA_NAME is not reused and
3574 create a new stmt as well, so that any debug stmts will be
3575 properly adjusted. */
3578 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3579 unsigned int uid
= gimple_uid (stmt
);
3580 gimple insert_point
= find_insert_point (stmt
, new_rhs1
, oe
->op
);
3582 lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
3583 stmt
= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt
),
3584 lhs
, new_rhs1
, oe
->op
);
3585 gimple_set_uid (stmt
, uid
);
3586 gimple_set_visited (stmt
, true);
3587 if (insert_point
== gsi_stmt (gsi
))
3588 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3590 insert_stmt_after (stmt
, insert_point
);
3594 gcc_checking_assert (find_insert_point (stmt
, new_rhs1
, oe
->op
)
3596 gimple_assign_set_rhs1 (stmt
, new_rhs1
);
3597 gimple_assign_set_rhs2 (stmt
, oe
->op
);
3601 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3603 fprintf (dump_file
, " into ");
3604 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3610 /* Find out how many cycles we need to compute statements chain.
3611 OPS_NUM holds number of statements in a chain. CPU_WIDTH is a
3612 maximum number of independent statements we may execute per cycle. */
3615 get_required_cycles (int ops_num
, int cpu_width
)
3621 /* While we have more than 2 * cpu_width operands
3622 we may reduce number of operands by cpu_width
3624 res
= ops_num
/ (2 * cpu_width
);
3626 /* Remained operands count may be reduced twice per cycle
3627 until we have only one operand. */
3628 rest
= (unsigned)(ops_num
- res
* cpu_width
);
3629 elog
= exact_log2 (rest
);
3633 res
+= floor_log2 (rest
) + 1;
3638 /* Returns an optimal number of registers to use for computation of
3639 given statements. */
3642 get_reassociation_width (int ops_num
, enum tree_code opc
,
3645 int param_width
= PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH
);
3650 if (param_width
> 0)
3651 width
= param_width
;
3653 width
= targetm
.sched
.reassociation_width (opc
, mode
);
3658 /* Get the minimal time required for sequence computation. */
3659 cycles_best
= get_required_cycles (ops_num
, width
);
3661 /* Check if we may use less width and still compute sequence for
3662 the same time. It will allow us to reduce registers usage.
3663 get_required_cycles is monotonically increasing with lower width
3664 so we can perform a binary search for the minimal width that still
3665 results in the optimal cycle count. */
3667 while (width
> width_min
)
3669 int width_mid
= (width
+ width_min
) / 2;
3671 if (get_required_cycles (ops_num
, width_mid
) == cycles_best
)
3673 else if (width_min
< width_mid
)
3674 width_min
= width_mid
;
3682 /* Recursively rewrite our linearized statements so that the operators
3683 match those in OPS[OPINDEX], putting the computation in rank
3684 order and trying to allow operations to be executed in
3688 rewrite_expr_tree_parallel (gimple stmt
, int width
,
3689 vec
<operand_entry_t
> ops
)
3691 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
3692 int op_num
= ops
.length ();
3693 int stmt_num
= op_num
- 1;
3694 gimple
*stmts
= XALLOCAVEC (gimple
, stmt_num
);
3695 int op_index
= op_num
- 1;
3697 int ready_stmts_end
= 0;
3699 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
3701 /* We start expression rewriting from the top statements.
3702 So, in this loop we create a full list of statements
3703 we will work with. */
3704 stmts
[stmt_num
- 1] = stmt
;
3705 for (i
= stmt_num
- 2; i
>= 0; i
--)
3706 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
3708 for (i
= 0; i
< stmt_num
; i
++)
3712 /* Determine whether we should use results of
3713 already handled statements or not. */
3714 if (ready_stmts_end
== 0
3715 && (i
- stmt_index
>= width
|| op_index
< 1))
3716 ready_stmts_end
= i
;
3718 /* Now we choose operands for the next statement. Nonzero
3719 value in ready_stmts_end means here that we should use
3720 the result of already generated statements as new operand. */
3721 if (ready_stmts_end
> 0)
3723 op1
= gimple_assign_lhs (stmts
[stmt_index
++]);
3724 if (ready_stmts_end
> stmt_index
)
3725 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3726 else if (op_index
>= 0)
3727 op2
= ops
[op_index
--]->op
;
3730 gcc_assert (stmt_index
< i
);
3731 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3734 if (stmt_index
>= ready_stmts_end
)
3735 ready_stmts_end
= 0;
3740 swap_ops_for_binary_stmt (ops
, op_index
- 2, NULL
);
3741 op2
= ops
[op_index
--]->op
;
3742 op1
= ops
[op_index
--]->op
;
3745 /* If we emit the last statement then we should put
3746 operands into the last statement. It will also
3748 if (op_index
< 0 && stmt_index
== i
)
3751 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3753 fprintf (dump_file
, "Transforming ");
3754 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3757 /* We keep original statement only for the last one. All
3758 others are recreated. */
3759 if (i
== stmt_num
- 1)
3761 gimple_assign_set_rhs1 (stmts
[i
], op1
);
3762 gimple_assign_set_rhs2 (stmts
[i
], op2
);
3763 update_stmt (stmts
[i
]);
3766 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
), op1
, op2
, opcode
);
3768 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3770 fprintf (dump_file
, " into ");
3771 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3775 remove_visited_stmt_chain (last_rhs1
);
3778 /* Transform STMT, which is really (A +B) + (C + D) into the left
3779 linear form, ((A+B)+C)+D.
3780 Recurse on D if necessary. */
3783 linearize_expr (gimple stmt
)
3785 gimple_stmt_iterator gsi
;
3786 gimple binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3787 gimple binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3788 gimple oldbinrhs
= binrhs
;
3789 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
3790 gimple newbinrhs
= NULL
;
3791 struct loop
*loop
= loop_containing_stmt (stmt
);
3792 tree lhs
= gimple_assign_lhs (stmt
);
3794 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
3795 && is_reassociable_op (binrhs
, rhscode
, loop
));
3797 gsi
= gsi_for_stmt (stmt
);
3799 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
3800 binrhs
= gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs
),
3801 make_ssa_name (TREE_TYPE (lhs
), NULL
),
3802 gimple_assign_lhs (binlhs
),
3803 gimple_assign_rhs2 (binrhs
));
3804 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
3805 gsi_insert_before (&gsi
, binrhs
, GSI_SAME_STMT
);
3806 gimple_set_uid (binrhs
, gimple_uid (stmt
));
3808 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
3809 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3811 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3813 fprintf (dump_file
, "Linearized: ");
3814 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3817 reassociate_stats
.linearized
++;
3820 gsi
= gsi_for_stmt (oldbinrhs
);
3821 reassoc_remove_stmt (&gsi
);
3822 release_defs (oldbinrhs
);
3824 gimple_set_visited (stmt
, true);
3825 gimple_set_visited (binlhs
, true);
3826 gimple_set_visited (binrhs
, true);
3828 /* Tail recurse on the new rhs if it still needs reassociation. */
3829 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
3830 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3831 want to change the algorithm while converting to tuples. */
3832 linearize_expr (stmt
);
3835 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3836 it. Otherwise, return NULL. */
3839 get_single_immediate_use (tree lhs
)
3841 use_operand_p immuse
;
3844 if (TREE_CODE (lhs
) == SSA_NAME
3845 && single_imm_use (lhs
, &immuse
, &immusestmt
)
3846 && is_gimple_assign (immusestmt
))
3852 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3853 representing the negated value. Insertions of any necessary
3854 instructions go before GSI.
3855 This function is recursive in that, if you hand it "a_5" as the
3856 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3857 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3860 negate_value (tree tonegate
, gimple_stmt_iterator
*gsip
)
3862 gimple negatedefstmt
= NULL
;
3863 tree resultofnegate
;
3864 gimple_stmt_iterator gsi
;
3867 /* If we are trying to negate a name, defined by an add, negate the
3868 add operands instead. */
3869 if (TREE_CODE (tonegate
) == SSA_NAME
)
3870 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
3871 if (TREE_CODE (tonegate
) == SSA_NAME
3872 && is_gimple_assign (negatedefstmt
)
3873 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
3874 && has_single_use (gimple_assign_lhs (negatedefstmt
))
3875 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
3877 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
3878 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
3879 tree lhs
= gimple_assign_lhs (negatedefstmt
);
3882 gsi
= gsi_for_stmt (negatedefstmt
);
3883 rhs1
= negate_value (rhs1
, &gsi
);
3885 gsi
= gsi_for_stmt (negatedefstmt
);
3886 rhs2
= negate_value (rhs2
, &gsi
);
3888 gsi
= gsi_for_stmt (negatedefstmt
);
3889 lhs
= make_ssa_name (TREE_TYPE (lhs
), NULL
);
3890 gimple_set_visited (negatedefstmt
, true);
3891 g
= gimple_build_assign_with_ops (PLUS_EXPR
, lhs
, rhs1
, rhs2
);
3892 gimple_set_uid (g
, gimple_uid (negatedefstmt
));
3893 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3897 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
3898 resultofnegate
= force_gimple_operand_gsi (gsip
, tonegate
, true,
3899 NULL_TREE
, true, GSI_SAME_STMT
);
3901 uid
= gimple_uid (gsi_stmt (gsi
));
3902 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
3904 gimple stmt
= gsi_stmt (gsi
);
3905 if (gimple_uid (stmt
) != 0)
3907 gimple_set_uid (stmt
, uid
);
3909 return resultofnegate
;
3912 /* Return true if we should break up the subtract in STMT into an add
3913 with negate. This is true when we the subtract operands are really
3914 adds, or the subtract itself is used in an add expression. In
3915 either case, breaking up the subtract into an add with negate
3916 exposes the adds to reassociation. */
3919 should_break_up_subtract (gimple stmt
)
3921 tree lhs
= gimple_assign_lhs (stmt
);
3922 tree binlhs
= gimple_assign_rhs1 (stmt
);
3923 tree binrhs
= gimple_assign_rhs2 (stmt
);
3925 struct loop
*loop
= loop_containing_stmt (stmt
);
3927 if (TREE_CODE (binlhs
) == SSA_NAME
3928 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs
), PLUS_EXPR
, loop
))
3931 if (TREE_CODE (binrhs
) == SSA_NAME
3932 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
), PLUS_EXPR
, loop
))
3935 if (TREE_CODE (lhs
) == SSA_NAME
3936 && (immusestmt
= get_single_immediate_use (lhs
))
3937 && is_gimple_assign (immusestmt
)
3938 && (gimple_assign_rhs_code (immusestmt
) == PLUS_EXPR
3939 || gimple_assign_rhs_code (immusestmt
) == MULT_EXPR
))
3944 /* Transform STMT from A - B into A + -B. */
3947 break_up_subtract (gimple stmt
, gimple_stmt_iterator
*gsip
)
3949 tree rhs1
= gimple_assign_rhs1 (stmt
);
3950 tree rhs2
= gimple_assign_rhs2 (stmt
);
3952 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3954 fprintf (dump_file
, "Breaking up subtract ");
3955 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3958 rhs2
= negate_value (rhs2
, gsip
);
3959 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
3963 /* Determine whether STMT is a builtin call that raises an SSA name
3964 to an integer power and has only one use. If so, and this is early
3965 reassociation and unsafe math optimizations are permitted, place
3966 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3967 If any of these conditions does not hold, return FALSE. */
3970 acceptable_pow_call (gimple stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
3973 REAL_VALUE_TYPE c
, cint
;
3975 if (!first_pass_instance
3976 || !flag_unsafe_math_optimizations
3977 || !is_gimple_call (stmt
)
3978 || !has_single_use (gimple_call_lhs (stmt
)))
3981 fndecl
= gimple_call_fndecl (stmt
);
3984 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
3987 switch (DECL_FUNCTION_CODE (fndecl
))
3989 CASE_FLT_FN (BUILT_IN_POW
):
3990 *base
= gimple_call_arg (stmt
, 0);
3991 arg1
= gimple_call_arg (stmt
, 1);
3993 if (TREE_CODE (arg1
) != REAL_CST
)
3996 c
= TREE_REAL_CST (arg1
);
3998 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
4001 *exponent
= real_to_integer (&c
);
4002 real_from_integer (&cint
, VOIDmode
, *exponent
, SIGNED
);
4003 if (!real_identical (&c
, &cint
))
4008 CASE_FLT_FN (BUILT_IN_POWI
):
4009 *base
= gimple_call_arg (stmt
, 0);
4010 arg1
= gimple_call_arg (stmt
, 1);
4012 if (!tree_fits_shwi_p (arg1
))
4015 *exponent
= tree_to_shwi (arg1
);
4022 /* Expanding negative exponents is generally unproductive, so we don't
4023 complicate matters with those. Exponents of zero and one should
4024 have been handled by expression folding. */
4025 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
4031 /* Recursively linearize a binary expression that is the RHS of STMT.
4032 Place the operands of the expression tree in the vector named OPS. */
4035 linearize_expr_tree (vec
<operand_entry_t
> *ops
, gimple stmt
,
4036 bool is_associative
, bool set_visited
)
4038 tree binlhs
= gimple_assign_rhs1 (stmt
);
4039 tree binrhs
= gimple_assign_rhs2 (stmt
);
4040 gimple binlhsdef
= NULL
, binrhsdef
= NULL
;
4041 bool binlhsisreassoc
= false;
4042 bool binrhsisreassoc
= false;
4043 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
4044 struct loop
*loop
= loop_containing_stmt (stmt
);
4045 tree base
= NULL_TREE
;
4046 HOST_WIDE_INT exponent
= 0;
4049 gimple_set_visited (stmt
, true);
4051 if (TREE_CODE (binlhs
) == SSA_NAME
)
4053 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
4054 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
4055 && !stmt_could_throw_p (binlhsdef
));
4058 if (TREE_CODE (binrhs
) == SSA_NAME
)
4060 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
4061 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
4062 && !stmt_could_throw_p (binrhsdef
));
4065 /* If the LHS is not reassociable, but the RHS is, we need to swap
4066 them. If neither is reassociable, there is nothing we can do, so
4067 just put them in the ops vector. If the LHS is reassociable,
4068 linearize it. If both are reassociable, then linearize the RHS
4071 if (!binlhsisreassoc
)
4075 /* If this is not a associative operation like division, give up. */
4076 if (!is_associative
)
4078 add_to_ops_vec (ops
, binrhs
);
4082 if (!binrhsisreassoc
)
4084 if (rhscode
== MULT_EXPR
4085 && TREE_CODE (binrhs
) == SSA_NAME
4086 && acceptable_pow_call (binrhsdef
, &base
, &exponent
))
4088 add_repeat_to_ops_vec (ops
, base
, exponent
);
4089 gimple_set_visited (binrhsdef
, true);
4092 add_to_ops_vec (ops
, binrhs
);
4094 if (rhscode
== MULT_EXPR
4095 && TREE_CODE (binlhs
) == SSA_NAME
4096 && acceptable_pow_call (binlhsdef
, &base
, &exponent
))
4098 add_repeat_to_ops_vec (ops
, base
, exponent
);
4099 gimple_set_visited (binlhsdef
, true);
4102 add_to_ops_vec (ops
, binlhs
);
4107 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4109 fprintf (dump_file
, "swapping operands of ");
4110 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4113 swap_ssa_operands (stmt
,
4114 gimple_assign_rhs1_ptr (stmt
),
4115 gimple_assign_rhs2_ptr (stmt
));
4118 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4120 fprintf (dump_file
, " is now ");
4121 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4124 /* We want to make it so the lhs is always the reassociative op,
4130 else if (binrhsisreassoc
)
4132 linearize_expr (stmt
);
4133 binlhs
= gimple_assign_rhs1 (stmt
);
4134 binrhs
= gimple_assign_rhs2 (stmt
);
4137 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
4138 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
4140 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
4141 is_associative
, set_visited
);
4143 if (rhscode
== MULT_EXPR
4144 && TREE_CODE (binrhs
) == SSA_NAME
4145 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs
), &base
, &exponent
))
4147 add_repeat_to_ops_vec (ops
, base
, exponent
);
4148 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs
), true);
4151 add_to_ops_vec (ops
, binrhs
);
4154 /* Repropagate the negates back into subtracts, since no other pass
4155 currently does it. */
4158 repropagate_negates (void)
4163 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
4165 gimple user
= get_single_immediate_use (negate
);
4167 if (!user
|| !is_gimple_assign (user
))
4170 /* The negate operand can be either operand of a PLUS_EXPR
4171 (it can be the LHS if the RHS is a constant for example).
4173 Force the negate operand to the RHS of the PLUS_EXPR, then
4174 transform the PLUS_EXPR into a MINUS_EXPR. */
4175 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
4177 /* If the negated operand appears on the LHS of the
4178 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4179 to force the negated operand to the RHS of the PLUS_EXPR. */
4180 if (gimple_assign_rhs1 (user
) == negate
)
4182 swap_ssa_operands (user
,
4183 gimple_assign_rhs1_ptr (user
),
4184 gimple_assign_rhs2_ptr (user
));
4187 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4188 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4189 if (gimple_assign_rhs2 (user
) == negate
)
4191 tree rhs1
= gimple_assign_rhs1 (user
);
4192 tree rhs2
= get_unary_op (negate
, NEGATE_EXPR
);
4193 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
4194 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
, rhs2
);
4198 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
4200 if (gimple_assign_rhs1 (user
) == negate
)
4205 which we transform into
4208 This pushes down the negate which we possibly can merge
4209 into some other operation, hence insert it into the
4210 plus_negates vector. */
4211 gimple feed
= SSA_NAME_DEF_STMT (negate
);
4212 tree a
= gimple_assign_rhs1 (feed
);
4213 tree b
= gimple_assign_rhs2 (user
);
4214 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
);
4215 gimple_stmt_iterator gsi2
= gsi_for_stmt (user
);
4216 tree x
= make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed
)), NULL
);
4217 gimple g
= gimple_build_assign_with_ops (PLUS_EXPR
, x
, a
, b
);
4218 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
4219 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, x
, NULL
);
4220 user
= gsi_stmt (gsi2
);
4222 reassoc_remove_stmt (&gsi
);
4223 release_defs (feed
);
4224 plus_negates
.safe_push (gimple_assign_lhs (user
));
4228 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4229 rid of one operation. */
4230 gimple feed
= SSA_NAME_DEF_STMT (negate
);
4231 tree a
= gimple_assign_rhs1 (feed
);
4232 tree rhs1
= gimple_assign_rhs1 (user
);
4233 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
4234 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, a
);
4235 update_stmt (gsi_stmt (gsi
));
4241 /* Returns true if OP is of a type for which we can do reassociation.
4242 That is for integral or non-saturating fixed-point types, and for
4243 floating point type when associative-math is enabled. */
4246 can_reassociate_p (tree op
)
4248 tree type
= TREE_TYPE (op
);
4249 if ((INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_WRAPS (type
))
4250 || NON_SAT_FIXED_POINT_TYPE_P (type
)
4251 || (flag_associative_math
&& FLOAT_TYPE_P (type
)))
4256 /* Break up subtract operations in block BB.
4258 We do this top down because we don't know whether the subtract is
4259 part of a possible chain of reassociation except at the top.
4268 we want to break up k = t - q, but we won't until we've transformed q
4269 = b - r, which won't be broken up until we transform b = c - d.
4271 En passant, clear the GIMPLE visited flag on every statement
4272 and set UIDs within each basic block. */
4275 break_up_subtract_bb (basic_block bb
)
4277 gimple_stmt_iterator gsi
;
4279 unsigned int uid
= 1;
4281 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4283 gimple stmt
= gsi_stmt (gsi
);
4284 gimple_set_visited (stmt
, false);
4285 gimple_set_uid (stmt
, uid
++);
4287 if (!is_gimple_assign (stmt
)
4288 || !can_reassociate_p (gimple_assign_lhs (stmt
)))
4291 /* Look for simple gimple subtract operations. */
4292 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
4294 if (!can_reassociate_p (gimple_assign_rhs1 (stmt
))
4295 || !can_reassociate_p (gimple_assign_rhs2 (stmt
)))
4298 /* Check for a subtract used only in an addition. If this
4299 is the case, transform it into add of a negate for better
4300 reassociation. IE transform C = A-B into C = A + -B if C
4301 is only used in an addition. */
4302 if (should_break_up_subtract (stmt
))
4303 break_up_subtract (stmt
, &gsi
);
4305 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
4306 && can_reassociate_p (gimple_assign_rhs1 (stmt
)))
4307 plus_negates
.safe_push (gimple_assign_lhs (stmt
));
4309 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
4311 son
= next_dom_son (CDI_DOMINATORS
, son
))
4312 break_up_subtract_bb (son
);
4315 /* Used for repeated factor analysis. */
4316 struct repeat_factor_d
4318 /* An SSA name that occurs in a multiply chain. */
4321 /* Cached rank of the factor. */
4324 /* Number of occurrences of the factor in the chain. */
4325 HOST_WIDE_INT count
;
4327 /* An SSA name representing the product of this factor and
4328 all factors appearing later in the repeated factor vector. */
4332 typedef struct repeat_factor_d repeat_factor
, *repeat_factor_t
;
4333 typedef const struct repeat_factor_d
*const_repeat_factor_t
;
4336 static vec
<repeat_factor
> repeat_factor_vec
;
4338 /* Used for sorting the repeat factor vector. Sort primarily by
4339 ascending occurrence count, secondarily by descending rank. */
4342 compare_repeat_factors (const void *x1
, const void *x2
)
4344 const_repeat_factor_t rf1
= (const_repeat_factor_t
) x1
;
4345 const_repeat_factor_t rf2
= (const_repeat_factor_t
) x2
;
4347 if (rf1
->count
!= rf2
->count
)
4348 return rf1
->count
- rf2
->count
;
4350 return rf2
->rank
- rf1
->rank
;
4353 /* Look for repeated operands in OPS in the multiply tree rooted at
4354 STMT. Replace them with an optimal sequence of multiplies and powi
4355 builtin calls, and remove the used operands from OPS. Return an
4356 SSA name representing the value of the replacement sequence. */
4359 attempt_builtin_powi (gimple stmt
, vec
<operand_entry_t
> *ops
)
4361 unsigned i
, j
, vec_len
;
4364 repeat_factor_t rf1
, rf2
;
4365 repeat_factor rfnew
;
4366 tree result
= NULL_TREE
;
4367 tree target_ssa
, iter_result
;
4368 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
4369 tree powi_fndecl
= mathfn_built_in (type
, BUILT_IN_POWI
);
4370 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4371 gimple mul_stmt
, pow_stmt
;
4373 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4378 /* Allocate the repeated factor vector. */
4379 repeat_factor_vec
.create (10);
4381 /* Scan the OPS vector for all SSA names in the product and build
4382 up a vector of occurrence counts for each factor. */
4383 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
4385 if (TREE_CODE (oe
->op
) == SSA_NAME
)
4387 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4389 if (rf1
->factor
== oe
->op
)
4391 rf1
->count
+= oe
->count
;
4396 if (j
>= repeat_factor_vec
.length ())
4398 rfnew
.factor
= oe
->op
;
4399 rfnew
.rank
= oe
->rank
;
4400 rfnew
.count
= oe
->count
;
4401 rfnew
.repr
= NULL_TREE
;
4402 repeat_factor_vec
.safe_push (rfnew
);
4407 /* Sort the repeated factor vector by (a) increasing occurrence count,
4408 and (b) decreasing rank. */
4409 repeat_factor_vec
.qsort (compare_repeat_factors
);
4411 /* It is generally best to combine as many base factors as possible
4412 into a product before applying __builtin_powi to the result.
4413 However, the sort order chosen for the repeated factor vector
4414 allows us to cache partial results for the product of the base
4415 factors for subsequent use. When we already have a cached partial
4416 result from a previous iteration, it is best to make use of it
4417 before looking for another __builtin_pow opportunity.
4419 As an example, consider x * x * y * y * y * z * z * z * z.
4420 We want to first compose the product x * y * z, raise it to the
4421 second power, then multiply this by y * z, and finally multiply
4422 by z. This can be done in 5 multiplies provided we cache y * z
4423 for use in both expressions:
4431 If we instead ignored the cached y * z and first multiplied by
4432 the __builtin_pow opportunity z * z, we would get the inferior:
4441 vec_len
= repeat_factor_vec
.length ();
4443 /* Repeatedly look for opportunities to create a builtin_powi call. */
4446 HOST_WIDE_INT power
;
4448 /* First look for the largest cached product of factors from
4449 preceding iterations. If found, create a builtin_powi for
4450 it if the minimum occurrence count for its factors is at
4451 least 2, or just use this cached product as our next
4452 multiplicand if the minimum occurrence count is 1. */
4453 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4455 if (rf1
->repr
&& rf1
->count
> 0)
4465 iter_result
= rf1
->repr
;
4467 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4471 fputs ("Multiplying by cached product ", dump_file
);
4472 for (elt
= j
; elt
< vec_len
; elt
++)
4474 rf
= &repeat_factor_vec
[elt
];
4475 print_generic_expr (dump_file
, rf
->factor
, 0);
4476 if (elt
< vec_len
- 1)
4477 fputs (" * ", dump_file
);
4479 fputs ("\n", dump_file
);
4484 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4485 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
4486 build_int_cst (integer_type_node
,
4488 gimple_call_set_lhs (pow_stmt
, iter_result
);
4489 gimple_set_location (pow_stmt
, gimple_location (stmt
));
4490 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
4492 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4496 fputs ("Building __builtin_pow call for cached product (",
4498 for (elt
= j
; elt
< vec_len
; elt
++)
4500 rf
= &repeat_factor_vec
[elt
];
4501 print_generic_expr (dump_file
, rf
->factor
, 0);
4502 if (elt
< vec_len
- 1)
4503 fputs (" * ", dump_file
);
4505 fprintf (dump_file
, ")^"HOST_WIDE_INT_PRINT_DEC
"\n",
4512 /* Otherwise, find the first factor in the repeated factor
4513 vector whose occurrence count is at least 2. If no such
4514 factor exists, there are no builtin_powi opportunities
4516 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4518 if (rf1
->count
>= 2)
4527 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4531 fputs ("Building __builtin_pow call for (", dump_file
);
4532 for (elt
= j
; elt
< vec_len
; elt
++)
4534 rf
= &repeat_factor_vec
[elt
];
4535 print_generic_expr (dump_file
, rf
->factor
, 0);
4536 if (elt
< vec_len
- 1)
4537 fputs (" * ", dump_file
);
4539 fprintf (dump_file
, ")^"HOST_WIDE_INT_PRINT_DEC
"\n", power
);
4542 reassociate_stats
.pows_created
++;
4544 /* Visit each element of the vector in reverse order (so that
4545 high-occurrence elements are visited first, and within the
4546 same occurrence count, lower-ranked elements are visited
4547 first). Form a linear product of all elements in this order
4548 whose occurrencce count is at least that of element J.
4549 Record the SSA name representing the product of each element
4550 with all subsequent elements in the vector. */
4551 if (j
== vec_len
- 1)
4552 rf1
->repr
= rf1
->factor
;
4555 for (ii
= vec_len
- 2; ii
>= (int)j
; ii
--)
4559 rf1
= &repeat_factor_vec
[ii
];
4560 rf2
= &repeat_factor_vec
[ii
+ 1];
4562 /* Init the last factor's representative to be itself. */
4564 rf2
->repr
= rf2
->factor
;
4569 target_ssa
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4570 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
,
4573 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4574 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
4575 rf1
->repr
= target_ssa
;
4577 /* Don't reprocess the multiply we just introduced. */
4578 gimple_set_visited (mul_stmt
, true);
4582 /* Form a call to __builtin_powi for the maximum product
4583 just formed, raised to the power obtained earlier. */
4584 rf1
= &repeat_factor_vec
[j
];
4585 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4586 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
4587 build_int_cst (integer_type_node
,
4589 gimple_call_set_lhs (pow_stmt
, iter_result
);
4590 gimple_set_location (pow_stmt
, gimple_location (stmt
));
4591 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
4594 /* If we previously formed at least one other builtin_powi call,
4595 form the product of this one and those others. */
4598 tree new_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4599 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
, new_result
,
4600 result
, iter_result
);
4601 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4602 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
4603 gimple_set_visited (mul_stmt
, true);
4604 result
= new_result
;
4607 result
= iter_result
;
4609 /* Decrement the occurrence count of each element in the product
4610 by the count found above, and remove this many copies of each
4612 for (i
= j
; i
< vec_len
; i
++)
4617 rf1
= &repeat_factor_vec
[i
];
4618 rf1
->count
-= power
;
4620 FOR_EACH_VEC_ELT_REVERSE (*ops
, n
, oe
)
4622 if (oe
->op
== rf1
->factor
)
4626 ops
->ordered_remove (n
);
4642 /* At this point all elements in the repeated factor vector have a
4643 remaining occurrence count of 0 or 1, and those with a count of 1
4644 don't have cached representatives. Re-sort the ops vector and
4646 ops
->qsort (sort_by_operand_rank
);
4647 repeat_factor_vec
.release ();
4649 /* Return the final product computed herein. Note that there may
4650 still be some elements with single occurrence count left in OPS;
4651 those will be handled by the normal reassociation logic. */
4655 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4658 transform_stmt_to_copy (gimple_stmt_iterator
*gsi
, gimple stmt
, tree new_rhs
)
4662 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4664 fprintf (dump_file
, "Transforming ");
4665 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4668 rhs1
= gimple_assign_rhs1 (stmt
);
4669 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
4671 remove_visited_stmt_chain (rhs1
);
4673 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4675 fprintf (dump_file
, " into ");
4676 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4680 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4683 transform_stmt_to_multiply (gimple_stmt_iterator
*gsi
, gimple stmt
,
4684 tree rhs1
, tree rhs2
)
4686 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4688 fprintf (dump_file
, "Transforming ");
4689 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4692 gimple_assign_set_rhs_with_ops (gsi
, MULT_EXPR
, rhs1
, rhs2
);
4693 update_stmt (gsi_stmt (*gsi
));
4694 remove_visited_stmt_chain (rhs1
);
4696 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4698 fprintf (dump_file
, " into ");
4699 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4703 /* Reassociate expressions in basic block BB and its post-dominator as
4707 reassociate_bb (basic_block bb
)
4709 gimple_stmt_iterator gsi
;
4711 gimple stmt
= last_stmt (bb
);
4713 if (stmt
&& !gimple_visited_p (stmt
))
4714 maybe_optimize_range_tests (stmt
);
4716 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
4718 stmt
= gsi_stmt (gsi
);
4720 if (is_gimple_assign (stmt
)
4721 && !stmt_could_throw_p (stmt
))
4723 tree lhs
, rhs1
, rhs2
;
4724 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
4726 /* If this is not a gimple binary expression, there is
4727 nothing for us to do with it. */
4728 if (get_gimple_rhs_class (rhs_code
) != GIMPLE_BINARY_RHS
)
4731 /* If this was part of an already processed statement,
4732 we don't need to touch it again. */
4733 if (gimple_visited_p (stmt
))
4735 /* This statement might have become dead because of previous
4737 if (has_zero_uses (gimple_get_lhs (stmt
)))
4739 reassoc_remove_stmt (&gsi
);
4740 release_defs (stmt
);
4741 /* We might end up removing the last stmt above which
4742 places the iterator to the end of the sequence.
4743 Reset it to the last stmt in this case which might
4744 be the end of the sequence as well if we removed
4745 the last statement of the sequence. In which case
4746 we need to bail out. */
4747 if (gsi_end_p (gsi
))
4749 gsi
= gsi_last_bb (bb
);
4750 if (gsi_end_p (gsi
))
4757 lhs
= gimple_assign_lhs (stmt
);
4758 rhs1
= gimple_assign_rhs1 (stmt
);
4759 rhs2
= gimple_assign_rhs2 (stmt
);
4761 /* For non-bit or min/max operations we can't associate
4762 all types. Verify that here. */
4763 if (rhs_code
!= BIT_IOR_EXPR
4764 && rhs_code
!= BIT_AND_EXPR
4765 && rhs_code
!= BIT_XOR_EXPR
4766 && rhs_code
!= MIN_EXPR
4767 && rhs_code
!= MAX_EXPR
4768 && (!can_reassociate_p (lhs
)
4769 || !can_reassociate_p (rhs1
)
4770 || !can_reassociate_p (rhs2
)))
4773 if (associative_tree_code (rhs_code
))
4775 auto_vec
<operand_entry_t
> ops
;
4776 tree powi_result
= NULL_TREE
;
4778 /* There may be no immediate uses left by the time we
4779 get here because we may have eliminated them all. */
4780 if (TREE_CODE (lhs
) == SSA_NAME
&& has_zero_uses (lhs
))
4783 gimple_set_visited (stmt
, true);
4784 linearize_expr_tree (&ops
, stmt
, true, true);
4785 ops
.qsort (sort_by_operand_rank
);
4786 optimize_ops_list (rhs_code
, &ops
);
4787 if (undistribute_ops_list (rhs_code
, &ops
,
4788 loop_containing_stmt (stmt
)))
4790 ops
.qsort (sort_by_operand_rank
);
4791 optimize_ops_list (rhs_code
, &ops
);
4794 if (rhs_code
== BIT_IOR_EXPR
|| rhs_code
== BIT_AND_EXPR
)
4795 optimize_range_tests (rhs_code
, &ops
);
4797 if (first_pass_instance
4798 && rhs_code
== MULT_EXPR
4799 && flag_unsafe_math_optimizations
)
4800 powi_result
= attempt_builtin_powi (stmt
, &ops
);
4802 /* If the operand vector is now empty, all operands were
4803 consumed by the __builtin_powi optimization. */
4804 if (ops
.length () == 0)
4805 transform_stmt_to_copy (&gsi
, stmt
, powi_result
);
4806 else if (ops
.length () == 1)
4808 tree last_op
= ops
.last ()->op
;
4811 transform_stmt_to_multiply (&gsi
, stmt
, last_op
,
4814 transform_stmt_to_copy (&gsi
, stmt
, last_op
);
4818 machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
4819 int ops_num
= ops
.length ();
4820 int width
= get_reassociation_width (ops_num
, rhs_code
, mode
);
4823 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4825 "Width = %d was chosen for reassociation\n", width
);
4828 && ops
.length () > 3)
4829 rewrite_expr_tree_parallel (stmt
, width
, ops
);
4832 /* When there are three operands left, we want
4833 to make sure the ones that get the double
4834 binary op are chosen wisely. */
4835 int len
= ops
.length ();
4837 swap_ops_for_binary_stmt (ops
, len
- 3, stmt
);
4839 new_lhs
= rewrite_expr_tree (stmt
, 0, ops
,
4840 powi_result
!= NULL
);
4843 /* If we combined some repeated factors into a
4844 __builtin_powi call, multiply that result by the
4845 reassociated operands. */
4848 gimple mul_stmt
, lhs_stmt
= SSA_NAME_DEF_STMT (lhs
);
4849 tree type
= TREE_TYPE (lhs
);
4850 tree target_ssa
= make_temp_ssa_name (type
, NULL
,
4852 gimple_set_lhs (lhs_stmt
, target_ssa
);
4853 update_stmt (lhs_stmt
);
4855 target_ssa
= new_lhs
;
4856 mul_stmt
= gimple_build_assign_with_ops (MULT_EXPR
, lhs
,
4859 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4860 gsi_insert_after (&gsi
, mul_stmt
, GSI_NEW_STMT
);
4866 for (son
= first_dom_son (CDI_POST_DOMINATORS
, bb
);
4868 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
4869 reassociate_bb (son
);
/* NOTE(review): fragment of branch_fixup.  The `static void
   branch_fixup (void)' signature, braces and several interior lines
   are elided in this extraction (original line numbers jump, e.g.
   4878 -> 4885, 4906, 4910 missing).  Code bytes kept as-is;
   comments only.  */
4872 /* Add jumps around shifts for range tests turned into bit tests.
4873 For each SSA_NAME VAR we have code like:
4874 VAR = ...; // final stmt of range comparison
4875 // bit test here...;
4876 OTHERVAR = ...; // final stmt of the bit test sequence
4877 RES = VAR | OTHERVAR;
4878 Turn the above into:
4885 // bit test here...;
4888 # RES = PHI<1(l1), OTHERVAR(l2)>; */
/* Walk every SSA name queued on the reassoc_branch_fixups worklist.  */
4896 FOR_EACH_VEC_ELT (reassoc_branch_fixups
, i
, var
)
/* VAR's defining statement: the final stmt of the range comparison.  */
4898 gimple def_stmt
= SSA_NAME_DEF_STMT (var
);
/* VAR must have exactly one immediate use: the BIT_IOR_EXPR combining
   it with OTHERVAR, in the same basic block as def_stmt.  NOTE(review):
   the statement consuming `ok' (presumably a gcc_assert) is partly
   elided here — the `&&' operands below are its arguments.  */
4901 bool ok
= single_imm_use (var
, &use
, &use_stmt
);
4903 && is_gimple_assign (use_stmt
)
4904 && gimple_assign_rhs_code (use_stmt
) == BIT_IOR_EXPR
4905 && gimple_bb (def_stmt
) == gimple_bb (use_stmt
));
/* Split the block twice: cond_bb now ends with def_stmt, then_bb holds
   the bit-test sequence up to use_stmt, merge_bb holds the rest.  */
4907 basic_block cond_bb
= gimple_bb (def_stmt
);
4908 basic_block then_bb
= split_block (cond_bb
, def_stmt
)->dest
;
4909 basic_block merge_bb
= split_block (then_bb
, use_stmt
)->dest
;
/* Emit `if (var != 0)' at the end of cond_bb so a true range test can
   bypass the bit-test block entirely.  */
4911 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
4912 gimple g
= gimple_build_cond (NE_EXPR
, var
,
4913 build_zero_cst (TREE_TYPE (var
)),
4914 NULL_TREE
, NULL_TREE
);
4915 location_t loc
= gimple_location (use_stmt
);
4916 gimple_set_location (g
, loc
);
4917 gsi_insert_after (&gsi
, g
, GSI_NEW_STMT
);
/* New true edge jumps straight to merge_bb; split probability and
   profile count 50/50 between the two outgoing edges of cond_bb.  */
4919 edge etrue
= make_edge (cond_bb
, merge_bb
, EDGE_TRUE_VALUE
);
4920 etrue
->probability
= REG_BR_PROB_BASE
/ 2;
4921 etrue
->count
= cond_bb
->count
/ 2;
/* The old fallthrough edge into then_bb becomes the false edge.  */
4922 edge efalse
= find_edge (cond_bb
, then_bb
);
4923 efalse
->flags
= EDGE_FALSE_VALUE
;
4924 efalse
->probability
-= etrue
->probability
;
4925 efalse
->count
-= etrue
->count
;
4926 then_bb
->count
-= etrue
->count
;
/* Find the operand of the BIT_IOR that is not VAR (the bit-test
   result OTHERVAR); the IOR may have VAR on either side.  */
4928 tree othervar
= NULL_TREE
;
4929 if (gimple_assign_rhs1 (use_stmt
) == var
)
4930 othervar
= gimple_assign_rhs2 (use_stmt
);
4931 else if (gimple_assign_rhs2 (use_stmt
) == var
)
4932 othervar
= gimple_assign_rhs1 (use_stmt
);
/* Replace RES = VAR | OTHERVAR with a PHI in merge_bb merging the
   constant 1 (range test true) and OTHERVAR (bit-test path), then
   delete the now-dead IOR statement.  */
4935 tree lhs
= gimple_assign_lhs (use_stmt
);
4936 gimple phi
= create_phi_node (lhs
, merge_bb
);
4937 add_phi_arg (phi
, build_one_cst (TREE_TYPE (lhs
)), etrue
, loc
);
4938 add_phi_arg (phi
, othervar
, single_succ_edge (then_bb
), loc
);
4939 gsi
= gsi_for_stmt (use_stmt
);
4940 gsi_remove (&gsi
, true);
/* Keep (post-)dominance info correct for the new diamond.  */
4942 set_immediate_dominator (CDI_DOMINATORS
, merge_bb
, cond_bb
);
4943 set_immediate_dominator (CDI_POST_DOMINATORS
, cond_bb
, merge_bb
);
/* All queued fixups processed; drop the worklist.  */
4945 reassoc_branch_fixups
.release ();
/* Forward declarations for the debug-dump helpers defined below.  */
4948 void dump_ops_vector (FILE *file
, vec
<operand_entry_t
> ops
);
4949 void debug_ops_vector (vec
<operand_entry_t
> ops
);
/* NOTE(review): the `void' return-type line and local declarations
   (original lines 4952-4958) are elided in this extraction; presumably
   `operand_entry_t oe' and `unsigned int i' — confirm against the
   full file.  Bytes kept as-is; comments only.  */
4951 /* Dump the operand entry vector OPS to FILE. */
4954 dump_ops_vector (FILE *file
, vec
<operand_entry_t
> ops
)
/* Print each operand entry as "Op <index> -> rank: <rank>, tree: "
   followed by the expression itself.  */
4959 FOR_EACH_VEC_ELT (ops
, i
, oe
)
4961 fprintf (file
, "Op %d -> rank: %d, tree: ", i
, oe
->rank
);
4962 print_generic_expr (file
, oe
->op
, 0);
/* NOTE(review): the function header line (presumably `DEBUG_FUNCTION
   void', original line ~4968) is elided here.  Thin wrapper that dumps
   OPS to stderr for use from a debugger.  */
4966 /* Dump the operand entry vector OPS to STDERR. */
4969 debug_ops_vector (vec
<operand_entry_t
> ops
)
4971 dump_ops_vector (stderr
, ops
);
/* NOTE(review): body fragment of do_reassoc; the signature (presumably
   `static void do_reassoc (void)', elided here) drives the two passes.
   Phase 1 walks the dominator tree from the entry block turning
   subtracts into add + negate; phase 2 walks the post-dominator tree
   from the exit block performing the actual reassociation.  */
4977 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
4978 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun
));
/* NOTE(review): fragment of init_reassoc — the function signature and
   some locals (e.g. the declarations of `rank' and `i', original lines
   4982-4987) are elided in this extraction.  Bytes kept as-is;
   comments only.  */
4981 /* Initialize the reassociation pass. */
/* Scratch array of basic-block indices, excluding the fixed
   entry/exit blocks, filled by the RPO computation below.  */
4988 int *bbs
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
);
4990 /* Find the loops, so that we can prevent moving calculations in
4992 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
/* Zero the pass statistics reported by fini_reassoc.  */
4994 memset (&reassociate_stats
, 0, sizeof (reassociate_stats
));
/* Pool for operand_entry records; IDs restart at 0 per function.  */
4996 operand_entry_pool
= create_alloc_pool ("operand entry pool",
4997 sizeof (struct operand_entry
), 30);
4998 next_operand_entry_id
= 0;
5000 /* Reverse RPO (Reverse Post Order) will give us something where
5001 deeper loops come later. */
5002 pre_and_rev_post_order_compute (NULL
, bbs
, false);
/* Per-BB rank array and the SSA-name -> rank map.  */
5003 bb_rank
= XCNEWVEC (long, last_basic_block_for_fn (cfun
));
5004 operand_rank
= new hash_map
<tree
, long>;
5006 /* Give each default definition a distinct rank. This includes
5007 parameters and the static chain. Walk backwards over all
5008 SSA names so that we get proper rank ordering according
5009 to tree_swap_operands_p. */
5010 for (i
= num_ssa_names
- 1; i
> 0; --i
)
5012 tree name
= ssa_name (i
);
5013 if (name
&& SSA_NAME_IS_DEFAULT_DEF (name
))
5014 insert_operand_rank (name
, ++rank
);
5017 /* Set up rank for each BB */
/* Shift by 16 so BB ranks dominate the per-statement ranks assigned
   within a block.  */
5018 for (i
= 0; i
< n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
; i
++)
5019 bb_rank
[bbs
[i
]] = ++rank
<< 16;
/* reassociate_bb recurses over the post-dominator tree; compute it.  */
5022 calculate_dominance_info (CDI_POST_DOMINATORS
);
5023 plus_negates
= vNULL
;
/* NOTE(review): fragment of fini_reassoc — the signature and the tail
   of the leading comment (original lines 5027-5031) are elided here.
   Reports the counters accumulated in reassociate_stats, then tears
   down everything init_reassoc set up.  Bytes kept as-is.  */
5026 /* Cleanup after the reassociation pass, and print stats if
5032 statistics_counter_event (cfun
, "Linearized",
5033 reassociate_stats
.linearized
);
5034 statistics_counter_event (cfun
, "Constants eliminated",
5035 reassociate_stats
.constants_eliminated
);
5036 statistics_counter_event (cfun
, "Ops eliminated",
5037 reassociate_stats
.ops_eliminated
);
5038 statistics_counter_event (cfun
, "Statements rewritten",
5039 reassociate_stats
.rewritten
);
5040 statistics_counter_event (cfun
, "Built-in pow[i] calls encountered",
5041 reassociate_stats
.pows_encountered
);
5042 statistics_counter_event (cfun
, "Built-in powi calls created",
5043 reassociate_stats
.pows_created
);
/* Release per-function state mirroring init_reassoc's allocations:
   the rank map, the operand-entry pool, and the plus_negates vec.  */
5045 delete operand_rank
;
5046 free_alloc_pool (operand_entry_pool
);
5048 plus_negates
.release ();
5049 free_dominance_info (CDI_POST_DOMINATORS
);
5050 loop_optimizer_finalize ();
/* NOTE(review): fragment of execute_reassoc — the return type, braces
   and most calls (presumably init_reassoc / do_reassoc / fini_reassoc
   and the return value, original lines 5057-5065) are elided in this
   extraction.  Only the negate-repropagation step is visible.  */
5053 /* Gate and execute functions for Reassociation. */
5056 execute_reassoc (void)
5061 repropagate_negates ();
/* Static pass descriptor for -ftree-reassoc.  Requires a CFG and SSA
   form; finishes by updating virtual SSA operands only.  NOTE(review):
   the opening/closing braces of the initializer are elided in this
   extraction.  */
5070 const pass_data pass_data_reassoc
=
5072 GIMPLE_PASS
, /* type */
5073 "reassoc", /* name */
5074 OPTGROUP_NONE
, /* optinfo_flags */
5075 TV_TREE_REASSOC
, /* tv_id */
5076 ( PROP_cfg
| PROP_ssa
), /* properties_required */
5077 0, /* properties_provided */
5078 0, /* properties_destroyed */
5079 0, /* todo_flags_start */
5080 TODO_update_ssa_only_virtuals
, /* todo_flags_finish */
/* Pass-manager wrapper for the reassociation pass.  Gated on
   -ftree-reassoc; clonable so the pass can run more than once in the
   pipeline.  NOTE(review): the class braces and `public:' label are
   elided in this extraction.  */
5083 class pass_reassoc
: public gimple_opt_pass
5086 pass_reassoc (gcc::context
*ctxt
)
5087 : gimple_opt_pass (pass_data_reassoc
, ctxt
)
5090 /* opt_pass methods: */
5091 opt_pass
* clone () { return new pass_reassoc (m_ctxt
); }
5092 virtual bool gate (function
*) { return flag_tree_reassoc
!= 0; }
5093 virtual unsigned int execute (function
*) { return execute_reassoc (); }
5095 }; // class pass_reassoc
/* Factory used by passes.def to instantiate the pass.  NOTE(review):
   the `gimple_opt_pass *' return-type line (original ~5099) is elided
   in this extraction.  */
5100 make_pass_reassoc (gcc::context
*ctxt
)
5102 return new pass_reassoc (ctxt
);