1 /* Reassociation for trees.
2 Copyright (C) 2005-2016 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
34 #include "optabs-tree.h"
35 #include "gimple-pretty-print.h"
36 #include "diagnostic-core.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
40 #include "gimple-fold.h"
42 #include "gimple-iterator.h"
43 #include "gimplify-me.h"
45 #include "tree-ssa-loop.h"
48 #include "langhooks.h"
53 #include "case-cfn-macros.h"
55 /* This is a simple global reassociation pass. It is, in part, based
56 on the LLVM pass of the same name (They do some things more/less
57 than we do, in different orders, etc).
59 It consists of five steps:
61 1. Breaking up subtract operations into addition + negate, where
62 it would promote the reassociation of adds.
64 2. Left linearization of the expression trees, so that (A+B)+(C+D)
65 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
66 During linearization, we place the operands of the binary
67 expressions into a vector of operand_entry_*
69 3. Optimization of the operand lists, eliminating things like a +
-a, a & a, etc.
72 3a. Combine repeated factors with the same occurrence counts
73 into a __builtin_powi call that will later be optimized into
74 an optimal number of multiplies.
76 4. Rewrite the expression trees we linearized and optimized so
77 they are in proper rank order.
79 5. Repropagate negates, as nothing else will clean it up ATM.
81 A bit of theory on #4, since nobody seems to write anything down
82 about why it makes sense to do it the way they do it:
84 We could do this much nicer theoretically, but don't (for reasons
85 explained after how to do it theoretically nice :P).
87 In order to promote the most redundancy elimination, you want
88 binary expressions whose operands are the same rank (or
89 preferably, the same value) exposed to the redundancy eliminator,
90 for possible elimination.
92 So the way to do this if we really cared, is to build the new op
93 tree from the leaves to the roots, merging as you go, and putting the
94 new op on the end of the worklist, until you are left with one
95 thing on the worklist.
97 IE if you have to rewrite the following set of operands (listed with
98 rank in parentheses), with opcode PLUS_EXPR:
100 a (1), b (1), c (1), d (2), e (2)
103 We start with our merge worklist empty, and the ops list with all of
106 You want to first merge all leaves of the same rank, as much as
109 So first build a binary op of
111 mergetmp = a + b, and put "mergetmp" on the merge worklist.
113 Because there is no three operand form of PLUS_EXPR, c is not going to
114 be exposed to redundancy elimination as a rank 1 operand.
116 So you might as well throw it on the merge worklist (you could also
117 consider it to now be a rank two operand, and merge it with d and e,
118 but in this case, you then have evicted e from a binary op. So at
119 least in this situation, you can't win.)
121 Then build a binary op of d + e
124 and put mergetmp2 on the merge worklist.
126 so merge worklist = {mergetmp, c, mergetmp2}
128 Continue building binary ops of these operations until you have only
129 one operation left on the worklist.
134 mergetmp3 = mergetmp + c
136 worklist = {mergetmp2, mergetmp3}
138 mergetmp4 = mergetmp2 + mergetmp3
140 worklist = {mergetmp4}
142 because we have one operation left, we can now just set the original
143 statement equal to the result of that operation.
145 This will at least expose a + b and d + e to redundancy elimination
146 as binary operations.
148 For extra points, you can reuse the old statements to build the
149 mergetmps, since you shouldn't run out.
151 So why don't we do this?
153 Because it's expensive, and rarely will help. Most trees we are
154 reassociating have 3 or less ops. If they have 2 ops, they already
155 will be written into a nice single binary op. If you have 3 ops, a
156 single simple check suffices to tell you whether the first two are of the
157 same rank. If so, you know to order it
160 newstmt = mergetmp + op3
164 newstmt = mergetmp + op1
166 If all three are of the same rank, you can't expose them all in a
167 single binary operator anyway, so the above is *still* the best you
can do.
170 Thus, this is what we do. When we have three ops left, we check to see
171 what order to put them in, and call it a day. As a nod to vector sum
172 reduction, we check if any of the ops are really a phi node that is a
173 destructive update for the associating op, and keep the destructive
174 update together for vector sum reduction recognition. */
176 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
177 point 3a in the pass header comment. */
178 static bool reassoc_insert_powi_p
;
184 int constants_eliminated
;
187 int pows_encountered
;
191 /* Operator, rank pair. */
198 gimple
*stmt_to_insert
;
201 static object_allocator
<operand_entry
> operand_entry_pool
202 ("operand entry pool");
204 /* This is used to assign a unique ID to each struct operand_entry
205 so that qsort results are identical on different hosts. */
206 static int next_operand_entry_id
;
208 /* Starting rank number for a given basic block, so that we can rank
209 operations using unmovable instructions in that BB based on the bb
211 static long *bb_rank
;
213 /* Operand->rank hashtable. */
214 static hash_map
<tree
, long> *operand_rank
;
216 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
217 all basic blocks the CFG should be adjusted - basic blocks
218 split right after that SSA_NAME's definition statement and before
219 the only use, which must be a bit ior. */
220 static vec
<tree
> reassoc_branch_fixups
;
223 static long get_rank (tree
);
224 static bool reassoc_stmt_dominates_stmt_p (gimple
*, gimple
*);
226 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
227 possibly added by gsi_remove. */
230 reassoc_remove_stmt (gimple_stmt_iterator
*gsi
)
232 gimple
*stmt
= gsi_stmt (*gsi
);
234 if (!MAY_HAVE_DEBUG_STMTS
|| gimple_code (stmt
) == GIMPLE_PHI
)
235 return gsi_remove (gsi
, true);
237 gimple_stmt_iterator prev
= *gsi
;
239 unsigned uid
= gimple_uid (stmt
);
240 basic_block bb
= gimple_bb (stmt
);
241 bool ret
= gsi_remove (gsi
, true);
242 if (!gsi_end_p (prev
))
245 prev
= gsi_start_bb (bb
);
246 gimple
*end_stmt
= gsi_stmt (*gsi
);
247 while ((stmt
= gsi_stmt (prev
)) != end_stmt
)
249 gcc_assert (stmt
&& is_gimple_debug (stmt
) && gimple_uid (stmt
) == 0);
250 gimple_set_uid (stmt
, uid
);
256 /* Bias amount for loop-carried phis. We want this to be larger than
257 the depth of any reassociation tree we can see, but not larger than
258 the rank difference between two blocks. */
259 #define PHI_LOOP_BIAS (1 << 15)
261 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
262 an innermost loop, and the phi has only a single use which is inside
263 the loop, then the rank is the block rank of the loop latch plus an
264 extra bias for the loop-carried dependence. This causes expressions
265 calculated into an accumulator variable to be independent for each
266 iteration of the loop. If STMT is some other phi, the rank is the
267 block rank of its containing block. */
269 phi_rank (gimple
*stmt
)
271 basic_block bb
= gimple_bb (stmt
);
272 struct loop
*father
= bb
->loop_father
;
278 /* We only care about real loops (those with a latch). */
280 return bb_rank
[bb
->index
];
282 /* Interesting phis must be in headers of innermost loops. */
283 if (bb
!= father
->header
285 return bb_rank
[bb
->index
];
287 /* Ignore virtual SSA_NAMEs. */
288 res
= gimple_phi_result (stmt
);
289 if (virtual_operand_p (res
))
290 return bb_rank
[bb
->index
];
292 /* The phi definition must have a single use, and that use must be
293 within the loop. Otherwise this isn't an accumulator pattern. */
294 if (!single_imm_use (res
, &use
, &use_stmt
)
295 || gimple_bb (use_stmt
)->loop_father
!= father
)
296 return bb_rank
[bb
->index
];
298 /* Look for phi arguments from within the loop. If found, bias this phi. */
299 for (i
= 0; i
< gimple_phi_num_args (stmt
); i
++)
301 tree arg
= gimple_phi_arg_def (stmt
, i
);
302 if (TREE_CODE (arg
) == SSA_NAME
303 && !SSA_NAME_IS_DEFAULT_DEF (arg
))
305 gimple
*def_stmt
= SSA_NAME_DEF_STMT (arg
);
306 if (gimple_bb (def_stmt
)->loop_father
== father
)
307 return bb_rank
[father
->latch
->index
] + PHI_LOOP_BIAS
;
311 /* Must be an uninteresting phi. */
312 return bb_rank
[bb
->index
];
315 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
316 loop-carried dependence of an innermost loop, return TRUE; else
319 loop_carried_phi (tree exp
)
324 if (TREE_CODE (exp
) != SSA_NAME
325 || SSA_NAME_IS_DEFAULT_DEF (exp
))
328 phi_stmt
= SSA_NAME_DEF_STMT (exp
);
330 if (gimple_code (SSA_NAME_DEF_STMT (exp
)) != GIMPLE_PHI
)
333 /* Non-loop-carried phis have block rank. Loop-carried phis have
334 an additional bias added in. If this phi doesn't have block rank,
335 it's biased and should not be propagated. */
336 block_rank
= bb_rank
[gimple_bb (phi_stmt
)->index
];
338 if (phi_rank (phi_stmt
) != block_rank
)
344 /* Return the maximum of RANK and the rank that should be propagated
345 from expression OP. For most operands, this is just the rank of OP.
346 For loop-carried phis, the value is zero to avoid undoing the bias
347 in favor of the phi. */
349 propagate_rank (long rank
, tree op
)
353 if (loop_carried_phi (op
))
356 op_rank
= get_rank (op
);
358 return MAX (rank
, op_rank
);
361 /* Look up the operand rank structure for expression E. */
364 find_operand_rank (tree e
)
366 long *slot
= operand_rank
->get (e
);
367 return slot
? *slot
: -1;
370 /* Insert {E,RANK} into the operand rank hashtable. */
373 insert_operand_rank (tree e
, long rank
)
375 gcc_assert (rank
> 0);
376 gcc_assert (!operand_rank
->put (e
, rank
));
379 /* Given an expression E, return the rank of the expression. */
384 /* SSA_NAME's have the rank of the expression they are the result
386 For globals and uninitialized values, the rank is 0.
387 For function arguments, use the pre-setup rank.
388 For PHI nodes, stores, asm statements, etc, we use the rank of
390 For simple operations, the rank is the maximum rank of any of
391 its operands, or the bb_rank, whichever is less.
392 I make no claims that this is optimal, however, it gives good
395 /* We make an exception to the normal ranking system to break
396 dependences of accumulator variables in loops. Suppose we
397 have a simple one-block loop containing:
404 As shown, each iteration of the calculation into x is fully
405 dependent upon the iteration before it. We would prefer to
406 see this in the form:
413 If the loop is unrolled, the calculations of b and c from
414 different iterations can be interleaved.
416 To obtain this result during reassociation, we bias the rank
417 of the phi definition x_1 upward, when it is recognized as an
418 accumulator pattern. The artificial rank causes it to be
419 added last, providing the desired independence. */
421 if (TREE_CODE (e
) == SSA_NAME
)
428 if (SSA_NAME_IS_DEFAULT_DEF (e
))
429 return find_operand_rank (e
);
431 stmt
= SSA_NAME_DEF_STMT (e
);
432 if (gimple_code (stmt
) == GIMPLE_PHI
)
433 return phi_rank (stmt
);
435 if (!is_gimple_assign (stmt
))
436 return bb_rank
[gimple_bb (stmt
)->index
];
438 /* If we already have a rank for this expression, use that. */
439 rank
= find_operand_rank (e
);
443 /* Otherwise, find the maximum rank for the operands. As an
444 exception, remove the bias from loop-carried phis when propagating
445 the rank so that dependent operations are not also biased. */
446 /* Simply walk over all SSA uses - this takes advatage of the
447 fact that non-SSA operands are is_gimple_min_invariant and
450 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
451 rank
= propagate_rank (rank
, op
);
453 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
455 fprintf (dump_file
, "Rank for ");
456 print_generic_expr (dump_file
, e
, 0);
457 fprintf (dump_file
, " is %ld\n", (rank
+ 1));
460 /* Note the rank in the hashtable so we don't recompute it. */
461 insert_operand_rank (e
, (rank
+ 1));
465 /* Constants, globals, etc., are rank 0 */
470 /* We want integer ones to end up last no matter what, since they are
471 the ones we can do the most with. */
472 #define INTEGER_CONST_TYPE 1 << 3
473 #define FLOAT_CONST_TYPE 1 << 2
474 #define OTHER_CONST_TYPE 1 << 1
476 /* Classify an invariant tree into integer, float, or other, so that
477 we can sort them to be near other constants of the same type. */
479 constant_type (tree t
)
481 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
482 return INTEGER_CONST_TYPE
;
483 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
484 return FLOAT_CONST_TYPE
;
486 return OTHER_CONST_TYPE
;
489 /* qsort comparison function to sort operand entries PA and PB by rank
490 so that the sorted array is ordered by rank in decreasing order. */
492 sort_by_operand_rank (const void *pa
, const void *pb
)
494 const operand_entry
*oea
= *(const operand_entry
*const *)pa
;
495 const operand_entry
*oeb
= *(const operand_entry
*const *)pb
;
497 /* It's nicer for optimize_expression if constants that are likely
498 to fold when added/multiplied//whatever are put next to each
499 other. Since all constants have rank 0, order them by type. */
500 if (oeb
->rank
== 0 && oea
->rank
== 0)
502 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
503 return constant_type (oeb
->op
) - constant_type (oea
->op
);
505 /* To make sorting result stable, we use unique IDs to determine
507 return oeb
->id
- oea
->id
;
510 /* Lastly, make sure the versions that are the same go next to each
512 if ((oeb
->rank
- oea
->rank
== 0)
513 && TREE_CODE (oea
->op
) == SSA_NAME
514 && TREE_CODE (oeb
->op
) == SSA_NAME
)
516 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
517 versions of removed SSA_NAMEs, so if possible, prefer to sort
518 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
520 if (!SSA_NAME_IS_DEFAULT_DEF (oea
->op
)
521 && !SSA_NAME_IS_DEFAULT_DEF (oeb
->op
)
522 && !oea
->stmt_to_insert
523 && !oeb
->stmt_to_insert
524 && SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
526 gimple
*stmta
= SSA_NAME_DEF_STMT (oea
->op
);
527 gimple
*stmtb
= SSA_NAME_DEF_STMT (oeb
->op
);
528 basic_block bba
= gimple_bb (stmta
);
529 basic_block bbb
= gimple_bb (stmtb
);
532 if (bb_rank
[bbb
->index
] != bb_rank
[bba
->index
])
533 return bb_rank
[bbb
->index
] - bb_rank
[bba
->index
];
537 bool da
= reassoc_stmt_dominates_stmt_p (stmta
, stmtb
);
538 bool db
= reassoc_stmt_dominates_stmt_p (stmtb
, stmta
);
544 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
545 return SSA_NAME_VERSION (oeb
->op
) - SSA_NAME_VERSION (oea
->op
);
547 return oeb
->id
- oea
->id
;
550 if (oeb
->rank
!= oea
->rank
)
551 return oeb
->rank
- oea
->rank
;
553 return oeb
->id
- oea
->id
;
556 /* Add an operand entry to *OPS for the tree operand OP. */
559 add_to_ops_vec (vec
<operand_entry
*> *ops
, tree op
, gimple
*stmt_to_insert
= NULL
)
561 operand_entry
*oe
= operand_entry_pool
.allocate ();
564 oe
->rank
= get_rank (op
);
565 oe
->id
= next_operand_entry_id
++;
567 oe
->stmt_to_insert
= stmt_to_insert
;
571 /* Add an operand entry to *OPS for the tree operand OP with repeat
575 add_repeat_to_ops_vec (vec
<operand_entry
*> *ops
, tree op
,
576 HOST_WIDE_INT repeat
)
578 operand_entry
*oe
= operand_entry_pool
.allocate ();
581 oe
->rank
= get_rank (op
);
582 oe
->id
= next_operand_entry_id
++;
584 oe
->stmt_to_insert
= NULL
;
587 reassociate_stats
.pows_encountered
++;
590 /* Return true if STMT is reassociable operation containing a binary
591 operation with tree code CODE, and is inside LOOP. */
594 is_reassociable_op (gimple
*stmt
, enum tree_code code
, struct loop
*loop
)
596 basic_block bb
= gimple_bb (stmt
);
598 if (gimple_bb (stmt
) == NULL
)
601 if (!flow_bb_inside_loop_p (loop
, bb
))
604 if (is_gimple_assign (stmt
)
605 && gimple_assign_rhs_code (stmt
) == code
606 && has_single_use (gimple_assign_lhs (stmt
)))
613 /* Return true if STMT is a nop-conversion. */
616 gimple_nop_conversion_p (gimple
*stmt
)
618 if (gassign
*ass
= dyn_cast
<gassign
*> (stmt
))
620 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass
))
621 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass
)),
622 TREE_TYPE (gimple_assign_rhs1 (ass
))))
628 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
629 operand of the negate operation. Otherwise, return NULL. */
632 get_unary_op (tree name
, enum tree_code opcode
)
634 gimple
*stmt
= SSA_NAME_DEF_STMT (name
);
636 /* Look through nop conversions (sign changes). */
637 if (gimple_nop_conversion_p (stmt
)
638 && TREE_CODE (gimple_assign_rhs1 (stmt
)) == SSA_NAME
)
639 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
641 if (!is_gimple_assign (stmt
))
644 if (gimple_assign_rhs_code (stmt
) == opcode
)
645 return gimple_assign_rhs1 (stmt
);
649 /* Return true if OP1 and OP2 have the same value if casted to either type. */
652 ops_equal_values_p (tree op1
, tree op2
)
658 if (TREE_CODE (op1
) == SSA_NAME
)
660 gimple
*stmt
= SSA_NAME_DEF_STMT (op1
);
661 if (gimple_nop_conversion_p (stmt
))
663 op1
= gimple_assign_rhs1 (stmt
);
669 if (TREE_CODE (op2
) == SSA_NAME
)
671 gimple
*stmt
= SSA_NAME_DEF_STMT (op2
);
672 if (gimple_nop_conversion_p (stmt
))
674 op2
= gimple_assign_rhs1 (stmt
);
685 /* If CURR and LAST are a pair of ops that OPCODE allows us to
686 eliminate through equivalences, do so, remove them from OPS, and
687 return true. Otherwise, return false. */
690 eliminate_duplicate_pair (enum tree_code opcode
,
691 vec
<operand_entry
*> *ops
,
698 /* If we have two of the same op, and the opcode is & |, min, or max,
699 we can eliminate one of them.
700 If we have two of the same op, and the opcode is ^, we can
701 eliminate both of them. */
703 if (last
&& last
->op
== curr
->op
)
711 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
713 fprintf (dump_file
, "Equivalence: ");
714 print_generic_expr (dump_file
, curr
->op
, 0);
715 fprintf (dump_file
, " [&|minmax] ");
716 print_generic_expr (dump_file
, last
->op
, 0);
717 fprintf (dump_file
, " -> ");
718 print_generic_stmt (dump_file
, last
->op
, 0);
721 ops
->ordered_remove (i
);
722 reassociate_stats
.ops_eliminated
++;
727 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
729 fprintf (dump_file
, "Equivalence: ");
730 print_generic_expr (dump_file
, curr
->op
, 0);
731 fprintf (dump_file
, " ^ ");
732 print_generic_expr (dump_file
, last
->op
, 0);
733 fprintf (dump_file
, " -> nothing\n");
736 reassociate_stats
.ops_eliminated
+= 2;
738 if (ops
->length () == 2)
741 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
746 ops
->ordered_remove (i
-1);
747 ops
->ordered_remove (i
-1);
759 static vec
<tree
> plus_negates
;
761 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
762 expression, look in OPS for a corresponding positive operation to cancel
763 it out. If we find one, remove the other from OPS, replace
764 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
768 eliminate_plus_minus_pair (enum tree_code opcode
,
769 vec
<operand_entry
*> *ops
,
770 unsigned int currindex
,
778 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
781 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
782 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
783 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
786 /* Any non-negated version will have a rank that is one less than
787 the current rank. So once we hit those ranks, if we don't find
790 for (i
= currindex
+ 1;
791 ops
->iterate (i
, &oe
)
792 && oe
->rank
>= curr
->rank
- 1 ;
796 && ops_equal_values_p (oe
->op
, negateop
))
798 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
800 fprintf (dump_file
, "Equivalence: ");
801 print_generic_expr (dump_file
, negateop
, 0);
802 fprintf (dump_file
, " + -");
803 print_generic_expr (dump_file
, oe
->op
, 0);
804 fprintf (dump_file
, " -> 0\n");
807 ops
->ordered_remove (i
);
808 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
809 ops
->ordered_remove (currindex
);
810 reassociate_stats
.ops_eliminated
++;
815 && ops_equal_values_p (oe
->op
, notop
))
817 tree op_type
= TREE_TYPE (oe
->op
);
819 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
821 fprintf (dump_file
, "Equivalence: ");
822 print_generic_expr (dump_file
, notop
, 0);
823 fprintf (dump_file
, " + ~");
824 print_generic_expr (dump_file
, oe
->op
, 0);
825 fprintf (dump_file
, " -> -1\n");
828 ops
->ordered_remove (i
);
829 add_to_ops_vec (ops
, build_all_ones_cst (op_type
));
830 ops
->ordered_remove (currindex
);
831 reassociate_stats
.ops_eliminated
++;
837 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
838 save it for later inspection in repropagate_negates(). */
839 if (negateop
!= NULL_TREE
840 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr
->op
)) == NEGATE_EXPR
)
841 plus_negates
.safe_push (curr
->op
);
846 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
847 bitwise not expression, look in OPS for a corresponding operand to
848 cancel it out. If we find one, remove the other from OPS, replace
849 OPS[CURRINDEX] with 0, and return true. Otherwise, return
853 eliminate_not_pairs (enum tree_code opcode
,
854 vec
<operand_entry
*> *ops
,
855 unsigned int currindex
,
862 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
863 || TREE_CODE (curr
->op
) != SSA_NAME
)
866 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
867 if (notop
== NULL_TREE
)
870 /* Any non-not version will have a rank that is one less than
871 the current rank. So once we hit those ranks, if we don't find
874 for (i
= currindex
+ 1;
875 ops
->iterate (i
, &oe
)
876 && oe
->rank
>= curr
->rank
- 1;
881 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
883 fprintf (dump_file
, "Equivalence: ");
884 print_generic_expr (dump_file
, notop
, 0);
885 if (opcode
== BIT_AND_EXPR
)
886 fprintf (dump_file
, " & ~");
887 else if (opcode
== BIT_IOR_EXPR
)
888 fprintf (dump_file
, " | ~");
889 print_generic_expr (dump_file
, oe
->op
, 0);
890 if (opcode
== BIT_AND_EXPR
)
891 fprintf (dump_file
, " -> 0\n");
892 else if (opcode
== BIT_IOR_EXPR
)
893 fprintf (dump_file
, " -> -1\n");
896 if (opcode
== BIT_AND_EXPR
)
897 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
898 else if (opcode
== BIT_IOR_EXPR
)
899 oe
->op
= build_all_ones_cst (TREE_TYPE (oe
->op
));
901 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
903 ops
->quick_push (oe
);
911 /* Use constant value that may be present in OPS to try to eliminate
912 operands. Note that this function is only really used when we've
913 eliminated ops for other reasons, or merged constants. Across
914 single statements, fold already does all of this, plus more. There
915 is little point in duplicating logic, so I've only included the
916 identities that I could ever construct testcases to trigger. */
919 eliminate_using_constants (enum tree_code opcode
,
920 vec
<operand_entry
*> *ops
)
922 operand_entry
*oelast
= ops
->last ();
923 tree type
= TREE_TYPE (oelast
->op
);
925 if (oelast
->rank
== 0
926 && (INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
931 if (integer_zerop (oelast
->op
))
933 if (ops
->length () != 1)
935 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
936 fprintf (dump_file
, "Found & 0, removing all other ops\n");
938 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
941 ops
->quick_push (oelast
);
945 else if (integer_all_onesp (oelast
->op
))
947 if (ops
->length () != 1)
949 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
950 fprintf (dump_file
, "Found & -1, removing\n");
952 reassociate_stats
.ops_eliminated
++;
957 if (integer_all_onesp (oelast
->op
))
959 if (ops
->length () != 1)
961 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
962 fprintf (dump_file
, "Found | -1, removing all other ops\n");
964 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
967 ops
->quick_push (oelast
);
971 else if (integer_zerop (oelast
->op
))
973 if (ops
->length () != 1)
975 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
976 fprintf (dump_file
, "Found | 0, removing\n");
978 reassociate_stats
.ops_eliminated
++;
983 if (integer_zerop (oelast
->op
)
984 || (FLOAT_TYPE_P (type
)
985 && !HONOR_NANS (type
)
986 && !HONOR_SIGNED_ZEROS (type
)
987 && real_zerop (oelast
->op
)))
989 if (ops
->length () != 1)
991 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
992 fprintf (dump_file
, "Found * 0, removing all other ops\n");
994 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
996 ops
->quick_push (oelast
);
1000 else if (integer_onep (oelast
->op
)
1001 || (FLOAT_TYPE_P (type
)
1002 && !HONOR_SNANS (type
)
1003 && real_onep (oelast
->op
)))
1005 if (ops
->length () != 1)
1007 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1008 fprintf (dump_file
, "Found * 1, removing\n");
1010 reassociate_stats
.ops_eliminated
++;
1018 if (integer_zerop (oelast
->op
)
1019 || (FLOAT_TYPE_P (type
)
1020 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
1021 && fold_real_zero_addition_p (type
, oelast
->op
,
1022 opcode
== MINUS_EXPR
)))
1024 if (ops
->length () != 1)
1026 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1027 fprintf (dump_file
, "Found [|^+] 0, removing\n");
1029 reassociate_stats
.ops_eliminated
++;
1041 static void linearize_expr_tree (vec
<operand_entry
*> *, gimple
*,
1044 /* Structure for tracking and counting operands. */
1048 enum tree_code oecode
;
1053 /* The heap for the oecount hashtable and the sorted list of operands. */
1054 static vec
<oecount
> cvec
;
1057 /* Oecount hashtable helpers. */
1059 struct oecount_hasher
: int_hash
<int, 0, 1>
1061 static inline hashval_t
hash (int);
1062 static inline bool equal (int, int);
1065 /* Hash function for oecount. */
1068 oecount_hasher::hash (int p
)
1070 const oecount
*c
= &cvec
[p
- 42];
1071 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
1074 /* Comparison function for oecount. */
1077 oecount_hasher::equal (int p1
, int p2
)
1079 const oecount
*c1
= &cvec
[p1
- 42];
1080 const oecount
*c2
= &cvec
[p2
- 42];
1081 return (c1
->oecode
== c2
->oecode
1082 && c1
->op
== c2
->op
);
1085 /* Comparison function for qsort sorting oecount elements by count. */
1088 oecount_cmp (const void *p1
, const void *p2
)
1090 const oecount
*c1
= (const oecount
*)p1
;
1091 const oecount
*c2
= (const oecount
*)p2
;
1092 if (c1
->cnt
!= c2
->cnt
)
1093 return c1
->cnt
- c2
->cnt
;
1095 /* If counts are identical, use unique IDs to stabilize qsort. */
1096 return c1
->id
- c2
->id
;
1099 /* Return TRUE iff STMT represents a builtin call that raises OP
1100 to some exponent. */
1103 stmt_is_power_of_op (gimple
*stmt
, tree op
)
1105 if (!is_gimple_call (stmt
))
1108 switch (gimple_call_combined_fn (stmt
))
1112 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1119 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1120 in place and return the result. Assumes that stmt_is_power_of_op
1121 was previously called for STMT and returned TRUE. */
1123 static HOST_WIDE_INT
1124 decrement_power (gimple
*stmt
)
1126 REAL_VALUE_TYPE c
, cint
;
1127 HOST_WIDE_INT power
;
1130 switch (gimple_call_combined_fn (stmt
))
1133 arg1
= gimple_call_arg (stmt
, 1);
1134 c
= TREE_REAL_CST (arg1
);
1135 power
= real_to_integer (&c
) - 1;
1136 real_from_integer (&cint
, VOIDmode
, power
, SIGNED
);
1137 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1141 arg1
= gimple_call_arg (stmt
, 1);
1142 power
= TREE_INT_CST_LOW (arg1
) - 1;
1143 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1151 /* Find the single immediate use of STMT's LHS, and replace it
1152 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1153 replace *DEF with OP as well. */
1156 propagate_op_to_single_use (tree op
, gimple
*stmt
, tree
*def
)
1161 gimple_stmt_iterator gsi
;
1163 if (is_gimple_call (stmt
))
1164 lhs
= gimple_call_lhs (stmt
);
1166 lhs
= gimple_assign_lhs (stmt
);
1168 gcc_assert (has_single_use (lhs
));
1169 single_imm_use (lhs
, &use
, &use_stmt
);
1173 if (TREE_CODE (op
) != SSA_NAME
)
1174 update_stmt (use_stmt
);
1175 gsi
= gsi_for_stmt (stmt
);
1176 unlink_stmt_vdef (stmt
);
1177 reassoc_remove_stmt (&gsi
);
1178 release_defs (stmt
);
1181 /* Walks the linear chain with result *DEF searching for an operation
1182 with operand OP and code OPCODE removing that from the chain. *DEF
1183 is updated if there is only one operand but no operation left. */
1186 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1188 gimple
*stmt
= SSA_NAME_DEF_STMT (*def
);
1194 if (opcode
== MULT_EXPR
)
1196 if (stmt_is_power_of_op (stmt
, op
))
1198 if (decrement_power (stmt
) == 1)
1199 propagate_op_to_single_use (op
, stmt
, def
);
1202 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
)
1204 if (gimple_assign_rhs1 (stmt
) == op
)
1206 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
1207 propagate_op_to_single_use (cst
, stmt
, def
);
1210 else if (integer_minus_onep (op
)
1211 || real_minus_onep (op
))
1213 gimple_assign_set_rhs_code
1214 (stmt
, TREE_CODE (gimple_assign_rhs1 (stmt
)));
1220 name
= gimple_assign_rhs1 (stmt
);
1222 /* If this is the operation we look for and one of the operands
1223 is ours simply propagate the other operand into the stmts
1225 if (gimple_assign_rhs_code (stmt
) == opcode
1227 || gimple_assign_rhs2 (stmt
) == op
))
1230 name
= gimple_assign_rhs2 (stmt
);
1231 propagate_op_to_single_use (name
, stmt
, def
);
1235 /* We might have a multiply of two __builtin_pow* calls, and
1236 the operand might be hiding in the rightmost one. Likewise
1237 this can happen for a negate. */
1238 if (opcode
== MULT_EXPR
1239 && gimple_assign_rhs_code (stmt
) == opcode
1240 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1241 && has_single_use (gimple_assign_rhs2 (stmt
)))
1243 gimple
*stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1244 if (stmt_is_power_of_op (stmt2
, op
))
1246 if (decrement_power (stmt2
) == 1)
1247 propagate_op_to_single_use (op
, stmt2
, def
);
1250 else if (is_gimple_assign (stmt2
)
1251 && gimple_assign_rhs_code (stmt2
) == NEGATE_EXPR
)
1253 if (gimple_assign_rhs1 (stmt2
) == op
)
1255 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
1256 propagate_op_to_single_use (cst
, stmt2
, def
);
1259 else if (integer_minus_onep (op
)
1260 || real_minus_onep (op
))
1262 gimple_assign_set_rhs_code
1263 (stmt2
, TREE_CODE (gimple_assign_rhs1 (stmt2
)));
1269 /* Continue walking the chain. */
1270 gcc_assert (name
!= op
1271 && TREE_CODE (name
) == SSA_NAME
);
1272 stmt
= SSA_NAME_DEF_STMT (name
);
/* NOTE(review): this whole region is a lossy extraction -- statements are
   split mid-token across lines, original line numbers are fused into the
   text, and several original lines (braces, returns) are missing.  Code
   tokens below are preserved verbatim; only comments were added.  */
1277 /* Returns true if statement S1 dominates statement S2.  Like
1278 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
/* Fast dominance query: compares gimple UIDs when both stmts are in the
   same basic block, otherwise falls back to CDI_DOMINATORS.  */
1281 reassoc_stmt_dominates_stmt_p (gimple
*s1
, gimple
*s2
)
1283 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1285 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1286 SSA_NAME.  Assume it lives at the beginning of function and
1287 thus dominates everything.  */
1288 if (!bb1
|| s1
== s2
)
1291 /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
1297 /* PHIs in the same basic block are assumed to be
1298 executed all in parallel, if only one stmt is a PHI,
1299 it dominates the other stmt in the same basic block. */
1300 if (gimple_code (s1
) == GIMPLE_PHI
)
1303 if (gimple_code (s2
) == GIMPLE_PHI
)
/* Both stmts are in the same non-PHI block: UIDs must be assigned.  */
1306 gcc_assert (gimple_uid (s1
) && gimple_uid (s2
))ì;
1308 if (gimple_uid (s1
) < gimple_uid (s2
))
1311 if (gimple_uid (s1
) > gimple_uid (s2
))
/* Equal UIDs: walk forward from S1; whichever stmt is reached first
   with the same UID decides the order.  NOTE(review): the returns for
   these branches are among the lines lost in extraction.  */
1314 gimple_stmt_iterator gsi
= gsi_for_stmt (s1
);
1315 unsigned int uid
= gimple_uid (s1
);
1316 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1318 gimple
*s
= gsi_stmt (gsi
);
1319 if (gimple_uid (s
) != uid
)
/* Different blocks: use the dominator tree.  */
1328 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
/* NOTE(review): lossy extraction -- statements split across lines and some
   original lines missing; code tokens preserved verbatim, comments only.  */
1331 /* Insert STMT after INSERT_POINT.  */
1334 insert_stmt_after (gimple
*stmt
, gimple
*insert_point
)
1336 gimple_stmt_iterator gsi
;
/* After a PHI we cannot insert directly; remember its block and fall
   through to the label-skipping insertion below.  */
1339 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1340 bb
= gimple_bb (insert_point
);
/* Common case: INSERT_POINT does not end its block, insert right after
   it and inherit its UID so reassoc_stmt_dominates_stmt_p keeps working.  */
1341 else if (!stmt_ends_bb_p (insert_point
))
1343 gsi
= gsi_for_stmt (insert_point
);
1344 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1345 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1349 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1350 thus if it must end a basic block, it should be a call that can
1351 throw, or some assignment that can throw.  If it throws, the LHS
1352 of it will not be initialized though, so only valid places using
1353 the SSA_NAME should be dominated by the fallthru edge. */
1354 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
1355 gsi
= gsi_after_labels (bb
);
/* Empty destination block: borrow a UID from the last stmt if any,
   else use 1 as a fallback UID.  */
1356 if (gsi_end_p (gsi
))
1358 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1359 gimple_set_uid (stmt
,
1360 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1363 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1364 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
/* NOTE(review): lossy extraction -- statements split across lines and some
   original lines (braces, return) missing; tokens preserved verbatim.  */
1367 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1368 the result.  Places the statement after the definition of either
1369 OP1 or OP2.  Returns the new statement. */
1372 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
1374 gimple
*op1def
= NULL
, *op2def
= NULL
;
1375 gimple_stmt_iterator gsi
;
1379 /* Create the addition statement.  */
1380 op
= make_ssa_name (type
);
1381 sum
= gimple_build_assign (op
, opcode
, op1
, op2
);
1383 /* Find an insertion place and insert.  */
1384 if (TREE_CODE (op1
) == SSA_NAME
)
1385 op1def
= SSA_NAME_DEF_STMT (op1
);
1386 if (TREE_CODE (op2
) == SSA_NAME
)
1387 op2def
= SSA_NAME_DEF_STMT (op2
);
/* Both operands are constants or default defs: insert at the start of
   the function (the single successor of the entry block).  */
1388 if ((!op1def
|| gimple_nop_p (op1def
))
1389 && (!op2def
|| gimple_nop_p (op2def
)))
1391 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1392 if (gsi_end_p (gsi
))
1394 gimple_stmt_iterator gsi2
1395 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1396 gimple_set_uid (sum
,
1397 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1400 gimple_set_uid (sum
, gimple_uid (gsi_stmt (gsi
)));
1401 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
/* Otherwise insert after whichever defining stmt is dominated by the
   other, so both operands are available at the insertion point.  */
1405 gimple
*insert_point
;
1406 if ((!op1def
|| gimple_nop_p (op1def
))
1407 || (op2def
&& !gimple_nop_p (op2def
)
1408 && reassoc_stmt_dominates_stmt_p (op1def
, op2def
)))
1409 insert_point
= op2def
;
1411 insert_point
= op1def
;
1412 insert_stmt_after (sum
, insert_point
);
/* NOTE(review): lossy extraction -- statements split mid-token across
   lines, original line numbers fused into the text, and numerous original
   lines (braces, returns, parts of conditions) missing.  Code tokens
   below are preserved verbatim; only comments were added.  */
1419 /* Perform un-distribution of divisions and multiplications.
1420 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1421 to (A + B) / X for real X.
1423 The algorithm is organized as follows.
1425 - First we walk the addition chain *OPS looking for summands that
1426 are defined by a multiplication or a real division. This results
1427 in the candidates bitmap with relevant indices into *OPS.
1429 - Second we build the chains of multiplications or divisions for
1430 these candidates, counting the number of occurrences of (operand, code)
1431 pairs in all of the candidates chains.
1433 - Third we sort the (operand, code) pairs by number of occurrence and
1434 process them starting with the pair with the most uses.
1436 * For each such pair we walk the candidates again to build a
1437 second candidate bitmap noting all multiplication/division chains
1438 that have at least one occurrence of (operand, code).
1440 * We build an alternate addition chain only covering these
1441 candidates with one (operand, code) operation removed from their
1442 multiplication/division chain.
1444 * The first candidate gets replaced by the alternate addition chain
1445 multiplied/divided by the operand.
1447 * All candidate chains get disabled for further processing and
1448 processing of (operand, code) pairs continues.
1450 The alternate addition chains built are re-processed by the main
1451 reassociation algorithm which allows optimizing a * x * y + b * y * x
1452 to (a + b ) * x * y in one invocation of the reassociation pass. */
1455 undistribute_ops_list (enum tree_code opcode
,
1456 vec
<operand_entry
*> *ops
, struct loop
*loop
)
1458 unsigned int length
= ops
->length ();
1461 sbitmap candidates
, candidates2
;
1462 unsigned nr_candidates
, nr_candidates2
;
1463 sbitmap_iterator sbi0
;
1464 vec
<operand_entry
*> *subops
;
1465 bool changed
= false;
1466 int next_oecount_id
= 0;
/* Only PLUS_EXPR chains (of sufficient length -- the other half of this
   condition was lost in extraction) are handled.  */
1469 || opcode
!= PLUS_EXPR
)
1472 /* Build a list of candidates to process.  */
1473 candidates
= sbitmap_alloc (length
);
1474 bitmap_clear (candidates
);
1476 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1478 enum tree_code dcode
;
/* A candidate summand must be an SSA name defined by a reassociable
   MULT_EXPR or RDIV_EXPR in this loop.  */
1481 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1483 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1484 if (!is_gimple_assign (oe1def
))
1486 dcode
= gimple_assign_rhs_code (oe1def
);
1487 if ((dcode
!= MULT_EXPR
1488 && dcode
!= RDIV_EXPR
)
1489 || !is_reassociable_op (oe1def
, dcode
, loop
))
1492 bitmap_set_bit (candidates
, i
);
/* Nothing to un-distribute with fewer than two candidates.  */
1496 if (nr_candidates
< 2)
1498 sbitmap_free (candidates
);
1502 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1504 fprintf (dump_file
, "searching for un-distribute opportunities ");
1505 print_generic_expr (dump_file
,
1506 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, 0);
1507 fprintf (dump_file
, " %d\n", nr_candidates
);
1510 /* Build linearized sub-operand lists and the counting table.  */
1513 hash_table
<oecount_hasher
> ctable (15);
1515 /* ??? Macro arguments cannot have multi-argument template types in
1516 them. This typedef is needed to workaround that limitation. */
1517 typedef vec
<operand_entry
*> vec_operand_entry_t_heap
;
1518 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1519 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1522 enum tree_code oecode
;
1525 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1526 oecode
= gimple_assign_rhs_code (oedef
);
1527 linearize_expr_tree (&subops
[i
], oedef
,
1528 associative_tree_code (oecode
), false);
1530 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1537 c
.id
= next_oecount_id
++;
/* NOTE(review): oecount ids are biased by 42 when stored in the hash
   table (see the "+ 41" / "- 42" below) -- presumably to avoid the
   hasher's reserved empty/deleted values; confirm against oecount_hasher.  */
1540 idx
= cvec
.length () + 41;
1541 slot
= ctable
.find_slot (idx
, INSERT
);
1549 cvec
[*slot
- 42].cnt
++;
1554 /* Sort the counting table.  */
1555 cvec
.qsort (oecount_cmp
);
1557 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1560 fprintf (dump_file
, "Candidates:\n");
1561 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1563 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1564 c
->oecode
== MULT_EXPR
1565 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1566 print_generic_expr (dump_file
, c
->op
, 0);
1567 fprintf (dump_file
, "\n");
1571 /* Process the (operand, code) pairs in order of most occurrence.  */
1572 candidates2
= sbitmap_alloc (length
);
1573 while (!cvec
.is_empty ())
1575 oecount
*c
= &cvec
.last ();
1579 /* Now collect the operands in the outer chain that contain
1580 the common operand in their inner chain.  */
1581 bitmap_clear (candidates2
);
1583 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1586 enum tree_code oecode
;
1588 tree op
= (*ops
)[i
]->op
;
1590 /* If we undistributed in this chain already this may be
1592 if (TREE_CODE (op
) != SSA_NAME
)
1595 oedef
= SSA_NAME_DEF_STMT (op
);
1596 oecode
= gimple_assign_rhs_code (oedef
);
1597 if (oecode
!= c
->oecode
)
1600 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1602 if (oe1
->op
== c
->op
)
1604 bitmap_set_bit (candidates2
, i
);
/* At least two chains share (operand, code): build the alternate
   addition chain with one occurrence factored out of each.  */
1611 if (nr_candidates2
>= 2)
1613 operand_entry
*oe1
, *oe2
;
1615 int first
= bitmap_first_set_bit (candidates2
);
1617 /* Build the new addition chain.  */
1618 oe1
= (*ops
)[first
];
1619 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1621 fprintf (dump_file
, "Building (");
1622 print_generic_expr (dump_file
, oe1
->op
, 0);
1624 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1625 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1629 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1631 fprintf (dump_file
, " + ");
1632 print_generic_expr (dump_file
, oe2
->op
, 0);
1634 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1635 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1636 oe1
->op
, oe2
->op
, opcode
);
/* Merged entries are neutralized to zero in the addition chain.  */
1637 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1639 oe1
->op
= gimple_get_lhs (sum
);
1642 /* Apply the multiplication/division.  */
1643 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1644 oe1
->op
, c
->op
, c
->oecode
);
1645 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1647 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1648 print_generic_expr (dump_file
, c
->op
, 0);
1649 fprintf (dump_file
, "\n");
1652 /* Record it in the addition chain and disable further
1653 undistribution with this op.  */
1654 oe1
->op
= gimple_assign_lhs (prod
);
1655 oe1
->rank
= get_rank (oe1
->op
);
1656 subops
[first
].release ();
/* Release all per-candidate sub-operand vectors and the bitmaps.  */
1664 for (i
= 0; i
< ops
->length (); ++i
)
1665 subops
[i
].release ();
1668 sbitmap_free (candidates
);
1669 sbitmap_free (candidates2
);
/* NOTE(review): lossy extraction -- statements split mid-token across
   lines, original line numbers fused into the text, and several original
   lines (braces, returns/continues) missing.  Tokens preserved verbatim;
   only comments added.  */
1674 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1675 expression, examine the other OPS to see if any of them are comparisons
1676 of the same values, which we may be able to combine or eliminate.
1677 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1680 eliminate_redundant_comparison (enum tree_code opcode
,
1681 vec
<operand_entry
*> *ops
,
1682 unsigned int currindex
,
1683 operand_entry
*curr
)
1686 enum tree_code lcode
, rcode
;
1687 gimple
*def1
, *def2
;
/* Only bitwise and/or of boolean-ish comparisons are candidates.  */
1691 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
1694 /* Check that CURR is a comparison.  */
1695 if (TREE_CODE (curr
->op
) != SSA_NAME
)
1697 def1
= SSA_NAME_DEF_STMT (curr
->op
);
1698 if (!is_gimple_assign (def1
))
1700 lcode
= gimple_assign_rhs_code (def1
);
1701 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
1703 op1
= gimple_assign_rhs1 (def1
);
1704 op2
= gimple_assign_rhs2 (def1
);
1706 /* Now look for a similar comparison in the remaining OPS.  */
1707 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
1711 if (TREE_CODE (oe
->op
) != SSA_NAME
)
1713 def2
= SSA_NAME_DEF_STMT (oe
->op
);
1714 if (!is_gimple_assign (def2
))
1716 rcode
= gimple_assign_rhs_code (def2
);
1717 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
1720 /* If we got here, we have a match. See if we can combine the
/* Try folding the two comparisons into one via fold-const helpers;
   T is NULL_TREE when no combination exists.  */
1722 if (opcode
== BIT_IOR_EXPR
)
1723 t
= maybe_fold_or_comparisons (lcode
, op1
, op2
,
1724 rcode
, gimple_assign_rhs1 (def2
),
1725 gimple_assign_rhs2 (def2
));
1727 t
= maybe_fold_and_comparisons (lcode
, op1
, op2
,
1728 rcode
, gimple_assign_rhs1 (def2
),
1729 gimple_assign_rhs2 (def2
));
1733 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1734 always give us a boolean_type_node value back. If the original
1735 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1736 we need to convert. */
1737 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
1738 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
/* A non-constant combined comparison must be a gimple-compatible
   comparison of two valid operands before we commit to it.  */
1740 if (TREE_CODE (t
) != INTEGER_CST
1741 && !operand_equal_p (t
, curr
->op
, 0))
1743 enum tree_code subcode
;
1744 tree newop1
, newop2
;
1745 if (!COMPARISON_CLASS_P (t
))
1747 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1748 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1749 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1750 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
1754 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1756 fprintf (dump_file
, "Equivalence: ");
1757 print_generic_expr (dump_file
, curr
->op
, 0);
1758 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
1759 print_generic_expr (dump_file
, oe
->op
, 0);
1760 fprintf (dump_file
, " -> ");
1761 print_generic_expr (dump_file
, t
, 0);
1762 fprintf (dump_file
, "\n");
1765 /* Now we can delete oe, as it has been subsumed by the new combined
1767 ops
->ordered_remove (i
);
1768 reassociate_stats
.ops_eliminated
++;
1770 /* If t is the same as curr->op, we're done. Otherwise we must
1771 replace curr->op with t. Special case is if we got a constant
1772 back, in which case we add it to the end instead of in place of
1773 the current entry. */
1774 if (TREE_CODE (t
) == INTEGER_CST
)
1776 ops
->ordered_remove (currindex
);
1777 add_to_ops_vec (ops
, t
);
1779 else if (!operand_equal_p (t
, curr
->op
, 0))
1782 enum tree_code subcode
;
/* Materialize the combined comparison as a new gimple stmt and let
   CURR point at its result.  */
1785 gcc_assert (COMPARISON_CLASS_P (t
));
1786 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1787 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1788 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1789 gcc_checking_assert (is_gimple_val (newop1
)
1790 && is_gimple_val (newop2
));
1791 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
1792 curr
->op
= gimple_get_lhs (sum
);
/* NOTE(review): lossy extraction -- statements split across lines and
   several original lines missing (including part of this function's
   leading comment and the repeated-operand bookkeeping).  Tokens are
   preserved verbatim; only comments added.  */
1801 /* Transform repeated addition of same values into multiply with
1804 transform_add_to_multiply (vec
<operand_entry
*> *ops
)
1807 tree op
= NULL_TREE
;
1809 int i
, start
= -1, end
= 0, count
= 0;
/* Each pair records the [start, end] index range of one run of equal
   operands found in *OPS.  */
1810 auto_vec
<std::pair
<int, int> > indxs
;
1811 bool changed
= false;
/* Integer types always qualify; float types only under
   -funsafe-math-optimizations (x+x+x -> 3*x is not exact in FP).  */
1813 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops
)[0]->op
))
1814 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops
)[0]->op
))
1815 || !flag_unsafe_math_optimizations
))
1818 /* Look for repeated operands.  */
1819 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
1827 else if (operand_equal_p (oe
->op
, op
, 0))
1835 indxs
.safe_push (std::make_pair (start
, end
));
/* Flush the trailing run, if any.  */
1843 indxs
.safe_push (std::make_pair (start
, end
));
/* Process runs back to front so earlier indices stay valid while
   removing entries.  */
1845 for (j
= indxs
.length () - 1; j
>= 0; --j
)
1847 /* Convert repeated operand addition to multiplication.  */
1848 start
= indxs
[j
].first
;
1849 end
= indxs
[j
].second
;
1850 op
= (*ops
)[start
]->op
;
1851 count
= end
- start
+ 1;
1852 for (i
= end
; i
>= start
; --i
)
1853 ops
->unordered_remove (i
);
1854 tree tmp
= make_ssa_name (TREE_TYPE (op
));
1855 tree cst
= build_int_cst (integer_type_node
, count
);
1857 = gimple_build_assign (tmp
, MULT_EXPR
,
1858 op
, fold_convert (TREE_TYPE (op
), cst
));
/* Mark the new multiply visited so linearization does not re-expand it,
   and push its result back onto the addition chain.  */
1859 gimple_set_visited (mul_stmt
, true);
1860 add_to_ops_vec (ops
, tmp
, mul_stmt
);
/* NOTE(review): lossy extraction -- statements split across lines and
   several original lines (length checks, braces, returns) missing.
   Tokens preserved verbatim; only comments added.  */
1868 /* Perform various identities and other optimizations on the list of
1869 operand entries, stored in OPS. The tree code for the binary
1870 operation between all the operands is OPCODE. */
1873 optimize_ops_list (enum tree_code opcode
,
1874 vec
<operand_entry
*> *ops
)
1876 unsigned int length
= ops
->length ();
1879 operand_entry
*oelast
= NULL
;
1880 bool iterate
= false;
1885 oelast
= ops
->last ();
1887 /* If the last two are constants, pop the constants off, merge them
1888 and try the next two. */
/* Operand entries are rank-sorted with constants (rank 0) at the end,
   so the last two entries are the candidates for constant folding.  */
1889 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
1891 operand_entry
*oelm1
= (*ops
)[length
- 2];
1893 if (oelm1
->rank
== 0
1894 && is_gimple_min_invariant (oelm1
->op
)
1895 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
1896 TREE_TYPE (oelast
->op
)))
1898 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
1899 oelm1
->op
, oelast
->op
);
1901 if (folded
&& is_gimple_min_invariant (folded
))
1903 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1904 fprintf (dump_file
, "Merging constants\n");
/* Replace the two constants with their fold and recurse to merge any
   further constant pairs.  */
1909 add_to_ops_vec (ops
, folded
);
1910 reassociate_stats
.constants_eliminated
++;
1912 optimize_ops_list (opcode
, ops
);
1918 eliminate_using_constants (opcode
, ops
);
/* Walk the list applying pairwise eliminations; index I only advances
   when no elimination fired (hence no i++ in the for header).  */
1921 for (i
= 0; ops
->iterate (i
, &oe
);)
1925 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
1927 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
1928 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
1929 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
/* If anything changed, refresh LENGTH/OELAST and re-run the whole
   optimization until a fixed point is reached.  */
1941 length
= ops
->length ();
1942 oelast
= ops
->last ();
1945 optimize_ops_list (opcode
, ops
);
1948 /* The following functions are subroutines to optimize_range_tests and allow
1949 it to try to change a logical combination of comparisons into a range
1953 X == 2 || X == 5 || X == 3 || X == 4
1957 (unsigned) (X - 2) <= 3
1959 For more information see comments above fold_test_range in fold-const.c,
1960 this implementation is for GIMPLE. */
1968 bool strict_overflow_p
;
1969 unsigned int idx
, next
;
/* NOTE(review): lossy extraction -- statements split across lines and a
   large part of the body (the switch over CODE, loop structure, and the
   final stores to R) is missing.  Tokens preserved verbatim.  */
1972 /* This is similar to make_range in fold-const.c, but on top of
1973 GIMPLE instead of trees. If EXP is non-NULL, it should be
1974 an SSA_NAME and STMT argument is ignored, otherwise STMT
1975 argument should be a GIMPLE_COND. */
1978 init_range_entry (struct range_entry
*r
, tree exp
, gimple
*stmt
)
1982 bool is_bool
, strict_overflow_p
;
/* Initialize R to the trivial "EXP != 0" range before refining.  */
1986 r
->strict_overflow_p
= false;
1988 r
->high
= NULL_TREE
;
1989 if (exp
!= NULL_TREE
1990 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
1993 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1994 and see if we can refine the range. Some of the cases below may not
1995 happen, but it doesn't seem worth worrying about this. We "continue"
1996 the outer loop when we've changed something; otherwise we "break"
1997 the switch, which will "break" the while. */
1998 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
2001 strict_overflow_p
= false;
/* Classify EXP's type: 1-bit unsigned and BOOLEAN_TYPE are treated as
   boolean for the negation safety check further below.  */
2003 if (exp
== NULL_TREE
)
2005 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
2007 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
2012 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
2017 enum tree_code code
;
2018 tree arg0
, arg1
, exp_type
;
/* Fetch CODE/ARG0/ARG1 either from EXP's defining assignment or,
   when EXP is NULL, from the GIMPLE_COND in STMT.  */
2022 if (exp
!= NULL_TREE
)
2024 if (TREE_CODE (exp
) != SSA_NAME
2025 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
))
2028 stmt
= SSA_NAME_DEF_STMT (exp
);
2029 if (!is_gimple_assign (stmt
))
2032 code
= gimple_assign_rhs_code (stmt
);
2033 arg0
= gimple_assign_rhs1 (stmt
);
2034 arg1
= gimple_assign_rhs2 (stmt
);
2035 exp_type
= TREE_TYPE (exp
);
2039 code
= gimple_cond_code (stmt
);
2040 arg0
= gimple_cond_lhs (stmt
);
2041 arg1
= gimple_cond_rhs (stmt
);
2042 exp_type
= boolean_type_node
;
2045 if (TREE_CODE (arg0
) != SSA_NAME
)
2047 loc
= gimple_location (stmt
);
2051 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
2052 /* Ensure the range is either +[-,0], +[0,0],
2053 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2054 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2055 or similar expression of unconditional true or
2056 false, it should not be negated. */
2057 && ((high
&& integer_zerop (high
))
2058 || (low
&& integer_onep (low
))))
2071 if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
2073 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
2078 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
/* Delegate one refinement step to fold-const's make_range_step;
   NEXP non-NULL means progress was made and the loop continues.  */
2093 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
2095 &strict_overflow_p
);
2096 if (nexp
!= NULL_TREE
)
2099 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2112 r
->strict_overflow_p
= strict_overflow_p
;
/* NOTE(review): lossy extraction -- statements split across lines and the
   return statements (-1/1/0) plus the fold_binary operand lines are
   missing.  Tokens preserved verbatim; only comments added.  */
2116 /* Comparison function for qsort. Sort entries
2117 without SSA_NAME exp first, then with SSA_NAMEs sorted
2118 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2119 by increasing ->low and if ->low is the same, by increasing
2120 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2124 range_entry_cmp (const void *a
, const void *b
)
2126 const struct range_entry
*p
= (const struct range_entry
*) a
;
2127 const struct range_entry
*q
= (const struct range_entry
*) b
;
2129 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
2131 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2133 /* Group range_entries for the same SSA_NAME together.  */
2134 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
2136 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
2138 /* If ->low is different, NULL low goes first, then by
2140 if (p
->low
!= NULL_TREE
)
2142 if (q
->low
!= NULL_TREE
)
/* Compare the two low bounds via constant folding; a folded result of
   boolean true (integer_onep) decides the ordering.  */
2144 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2146 if (tem
&& integer_onep (tem
))
2148 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2150 if (tem
&& integer_onep (tem
))
2156 else if (q
->low
!= NULL_TREE
)
2158 /* If ->high is different, NULL high goes last, before that by
2160 if (p
->high
!= NULL_TREE
)
2162 if (q
->high
!= NULL_TREE
)
2164 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2166 if (tem
&& integer_onep (tem
))
2168 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2170 if (tem
&& integer_onep (tem
))
2176 else if (q
->high
!= NULL_TREE
)
2178 /* If both ranges are the same, sort below by ascending idx.  */
2183 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
/* idx is unique per entry, making the sort total and deterministic.  */
2186 if (p
->idx
< q
->idx
)
2190 gcc_checking_assert (p
->idx
> q
->idx
);
/* NOTE(review): lossy extraction -- statements split mid-token, original
   line numbers fused in, and many original lines (declarations of OP,
   returns, several branch bodies) are missing.  Tokens preserved
   verbatim; only comments added.  */
2195 /* Helper routine of optimize_range_test.
2196 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2197 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2198 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2199 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2200 an array of COUNT pointers to other ranges. Return
2201 true if the range merge has been successful.
2202 If OPCODE is ERROR_MARK, this is called from within
2203 maybe_optimize_range_tests and is performing inter-bb range optimization.
2204 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2208 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2209 struct range_entry
**otherrangep
,
2210 unsigned int count
, enum tree_code opcode
,
2211 vec
<operand_entry
*> *ops
, tree exp
, gimple_seq seq
,
2212 bool in_p
, tree low
, tree high
, bool strict_overflow_p
)
2214 operand_entry
*oe
= (*ops
)[range
->idx
];
/* In inter-bb mode (NULL op) the stmt is the controlling last stmt of
   the block recorded in oe->id.  */
2216 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
2217 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2218 location_t loc
= gimple_location (stmt
);
2219 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2220 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2222 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2223 gimple_stmt_iterator gsi
;
2224 unsigned int i
, uid
;
/* build_range_check can fail; bail out if it did.  */
2226 if (tem
== NULL_TREE
)
2229 /* If op is default def SSA_NAME, there is no place to insert the
2230 new comparison. Give up, unless we can use OP itself as the
2232 if (op
&& SSA_NAME_IS_DEFAULT_DEF (op
))
2234 if (op
== range
->exp
2235 && ((TYPE_PRECISION (optype
) == 1 && TYPE_UNSIGNED (optype
))
2236 || TREE_CODE (optype
) == BOOLEAN_TYPE
)
2238 || (TREE_CODE (tem
) == EQ_EXPR
2239 && TREE_OPERAND (tem
, 0) == op
2240 && integer_onep (TREE_OPERAND (tem
, 1))))
2241 && opcode
!= BIT_IOR_EXPR
2242 && (opcode
!= ERROR_MARK
|| oe
->rank
!= BIT_IOR_EXPR
))
2251 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2252 warning_at (loc
, OPT_Wstrict_overflow
,
2253 "assuming signed overflow does not occur "
2254 "when simplifying range test");
2256 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2258 struct range_entry
*r
;
2259 fprintf (dump_file
, "Optimizing range tests ");
2260 print_generic_expr (dump_file
, range
->exp
, 0);
2261 fprintf (dump_file
, " %c[", range
->in_p
? '+' : '-');
2262 print_generic_expr (dump_file
, range
->low
, 0);
2263 fprintf (dump_file
, ", ");
2264 print_generic_expr (dump_file
, range
->high
, 0);
2265 fprintf (dump_file
, "]");
2266 for (i
= 0; i
< count
; i
++)
2272 fprintf (dump_file
, " and %c[", r
->in_p
? '+' : '-');
2273 print_generic_expr (dump_file
, r
->low
, 0);
2274 fprintf (dump_file
, ", ");
2275 print_generic_expr (dump_file
, r
->high
, 0);
2276 fprintf (dump_file
, "]");
2278 fprintf (dump_file
, "\n into ");
2279 print_generic_expr (dump_file
, tem
, 0);
2280 fprintf (dump_file
, "\n");
/* For OR chains the merged test is built negated; invert it back.  */
2283 if (opcode
== BIT_IOR_EXPR
2284 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2285 tem
= invert_truthvalue_loc (loc
, tem
);
2287 tem
= fold_convert_loc (loc
, optype
, tem
);
2290 gsi
= gsi_for_stmt (stmt
);
2291 uid
= gimple_uid (stmt
);
2299 gcc_checking_assert (tem
== op
);
2300 /* In rare cases range->exp can be equal to lhs of stmt.
2301 In that case we have to insert after the stmt rather then before
2302 it. If stmt is a PHI, insert it at the start of the basic block. */
2303 else if (op
!= range
->exp
)
2305 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2306 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
2310 else if (gimple_code (stmt
) != GIMPLE_PHI
)
2312 gsi_insert_seq_after (&gsi
, seq
, GSI_CONTINUE_LINKING
);
2313 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, false,
2314 GSI_CONTINUE_LINKING
);
/* PHI case: insert at the start of the block, after labels.  */
2318 gsi
= gsi_after_labels (gimple_bb (stmt
));
2319 if (!gsi_end_p (gsi
))
2320 uid
= gimple_uid (gsi_stmt (gsi
));
2323 gsi
= gsi_start_bb (gimple_bb (stmt
));
2325 while (!gsi_end_p (gsi
))
2327 uid
= gimple_uid (gsi_stmt (gsi
));
2331 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2332 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
2334 if (gsi_end_p (gsi
))
2335 gsi
= gsi_last_bb (gimple_bb (stmt
));
/* Propagate a valid UID backwards onto any freshly inserted stmts so
   reassoc's UID-based dominance test stays usable.  */
2339 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2340 if (gimple_uid (gsi_stmt (gsi
)))
2343 gimple_set_uid (gsi_stmt (gsi
), uid
);
2350 range
->strict_overflow_p
= false;
2352 for (i
= 0; i
< count
; i
++)
2355 range
= otherrange
+ i
;
2357 range
= otherrangep
[i
];
2358 oe
= (*ops
)[range
->idx
];
2359 /* Now change all the other range test immediate uses, so that
2360 those tests will be optimized away. */
/* Replace each merged test with the neutral element of its chain:
   0 for OR, 1 for AND (ERROR_MARK consults oe->rank for the op kind).  */
2361 if (opcode
== ERROR_MARK
)
2364 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
2365 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
2367 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
2368 ? boolean_false_node
: boolean_true_node
);
2371 oe
->op
= error_mark_node
;
2372 range
->exp
= NULL_TREE
;
/* NOTE(review): lossy extraction -- statements split across lines, the
   "return false"/"return true" lines and some braces missing.  Tokens
   preserved verbatim; only comments added.  */
2377 /* Optimize X == CST1 || X == CST2
2378 if popcount (CST1 ^ CST2) == 1 into
2379 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2380 Similarly for ranges. E.g.
2381 X != 2 && X != 3 && X != 10 && X != 11
2382 will be transformed by the previous optimization into
2383 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2384 and this loop can transform that into
2385 !(((X & ~8) - 2U) <= 1U). */
2388 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
2389 tree lowi
, tree lowj
, tree highi
, tree highj
,
2390 vec
<operand_entry
*> *ops
,
2391 struct range_entry
*rangei
,
2392 struct range_entry
*rangej
)
2394 tree lowxor
, highxor
, tem
, exp
;
2395 /* Check lowi ^ lowj == highi ^ highj and
2396 popcount (lowi ^ lowj) == 1. */
2397 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
2398 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
2400 if (!integer_pow2p (lowxor
))
2402 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
2403 if (!tree_int_cst_equal (lowxor
, highxor
))
/* Conditions hold: mask off the single differing bit from EXP and from
   both bounds, then merge the two ranges into one test.  */
2406 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
2407 exp
= fold_build2 (BIT_AND_EXPR
, type
, rangei
->exp
, tem
);
2408 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
2409 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
2410 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, exp
,
2411 NULL
, rangei
->in_p
, lowj
, highj
,
2412 rangei
->strict_overflow_p
2413 || rangej
->strict_overflow_p
))
/* NOTE(review): lossy extraction -- statements split across lines, the
   early-return lines and some braces missing.  Tokens preserved
   verbatim; only comments added.  */
2418 /* Optimize X == CST1 || X == CST2
2419 if popcount (CST2 - CST1) == 1 into
2420 ((X - CST1) & ~(CST2 - CST1)) == 0.
2421 Similarly for ranges. E.g.
2422 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2423 || X == 75 || X == 45
2424 will be transformed by the previous optimization into
2425 (X - 43U) <= 3U || (X - 75U) <= 3U
2426 and this loop can transform that into
2427 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2429 optimize_range_tests_diff (enum tree_code opcode
, tree type
,
2430 tree lowi
, tree lowj
, tree highi
, tree highj
,
2431 vec
<operand_entry
*> *ops
,
2432 struct range_entry
*rangei
,
2433 struct range_entry
*rangej
)
2435 tree tem1
, tem2
, mask
;
2436 /* Check highi - lowi == highj - lowj.  */
2437 tem1
= fold_binary (MINUS_EXPR
, type
, highi
, lowi
);
2438 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2440 tem2
= fold_binary (MINUS_EXPR
, type
, highj
, lowj
);
2441 if (!tree_int_cst_equal (tem1
, tem2
))
2443 /* Check popcount (lowj - lowi) == 1.  */
2444 tem1
= fold_binary (MINUS_EXPR
, type
, lowj
, lowi
);
2445 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2447 if (!integer_pow2p (tem1
))
/* Do the arithmetic in the unsigned variant of TYPE so the subtraction
   below cannot trigger signed-overflow UB.  */
2450 type
= unsigned_type_for (type
);
2451 tem1
= fold_convert (type
, tem1
);
2452 tem2
= fold_convert (type
, tem2
);
2453 lowi
= fold_convert (type
, lowi
);
2454 mask
= fold_build1 (BIT_NOT_EXPR
, type
, tem1
);
2455 tem1
= fold_binary (MINUS_EXPR
, type
,
2456 fold_convert (type
, rangei
->exp
), lowi
);
2457 tem1
= fold_build2 (BIT_AND_EXPR
, type
, tem1
, mask
);
2458 lowj
= build_int_cst (type
, 0);
2459 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, tem1
,
2460 NULL
, rangei
->in_p
, lowj
, tem2
,
2461 rangei
->strict_overflow_p
2462 || rangej
->strict_overflow_p
))
/* NOTE(review): lossy extraction -- statements split across lines,
   continues/braces and the tail of the function missing.  Tokens
   preserved verbatim; only comments added.  */
2467 /* It does some common checks for function optimize_range_tests_xor and
2468 optimize_range_tests_diff.
2469 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2470 Else it calls optimize_range_tests_diff. */
2473 optimize_range_tests_1 (enum tree_code opcode
, int first
, int length
,
2474 bool optimize_xor
, vec
<operand_entry
*> *ops
,
2475 struct range_entry
*ranges
)
2478 bool any_changes
= false;
2479 for (i
= first
; i
< length
; i
++)
2481 tree lowi
, highi
, lowj
, highj
, type
, tem
;
/* Only live (+) integral ranges are candidates.  */
2483 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2485 type
= TREE_TYPE (ranges
[i
].exp
);
2486 if (!INTEGRAL_TYPE_P (type
))
/* NULL bounds mean the type's extremes (cf. range_entry_cmp docs).  */
2488 lowi
= ranges
[i
].low
;
2489 if (lowi
== NULL_TREE
)
2490 lowi
= TYPE_MIN_VALUE (type
);
2491 highi
= ranges
[i
].high
;
2492 if (highi
== NULL_TREE
)
/* Pair I with up to 63 following ranges on the same expression; the
   sort guarantees same-exp ranges are adjacent and ordered.  */
2494 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
2497 if (ranges
[i
].exp
!= ranges
[j
].exp
|| ranges
[j
].in_p
)
2499 lowj
= ranges
[j
].low
;
2500 if (lowj
== NULL_TREE
)
2502 highj
= ranges
[j
].high
;
2503 if (highj
== NULL_TREE
)
2504 highj
= TYPE_MAX_VALUE (type
);
2505 /* Check lowj > highi.  */
2506 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2508 if (tem
== NULL_TREE
|| !integer_onep (tem
))
2511 changes
= optimize_range_tests_xor (opcode
, type
, lowi
, lowj
,
2513 ranges
+ i
, ranges
+ j
);
2515 changes
= optimize_range_tests_diff (opcode
, type
, lowi
, lowj
,
2517 ranges
+ i
, ranges
+ j
);
/* NOTE(review): lossy extraction -- statements split across lines and the
   returns, the widest_int declaration for BIAS, and several branch lines
   missing.  Tokens preserved verbatim; only comments added.  */
2528 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2529 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2530 EXP on success, NULL otherwise. */
2533 extract_bit_test_mask (tree exp
, int prec
, tree totallow
, tree low
, tree high
,
2534 wide_int
*mask
, tree
*totallowp
)
/* Range width (HIGH - LOW) must be a non-negative constant smaller than
   the word precision PREC, otherwise a bit test cannot represent it.  */
2536 tree tem
= int_const_binop (MINUS_EXPR
, high
, low
);
2537 if (tem
== NULL_TREE
2538 || TREE_CODE (tem
) != INTEGER_CST
2539 || TREE_OVERFLOW (tem
)
2540 || tree_int_cst_sgn (tem
) == -1
2541 || compare_tree_int (tem
, prec
) != -1)
2544 unsigned HOST_WIDE_INT max
= tree_to_uhwi (tem
) + 1;
2545 *mask
= wi::shifted_mask (0, max
, false, prec
);
/* Fold a masking BIT_AND with a single-bit complement into the mask:
   (X & ~bit) in [low,high] covers X in [low,high] plus the bit-shifted
   copies of that range.  */
2546 if (TREE_CODE (exp
) == BIT_AND_EXPR
2547 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
2549 widest_int msk
= wi::to_widest (TREE_OPERAND (exp
, 1));
2550 msk
= wi::zext (~msk
, TYPE_PRECISION (TREE_TYPE (exp
)));
2551 if (wi::popcount (msk
) == 1
2552 && wi::ltu_p (msk
, prec
- max
))
2554 *mask
|= wi::shifted_mask (msk
.to_uhwi (), max
, false, prec
);
2555 max
+= msk
.to_uhwi ();
2556 exp
= TREE_OPERAND (exp
, 0);
/* Strip (X + CST) when LOW is zero by folding -CST into the bias.  */
2557 if (integer_zerop (low
)
2558 && TREE_CODE (exp
) == PLUS_EXPR
2559 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
2561 tree ret
= TREE_OPERAND (exp
, 0);
2564 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp
, 1)),
2565 TYPE_PRECISION (TREE_TYPE (low
))));
2566 tree tbias
= wide_int_to_tree (TREE_TYPE (ret
), bias
);
2572 else if (!tree_int_cst_lt (totallow
, tbias
))
2574 bias
= wi::to_widest (tbias
);
2575 bias
-= wi::to_widest (totallow
);
2576 if (bias
>= 0 && bias
< prec
- max
)
2578 *mask
= wi::lshift (*mask
, bias
);
/* Otherwise shift the mask so it is relative to TOTALLOW, provided the
   offset (LOW - TOTALLOW) still fits in the word.  */
2586 if (!tree_int_cst_lt (totallow
, low
))
2588 tem
= int_const_binop (MINUS_EXPR
, low
, totallow
);
2589 if (tem
== NULL_TREE
2590 || TREE_CODE (tem
) != INTEGER_CST
2591 || TREE_OVERFLOW (tem
)
2592 || compare_tree_int (tem
, prec
- max
) == 1)
2595 *mask
= wi::lshift (*mask
, wi::to_widest (tem
));
2599 /* Attempt to optimize small range tests using bit test.
2601 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2602 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2603 has been by earlier optimizations optimized into:
2604 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2605 As all the 43 through 82 range is less than 64 numbers,
2606 for 64-bit word targets optimize that into:
2607 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
2610 optimize_range_tests_to_bit_test (enum tree_code opcode
, int first
, int length
,
2611 vec
<operand_entry
*> *ops
,
2612 struct range_entry
*ranges
)
2615 bool any_changes
= false;
2616 int prec
= GET_MODE_BITSIZE (word_mode
);
2617 auto_vec
<struct range_entry
*, 64> candidates
;
2619 for (i
= first
; i
< length
- 2; i
++)
2621 tree lowi
, highi
, lowj
, highj
, type
;
2623 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2625 type
= TREE_TYPE (ranges
[i
].exp
);
2626 if (!INTEGRAL_TYPE_P (type
))
2628 lowi
= ranges
[i
].low
;
2629 if (lowi
== NULL_TREE
)
2630 lowi
= TYPE_MIN_VALUE (type
);
2631 highi
= ranges
[i
].high
;
2632 if (highi
== NULL_TREE
)
2635 tree exp
= extract_bit_test_mask (ranges
[i
].exp
, prec
, lowi
, lowi
,
2636 highi
, &mask
, &lowi
);
2637 if (exp
== NULL_TREE
)
2639 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2640 candidates
.truncate (0);
2641 int end
= MIN (i
+ 64, length
);
2642 for (j
= i
+ 1; j
< end
; j
++)
2645 if (ranges
[j
].exp
== NULL_TREE
|| ranges
[j
].in_p
)
2647 if (ranges
[j
].exp
== exp
)
2649 else if (TREE_CODE (ranges
[j
].exp
) == BIT_AND_EXPR
)
2651 exp2
= TREE_OPERAND (ranges
[j
].exp
, 0);
2654 else if (TREE_CODE (exp2
) == PLUS_EXPR
)
2656 exp2
= TREE_OPERAND (exp2
, 0);
2666 lowj
= ranges
[j
].low
;
2667 if (lowj
== NULL_TREE
)
2669 highj
= ranges
[j
].high
;
2670 if (highj
== NULL_TREE
)
2671 highj
= TYPE_MAX_VALUE (type
);
2673 exp2
= extract_bit_test_mask (ranges
[j
].exp
, prec
, lowi
, lowj
,
2674 highj
, &mask2
, NULL
);
2678 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2679 candidates
.safe_push (&ranges
[j
]);
2682 /* If we need otherwise 3 or more comparisons, use a bit test. */
2683 if (candidates
.length () >= 2)
2685 tree high
= wide_int_to_tree (TREE_TYPE (lowi
),
2686 wi::to_widest (lowi
)
2687 + prec
- 1 - wi::clz (mask
));
2688 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
2690 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
2691 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2692 location_t loc
= gimple_location (stmt
);
2693 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2695 /* See if it isn't cheaper to pretend the minimum value of the
2696 range is 0, if maximum value is small enough.
2697 We can avoid then subtraction of the minimum value, but the
2698 mask constant could be perhaps more expensive. */
2699 if (compare_tree_int (lowi
, 0) > 0
2700 && compare_tree_int (high
, prec
) < 0)
2703 HOST_WIDE_INT m
= tree_to_uhwi (lowi
);
2704 rtx reg
= gen_raw_REG (word_mode
, 10000);
2705 bool speed_p
= optimize_bb_for_speed_p (gimple_bb (stmt
));
2706 cost_diff
= set_rtx_cost (gen_rtx_PLUS (word_mode
, reg
,
2707 GEN_INT (-m
)), speed_p
);
2708 rtx r
= immed_wide_int_const (mask
, word_mode
);
2709 cost_diff
+= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
2710 word_mode
, speed_p
);
2711 r
= immed_wide_int_const (wi::lshift (mask
, m
), word_mode
);
2712 cost_diff
-= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
2713 word_mode
, speed_p
);
2716 mask
= wi::lshift (mask
, m
);
2717 lowi
= build_zero_cst (TREE_TYPE (lowi
));
2721 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2723 if (tem
== NULL_TREE
|| is_gimple_val (tem
))
2725 tree etype
= unsigned_type_for (TREE_TYPE (exp
));
2726 exp
= fold_build2_loc (loc
, MINUS_EXPR
, etype
,
2727 fold_convert_loc (loc
, etype
, exp
),
2728 fold_convert_loc (loc
, etype
, lowi
));
2729 exp
= fold_convert_loc (loc
, integer_type_node
, exp
);
2730 tree word_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
2731 exp
= fold_build2_loc (loc
, LSHIFT_EXPR
, word_type
,
2732 build_int_cst (word_type
, 1), exp
);
2733 exp
= fold_build2_loc (loc
, BIT_AND_EXPR
, word_type
, exp
,
2734 wide_int_to_tree (word_type
, mask
));
2735 exp
= fold_build2_loc (loc
, EQ_EXPR
, optype
, exp
,
2736 build_zero_cst (word_type
));
2737 if (is_gimple_val (exp
))
2740 /* The shift might have undefined behavior if TEM is true,
2741 but reassociate_bb isn't prepared to have basic blocks
2742 split when it is running. So, temporarily emit a code
2743 with BIT_IOR_EXPR instead of &&, and fix it up in
2746 tem
= force_gimple_operand (tem
, &seq
, true, NULL_TREE
);
2747 gcc_assert (TREE_CODE (tem
) == SSA_NAME
);
2748 gimple_set_visited (SSA_NAME_DEF_STMT (tem
), true);
2750 exp
= force_gimple_operand (exp
, &seq2
, true, NULL_TREE
);
2751 gimple_seq_add_seq_without_update (&seq
, seq2
);
2752 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2753 gimple_set_visited (SSA_NAME_DEF_STMT (exp
), true);
2754 gimple
*g
= gimple_build_assign (make_ssa_name (optype
),
2755 BIT_IOR_EXPR
, tem
, exp
);
2756 gimple_set_location (g
, loc
);
2757 gimple_seq_add_stmt_without_update (&seq
, g
);
2758 exp
= gimple_assign_lhs (g
);
2759 tree val
= build_zero_cst (optype
);
2760 if (update_range_test (&ranges
[i
], NULL
, candidates
.address (),
2761 candidates
.length (), opcode
, ops
, exp
,
2762 seq
, false, val
, val
, strict_overflow_p
))
2765 reassoc_branch_fixups
.safe_push (tem
);
2768 gimple_seq_discard (seq
);
2774 /* Optimize range tests, similarly how fold_range_test optimizes
2775 it on trees. The tree code for the binary
2776 operation between all the operands is OPCODE.
2777 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2778 maybe_optimize_range_tests for inter-bb range optimization.
2779 In that case if oe->op is NULL, oe->id is bb->index whose
2780 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2781 the actual opcode. */
2784 optimize_range_tests (enum tree_code opcode
,
2785 vec
<operand_entry
*> *ops
)
2787 unsigned int length
= ops
->length (), i
, j
, first
;
2789 struct range_entry
*ranges
;
2790 bool any_changes
= false;
2795 ranges
= XNEWVEC (struct range_entry
, length
);
2796 for (i
= 0; i
< length
; i
++)
2800 init_range_entry (ranges
+ i
, oe
->op
,
2803 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
)));
2804 /* For | invert it now, we will invert it again before emitting
2805 the optimized expression. */
2806 if (opcode
== BIT_IOR_EXPR
2807 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2808 ranges
[i
].in_p
= !ranges
[i
].in_p
;
2811 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
2812 for (i
= 0; i
< length
; i
++)
2813 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
2816 /* Try to merge ranges. */
2817 for (first
= i
; i
< length
; i
++)
2819 tree low
= ranges
[i
].low
;
2820 tree high
= ranges
[i
].high
;
2821 int in_p
= ranges
[i
].in_p
;
2822 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2823 int update_fail_count
= 0;
2825 for (j
= i
+ 1; j
< length
; j
++)
2827 if (ranges
[i
].exp
!= ranges
[j
].exp
)
2829 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
2830 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
2832 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2838 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, NULL
, j
- i
- 1,
2839 opcode
, ops
, ranges
[i
].exp
, NULL
, in_p
,
2840 low
, high
, strict_overflow_p
))
2845 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2846 while update_range_test would fail. */
2847 else if (update_fail_count
== 64)
2850 ++update_fail_count
;
2853 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
2856 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
2857 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
2859 if (lshift_cheap_p (optimize_function_for_speed_p (cfun
)))
2860 any_changes
|= optimize_range_tests_to_bit_test (opcode
, first
, length
,
2863 if (any_changes
&& opcode
!= ERROR_MARK
)
2866 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
2868 if (oe
->op
== error_mark_node
)
2877 XDELETEVEC (ranges
);
2881 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
2882 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
2883 otherwise the comparison code. */
2886 ovce_extract_ops (tree var
, gassign
**rets
, bool *reti
)
2888 if (TREE_CODE (var
) != SSA_NAME
)
2891 gassign
*stmt
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (var
));
2895 /* ??? If we start creating more COND_EXPR, we could perform
2896 this same optimization with them. For now, simplify. */
2897 if (gimple_assign_rhs_code (stmt
) != VEC_COND_EXPR
)
2900 tree cond
= gimple_assign_rhs1 (stmt
);
2901 tree_code cmp
= TREE_CODE (cond
);
2902 if (TREE_CODE_CLASS (cmp
) != tcc_comparison
)
2905 /* ??? For now, allow only canonical true and false result vectors.
2906 We could expand this to other constants should the need arise,
2907 but at the moment we don't create them. */
2908 tree t
= gimple_assign_rhs2 (stmt
);
2909 tree f
= gimple_assign_rhs3 (stmt
);
2911 if (integer_all_onesp (t
))
2913 else if (integer_all_onesp (f
))
2915 cmp
= invert_tree_comparison (cmp
, false);
2920 if (!integer_zerop (f
))
2931 /* Optimize the condition of VEC_COND_EXPRs which have been combined
2932 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
2935 optimize_vec_cond_expr (tree_code opcode
, vec
<operand_entry
*> *ops
)
2937 unsigned int length
= ops
->length (), i
, j
;
2938 bool any_changes
= false;
2943 for (i
= 0; i
< length
; ++i
)
2945 tree elt0
= (*ops
)[i
]->op
;
2949 tree_code cmp0
= ovce_extract_ops (elt0
, &stmt0
, &invert
);
2950 if (cmp0
== ERROR_MARK
)
2953 for (j
= i
+ 1; j
< length
; ++j
)
2955 tree
&elt1
= (*ops
)[j
]->op
;
2958 tree_code cmp1
= ovce_extract_ops (elt1
, &stmt1
, NULL
);
2959 if (cmp1
== ERROR_MARK
)
2962 tree cond0
= gimple_assign_rhs1 (stmt0
);
2963 tree x0
= TREE_OPERAND (cond0
, 0);
2964 tree y0
= TREE_OPERAND (cond0
, 1);
2966 tree cond1
= gimple_assign_rhs1 (stmt1
);
2967 tree x1
= TREE_OPERAND (cond1
, 0);
2968 tree y1
= TREE_OPERAND (cond1
, 1);
2971 if (opcode
== BIT_AND_EXPR
)
2972 comb
= maybe_fold_and_comparisons (cmp0
, x0
, y0
, cmp1
, x1
, y1
);
2973 else if (opcode
== BIT_IOR_EXPR
)
2974 comb
= maybe_fold_or_comparisons (cmp0
, x0
, y0
, cmp1
, x1
, y1
);
2981 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2983 fprintf (dump_file
, "Transforming ");
2984 print_generic_expr (dump_file
, cond0
, 0);
2985 fprintf (dump_file
, " %c ", opcode
== BIT_AND_EXPR
? '&' : '|');
2986 print_generic_expr (dump_file
, cond1
, 0);
2987 fprintf (dump_file
, " into ");
2988 print_generic_expr (dump_file
, comb
, 0);
2989 fputc ('\n', dump_file
);
2992 gimple_assign_set_rhs1 (stmt0
, comb
);
2994 std::swap (*gimple_assign_rhs2_ptr (stmt0
),
2995 *gimple_assign_rhs3_ptr (stmt0
));
2996 update_stmt (stmt0
);
2998 elt1
= error_mark_node
;
3007 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
3009 if (oe
->op
== error_mark_node
)
3021 /* Return true if STMT is a cast like:
3027 # _345 = PHI <_123(N), 1(...), 1(...)>
3028 where _234 has bool type, _123 has single use and
3029 bb N has a single successor M. This is commonly used in
3030 the last block of a range test. */
3033 final_range_test_p (gimple
*stmt
)
3035 basic_block bb
, rhs_bb
;
3038 use_operand_p use_p
;
3041 if (!gimple_assign_cast_p (stmt
))
3043 bb
= gimple_bb (stmt
);
3044 if (!single_succ_p (bb
))
3046 e
= single_succ_edge (bb
);
3047 if (e
->flags
& EDGE_COMPLEX
)
3050 lhs
= gimple_assign_lhs (stmt
);
3051 rhs
= gimple_assign_rhs1 (stmt
);
3052 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3053 || TREE_CODE (rhs
) != SSA_NAME
3054 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
)
3057 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3058 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
3061 if (gimple_code (use_stmt
) != GIMPLE_PHI
3062 || gimple_bb (use_stmt
) != e
->dest
)
3065 /* And that the rhs is defined in the same loop. */
3066 rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
));
3068 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
3074 /* Return true if BB is suitable basic block for inter-bb range test
3075 optimization. If BACKWARD is true, BB should be the only predecessor
3076 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
3077 or compared with to find a common basic block to which all conditions
3078 branch to if true resp. false. If BACKWARD is false, TEST_BB should
3079 be the only predecessor of BB. */
3082 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
3085 edge_iterator ei
, ei2
;
3089 bool other_edge_seen
= false;
3094 /* Check last stmt first. */
3095 stmt
= last_stmt (bb
);
3097 || (gimple_code (stmt
) != GIMPLE_COND
3098 && (backward
|| !final_range_test_p (stmt
)))
3099 || gimple_visited_p (stmt
)
3100 || stmt_could_throw_p (stmt
)
3103 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
3106 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3107 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3108 to *OTHER_BB (if not set yet, try to find it out). */
3109 if (EDGE_COUNT (bb
->succs
) != 2)
3111 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
3113 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
3115 if (e
->dest
== test_bb
)
3124 if (*other_bb
== NULL
)
3126 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
3127 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
3129 else if (e
->dest
== e2
->dest
)
3130 *other_bb
= e
->dest
;
3131 if (*other_bb
== NULL
)
3134 if (e
->dest
== *other_bb
)
3135 other_edge_seen
= true;
3139 if (*other_bb
== NULL
|| !other_edge_seen
)
3142 else if (single_succ (bb
) != *other_bb
)
3145 /* Now check all PHIs of *OTHER_BB. */
3146 e
= find_edge (bb
, *other_bb
);
3147 e2
= find_edge (test_bb
, *other_bb
);
3148 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3150 gphi
*phi
= gsi
.phi ();
3151 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3152 corresponding to BB and TEST_BB predecessor must be the same. */
3153 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
3154 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
3156 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3157 one of the PHIs should have the lhs of the last stmt in
3158 that block as PHI arg and that PHI should have 0 or 1
3159 corresponding to it in all other range test basic blocks
3163 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
3164 == gimple_assign_lhs (stmt
)
3165 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
3166 || integer_onep (gimple_phi_arg_def (phi
,
3172 gimple
*test_last
= last_stmt (test_bb
);
3173 if (gimple_code (test_last
) != GIMPLE_COND
3174 && gimple_phi_arg_def (phi
, e2
->dest_idx
)
3175 == gimple_assign_lhs (test_last
)
3176 && (integer_zerop (gimple_phi_arg_def (phi
, e
->dest_idx
))
3177 || integer_onep (gimple_phi_arg_def (phi
, e
->dest_idx
))))
3187 /* Return true if BB doesn't have side-effects that would disallow
3188 range test optimization, all SSA_NAMEs set in the bb are consumed
3189 in the bb and there are no PHIs. */
3192 no_side_effect_bb (basic_block bb
)
3194 gimple_stmt_iterator gsi
;
3197 if (!gimple_seq_empty_p (phi_nodes (bb
)))
3199 last
= last_stmt (bb
);
3200 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3202 gimple
*stmt
= gsi_stmt (gsi
);
3204 imm_use_iterator imm_iter
;
3205 use_operand_p use_p
;
3207 if (is_gimple_debug (stmt
))
3209 if (gimple_has_side_effects (stmt
))
3213 if (!is_gimple_assign (stmt
))
3215 lhs
= gimple_assign_lhs (stmt
);
3216 if (TREE_CODE (lhs
) != SSA_NAME
)
3218 if (gimple_assign_rhs_could_trap_p (stmt
))
3220 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
3222 gimple
*use_stmt
= USE_STMT (use_p
);
3223 if (is_gimple_debug (use_stmt
))
3225 if (gimple_bb (use_stmt
) != bb
)
3232 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3233 return true and fill in *OPS recursively. */
3236 get_ops (tree var
, enum tree_code code
, vec
<operand_entry
*> *ops
,
3239 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
3243 if (!is_reassociable_op (stmt
, code
, loop
))
3246 rhs
[0] = gimple_assign_rhs1 (stmt
);
3247 rhs
[1] = gimple_assign_rhs2 (stmt
);
3248 gimple_set_visited (stmt
, true);
3249 for (i
= 0; i
< 2; i
++)
3250 if (TREE_CODE (rhs
[i
]) == SSA_NAME
3251 && !get_ops (rhs
[i
], code
, ops
, loop
)
3252 && has_single_use (rhs
[i
]))
3254 operand_entry
*oe
= operand_entry_pool
.allocate ();
3260 oe
->stmt_to_insert
= NULL
;
3261 ops
->safe_push (oe
);
3266 /* Find the ops that were added by get_ops starting from VAR, see if
3267 they were changed during update_range_test and if yes, create new
3271 update_ops (tree var
, enum tree_code code
, vec
<operand_entry
*> ops
,
3272 unsigned int *pidx
, struct loop
*loop
)
3274 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
3278 if (!is_reassociable_op (stmt
, code
, loop
))
3281 rhs
[0] = gimple_assign_rhs1 (stmt
);
3282 rhs
[1] = gimple_assign_rhs2 (stmt
);
3285 for (i
= 0; i
< 2; i
++)
3286 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
3288 rhs
[2 + i
] = update_ops (rhs
[i
], code
, ops
, pidx
, loop
);
3289 if (rhs
[2 + i
] == NULL_TREE
)
3291 if (has_single_use (rhs
[i
]))
3292 rhs
[2 + i
] = ops
[(*pidx
)++]->op
;
3294 rhs
[2 + i
] = rhs
[i
];
3297 if ((rhs
[2] != rhs
[0] || rhs
[3] != rhs
[1])
3298 && (rhs
[2] != rhs
[1] || rhs
[3] != rhs
[0]))
3300 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3301 var
= make_ssa_name (TREE_TYPE (var
));
3302 gassign
*g
= gimple_build_assign (var
, gimple_assign_rhs_code (stmt
),
3304 gimple_set_uid (g
, gimple_uid (stmt
));
3305 gimple_set_visited (g
, true);
3306 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3311 /* Structure to track the initial value passed to get_ops and
3312 the range in the ops vector for each basic block. */
3314 struct inter_bb_range_test_entry
3317 unsigned int first_idx
, last_idx
;
3320 /* Inter-bb range test optimization.
3322 Returns TRUE if a gimple conditional is optimized to a true/false,
3323 otherwise return FALSE.
3325 This indicates to the caller that it should run a CFG cleanup pass
3326 once reassociation is completed. */
3329 maybe_optimize_range_tests (gimple
*stmt
)
3331 basic_block first_bb
= gimple_bb (stmt
);
3332 basic_block last_bb
= first_bb
;
3333 basic_block other_bb
= NULL
;
3337 auto_vec
<operand_entry
*> ops
;
3338 auto_vec
<inter_bb_range_test_entry
> bbinfo
;
3339 bool any_changes
= false;
3340 bool cfg_cleanup_needed
= false;
3342 /* Consider only basic blocks that end with GIMPLE_COND or
3343 a cast statement satisfying final_range_test_p. All
3344 but the last bb in the first_bb .. last_bb range
3345 should end with GIMPLE_COND. */
3346 if (gimple_code (stmt
) == GIMPLE_COND
)
3348 if (EDGE_COUNT (first_bb
->succs
) != 2)
3349 return cfg_cleanup_needed
;
3351 else if (final_range_test_p (stmt
))
3352 other_bb
= single_succ (first_bb
);
3354 return cfg_cleanup_needed
;
3356 if (stmt_could_throw_p (stmt
))
3357 return cfg_cleanup_needed
;
3359 /* As relative ordering of post-dominator sons isn't fixed,
3360 maybe_optimize_range_tests can be called first on any
3361 bb in the range we want to optimize. So, start searching
3362 backwards, if first_bb can be set to a predecessor. */
3363 while (single_pred_p (first_bb
))
3365 basic_block pred_bb
= single_pred (first_bb
);
3366 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, true))
3368 if (!no_side_effect_bb (first_bb
))
3372 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3373 Before starting forward search in last_bb successors, find
3374 out the other_bb. */
3375 if (first_bb
== last_bb
)
3378 /* As non-GIMPLE_COND last stmt always terminates the range,
3379 if forward search didn't discover anything, just give up. */
3380 if (gimple_code (stmt
) != GIMPLE_COND
)
3381 return cfg_cleanup_needed
;
3382 /* Look at both successors. Either it ends with a GIMPLE_COND
3383 and satisfies suitable_cond_bb, or ends with a cast and
3384 other_bb is that cast's successor. */
3385 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
3386 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
3387 || e
->dest
== first_bb
)
3388 return cfg_cleanup_needed
;
3389 else if (single_pred_p (e
->dest
))
3391 stmt
= last_stmt (e
->dest
);
3393 && gimple_code (stmt
) == GIMPLE_COND
3394 && EDGE_COUNT (e
->dest
->succs
) == 2)
3396 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
, true))
3402 && final_range_test_p (stmt
)
3403 && find_edge (first_bb
, single_succ (e
->dest
)))
3405 other_bb
= single_succ (e
->dest
);
3406 if (other_bb
== first_bb
)
3410 if (other_bb
== NULL
)
3411 return cfg_cleanup_needed
;
3413 /* Now do the forward search, moving last_bb to successor bbs
3414 that aren't other_bb. */
3415 while (EDGE_COUNT (last_bb
->succs
) == 2)
3417 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
3418 if (e
->dest
!= other_bb
)
3422 if (!single_pred_p (e
->dest
))
3424 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, false))
3426 if (!no_side_effect_bb (e
->dest
))
3430 if (first_bb
== last_bb
)
3431 return cfg_cleanup_needed
;
3432 /* Here basic blocks first_bb through last_bb's predecessor
3433 end with GIMPLE_COND, all of them have one of the edges to
3434 other_bb and another to another block in the range,
3435 all blocks except first_bb don't have side-effects and
3436 last_bb ends with either GIMPLE_COND, or cast satisfying
3437 final_range_test_p. */
3438 for (bb
= last_bb
; ; bb
= single_pred (bb
))
3440 enum tree_code code
;
3442 inter_bb_range_test_entry bb_ent
;
3444 bb_ent
.op
= NULL_TREE
;
3445 bb_ent
.first_idx
= ops
.length ();
3446 bb_ent
.last_idx
= bb_ent
.first_idx
;
3447 e
= find_edge (bb
, other_bb
);
3448 stmt
= last_stmt (bb
);
3449 gimple_set_visited (stmt
, true);
3450 if (gimple_code (stmt
) != GIMPLE_COND
)
3452 use_operand_p use_p
;
3457 lhs
= gimple_assign_lhs (stmt
);
3458 rhs
= gimple_assign_rhs1 (stmt
);
3459 gcc_assert (bb
== last_bb
);
3466 # _345 = PHI <_123(N), 1(...), 1(...)>
3468 or 0 instead of 1. If it is 0, the _234
3469 range test is anded together with all the
3470 other range tests, if it is 1, it is ored with
3472 single_imm_use (lhs
, &use_p
, &phi
);
3473 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
3474 e2
= find_edge (first_bb
, other_bb
);
3476 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
3477 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
3478 code
= BIT_AND_EXPR
;
3481 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
3482 code
= BIT_IOR_EXPR
;
3485 /* If _234 SSA_NAME_DEF_STMT is
3487 (or &, corresponding to 1/0 in the phi arguments,
3488 push into ops the individual range test arguments
3489 of the bitwise or resp. and, recursively. */
3490 if (!get_ops (rhs
, code
, &ops
,
3491 loop_containing_stmt (stmt
))
3492 && has_single_use (rhs
))
3494 /* Otherwise, push the _234 range test itself. */
3495 operand_entry
*oe
= operand_entry_pool
.allocate ();
3501 oe
->stmt_to_insert
= NULL
;
3506 bb_ent
.last_idx
= ops
.length ();
3508 bbinfo
.safe_push (bb_ent
);
3511 /* Otherwise stmt is GIMPLE_COND. */
3512 code
= gimple_cond_code (stmt
);
3513 lhs
= gimple_cond_lhs (stmt
);
3514 rhs
= gimple_cond_rhs (stmt
);
3515 if (TREE_CODE (lhs
) == SSA_NAME
3516 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3517 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
3518 || rhs
!= boolean_false_node
3519 /* Either push into ops the individual bitwise
3520 or resp. and operands, depending on which
3521 edge is other_bb. */
3522 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
3523 ^ (code
== EQ_EXPR
))
3524 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
3525 loop_containing_stmt (stmt
))))
3527 /* Or push the GIMPLE_COND stmt itself. */
3528 operand_entry
*oe
= operand_entry_pool
.allocate ();
3531 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
3532 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
3533 /* oe->op = NULL signs that there is no SSA_NAME
3534 for the range test, and oe->id instead is the
3535 basic block number, at which's end the GIMPLE_COND
3539 oe
->stmt_to_insert
= NULL
;
3544 else if (ops
.length () > bb_ent
.first_idx
)
3547 bb_ent
.last_idx
= ops
.length ();
3549 bbinfo
.safe_push (bb_ent
);
3553 if (ops
.length () > 1)
3554 any_changes
= optimize_range_tests (ERROR_MARK
, &ops
);
3557 unsigned int idx
, max_idx
= 0;
3558 /* update_ops relies on has_single_use predicates returning the
3559 same values as it did during get_ops earlier. Additionally it
3560 never removes statements, only adds new ones and it should walk
3561 from the single imm use and check the predicate already before
3562 making those changes.
3563 On the other side, the handling of GIMPLE_COND directly can turn
3564 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3565 it needs to be done in a separate loop afterwards. */
3566 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
3568 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
3569 && bbinfo
[idx
].op
!= NULL_TREE
)
3574 stmt
= last_stmt (bb
);
3575 new_op
= update_ops (bbinfo
[idx
].op
,
3577 ops
[bbinfo
[idx
].first_idx
]->rank
,
3578 ops
, &bbinfo
[idx
].first_idx
,
3579 loop_containing_stmt (stmt
));
3580 if (new_op
== NULL_TREE
)
3582 gcc_assert (bb
== last_bb
);
3583 new_op
= ops
[bbinfo
[idx
].first_idx
++]->op
;
3585 if (bbinfo
[idx
].op
!= new_op
)
3587 imm_use_iterator iter
;
3588 use_operand_p use_p
;
3589 gimple
*use_stmt
, *cast_stmt
= NULL
;
3591 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, bbinfo
[idx
].op
)
3592 if (is_gimple_debug (use_stmt
))
3594 else if (gimple_code (use_stmt
) == GIMPLE_COND
3595 || gimple_code (use_stmt
) == GIMPLE_PHI
)
3596 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
3597 SET_USE (use_p
, new_op
);
3598 else if (gimple_assign_cast_p (use_stmt
))
3599 cast_stmt
= use_stmt
;
3604 gcc_assert (bb
== last_bb
);
3605 tree lhs
= gimple_assign_lhs (cast_stmt
);
3606 tree new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
3607 enum tree_code rhs_code
3608 = gimple_assign_rhs_code (cast_stmt
);
3610 if (is_gimple_min_invariant (new_op
))
3612 new_op
= fold_convert (TREE_TYPE (lhs
), new_op
);
3613 g
= gimple_build_assign (new_lhs
, new_op
);
3616 g
= gimple_build_assign (new_lhs
, rhs_code
, new_op
);
3617 gimple_stmt_iterator gsi
= gsi_for_stmt (cast_stmt
);
3618 gimple_set_uid (g
, gimple_uid (cast_stmt
));
3619 gimple_set_visited (g
, true);
3620 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3621 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
3622 if (is_gimple_debug (use_stmt
))
3624 else if (gimple_code (use_stmt
) == GIMPLE_COND
3625 || gimple_code (use_stmt
) == GIMPLE_PHI
)
3626 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
3627 SET_USE (use_p
, new_lhs
);
3636 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
3638 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
3639 && bbinfo
[idx
].op
== NULL_TREE
3640 && ops
[bbinfo
[idx
].first_idx
]->op
!= NULL_TREE
)
3642 gcond
*cond_stmt
= as_a
<gcond
*> (last_stmt (bb
));
3647 /* If we collapse the conditional to a true/false
3648 condition, then bubble that knowledge up to our caller. */
3649 if (integer_zerop (ops
[bbinfo
[idx
].first_idx
]->op
))
3651 gimple_cond_make_false (cond_stmt
);
3652 cfg_cleanup_needed
= true;
3654 else if (integer_onep (ops
[bbinfo
[idx
].first_idx
]->op
))
3656 gimple_cond_make_true (cond_stmt
);
3657 cfg_cleanup_needed
= true;
3661 gimple_cond_set_code (cond_stmt
, NE_EXPR
);
3662 gimple_cond_set_lhs (cond_stmt
,
3663 ops
[bbinfo
[idx
].first_idx
]->op
);
3664 gimple_cond_set_rhs (cond_stmt
, boolean_false_node
);
3666 update_stmt (cond_stmt
);
3672 /* The above changes could result in basic blocks after the first
3673 modified one, up to and including last_bb, to be executed even if
3674 they would not be in the original program. If the value ranges of
3675 assignment lhs' in those bbs were dependent on the conditions
3676 guarding those basic blocks which now can change, the VRs might
3677 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
3678 are only used within the same bb, it should be not a big deal if
3679 we just reset all the VRs in those bbs. See PR68671. */
3680 for (bb
= last_bb
, idx
= 0; idx
< max_idx
; bb
= single_pred (bb
), idx
++)
3681 reset_flow_sensitive_info_in_bb (bb
);
3683 return cfg_cleanup_needed
;
3686 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3687 of STMT in it's operands. This is also known as a "destructive
3688 update" operation. */
3691 is_phi_for_stmt (gimple
*stmt
, tree operand
)
3696 use_operand_p arg_p
;
3699 if (TREE_CODE (operand
) != SSA_NAME
)
3702 lhs
= gimple_assign_lhs (stmt
);
3704 def_stmt
= SSA_NAME_DEF_STMT (operand
);
3705 def_phi
= dyn_cast
<gphi
*> (def_stmt
);
3709 FOR_EACH_PHI_ARG (arg_p
, def_phi
, i
, SSA_OP_USE
)
3710 if (lhs
== USE_FROM_PTR (arg_p
))
3715 /* Remove def stmt of VAR if VAR has zero uses and recurse
3716 on rhs1 operand if so. */
3719 remove_visited_stmt_chain (tree var
)
3722 gimple_stmt_iterator gsi
;
3726 if (TREE_CODE (var
) != SSA_NAME
|| !has_zero_uses (var
))
3728 stmt
= SSA_NAME_DEF_STMT (var
);
3729 if (is_gimple_assign (stmt
) && gimple_visited_p (stmt
))
3731 var
= gimple_assign_rhs1 (stmt
);
3732 gsi
= gsi_for_stmt (stmt
);
3733 reassoc_remove_stmt (&gsi
);
3734 release_defs (stmt
);
3741 /* This function checks three consequtive operands in
3742 passed operands vector OPS starting from OPINDEX and
3743 swaps two operands if it is profitable for binary operation
3744 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
3746 We pair ops with the same rank if possible.
3748 The alternative we try is to see if STMT is a destructive
3749 update style statement, which is like:
3752 In that case, we want to use the destructive update form to
3753 expose the possible vectorizer sum reduction opportunity.
3754 In that case, the third operand will be the phi node. This
3755 check is not performed if STMT is null.
3757 We could, of course, try to be better as noted above, and do a
3758 lot of work to try to find these opportunities in >3 operand
3759 cases, but it is unlikely to be worth it. */
3762 swap_ops_for_binary_stmt (vec
<operand_entry
*> ops
,
3763 unsigned int opindex
, gimple
*stmt
)
3765 operand_entry
*oe1
, *oe2
, *oe3
;
3768 oe2
= ops
[opindex
+ 1];
3769 oe3
= ops
[opindex
+ 2];
3771 if ((oe1
->rank
== oe2
->rank
3772 && oe2
->rank
!= oe3
->rank
)
3773 || (stmt
&& is_phi_for_stmt (stmt
, oe3
->op
)
3774 && !is_phi_for_stmt (stmt
, oe1
->op
)
3775 && !is_phi_for_stmt (stmt
, oe2
->op
)))
3776 std::swap (*oe1
, *oe3
);
3777 else if ((oe1
->rank
== oe3
->rank
3778 && oe2
->rank
!= oe3
->rank
)
3779 || (stmt
&& is_phi_for_stmt (stmt
, oe2
->op
)
3780 && !is_phi_for_stmt (stmt
, oe1
->op
)
3781 && !is_phi_for_stmt (stmt
, oe3
->op
)))
3782 std::swap (*oe1
, *oe2
);
3785 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3786 two definitions, otherwise return STMT. */
3788 static inline gimple
*
3789 find_insert_point (gimple
*stmt
, tree rhs1
, tree rhs2
)
3791 if (TREE_CODE (rhs1
) == SSA_NAME
3792 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs1
)))
3793 stmt
= SSA_NAME_DEF_STMT (rhs1
);
3794 if (TREE_CODE (rhs2
) == SSA_NAME
3795 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs2
)))
3796 stmt
= SSA_NAME_DEF_STMT (rhs2
);
3800 /* If the stmt that defines operand has to be inserted, insert it
3803 insert_stmt_before_use (gimple
*stmt
, gimple
*stmt_to_insert
)
3805 gcc_assert (is_gimple_assign (stmt_to_insert
));
3806 tree rhs1
= gimple_assign_rhs1 (stmt_to_insert
);
3807 tree rhs2
= gimple_assign_rhs2 (stmt_to_insert
);
3808 gimple
*insert_point
= find_insert_point (stmt
, rhs1
, rhs2
);
3809 gimple_stmt_iterator gsi
= gsi_for_stmt (insert_point
);
3810 gimple_set_uid (stmt_to_insert
, gimple_uid (insert_point
));
3812 /* If the insert point is not stmt, then insert_point would be
3813 the point where operand rhs1 or rhs2 is defined. In this case,
3814 stmt_to_insert has to be inserted afterwards. This would
3815 only happen when the stmt insertion point is flexible. */
3816 if (stmt
== insert_point
)
3817 gsi_insert_before (&gsi
, stmt_to_insert
, GSI_NEW_STMT
);
3819 insert_stmt_after (stmt_to_insert
, insert_point
);
3823 /* Recursively rewrite our linearized statements so that the operators
3824 match those in OPS[OPINDEX], putting the computation in rank
3825 order. Return new lhs. */
3828 rewrite_expr_tree (gimple
*stmt
, unsigned int opindex
,
3829 vec
<operand_entry
*> ops
, bool changed
)
3831 tree rhs1
= gimple_assign_rhs1 (stmt
);
3832 tree rhs2
= gimple_assign_rhs2 (stmt
);
3833 tree lhs
= gimple_assign_lhs (stmt
);
3836 /* The final recursion case for this function is that you have
3837 exactly two operations left.
3838 If we had exactly one op in the entire list to start with, we
3839 would have never called this function, and the tail recursion
3840 rewrites them one at a time. */
3841 if (opindex
+ 2 == ops
.length ())
3843 operand_entry
*oe1
, *oe2
;
3846 oe2
= ops
[opindex
+ 1];
3848 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
3850 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3851 unsigned int uid
= gimple_uid (stmt
);
3853 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3855 fprintf (dump_file
, "Transforming ");
3856 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3859 /* If the stmt that defines operand has to be inserted, insert it
3861 if (oe1
->stmt_to_insert
)
3862 insert_stmt_before_use (stmt
, oe1
->stmt_to_insert
);
3863 if (oe2
->stmt_to_insert
)
3864 insert_stmt_before_use (stmt
, oe2
->stmt_to_insert
);
3865 /* Even when changed is false, reassociation could have e.g. removed
3866 some redundant operations, so unless we are just swapping the
3867 arguments or unless there is no change at all (then we just
3868 return lhs), force creation of a new SSA_NAME. */
3869 if (changed
|| ((rhs1
!= oe2
->op
|| rhs2
!= oe1
->op
) && opindex
))
3871 gimple
*insert_point
3872 = find_insert_point (stmt
, oe1
->op
, oe2
->op
);
3873 lhs
= make_ssa_name (TREE_TYPE (lhs
));
3875 = gimple_build_assign (lhs
, gimple_assign_rhs_code (stmt
),
3877 gimple_set_uid (stmt
, uid
);
3878 gimple_set_visited (stmt
, true);
3879 if (insert_point
== gsi_stmt (gsi
))
3880 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3882 insert_stmt_after (stmt
, insert_point
);
3886 gcc_checking_assert (find_insert_point (stmt
, oe1
->op
, oe2
->op
)
3888 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
3889 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
3893 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
3894 remove_visited_stmt_chain (rhs1
);
3896 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3898 fprintf (dump_file
, " into ");
3899 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3905 /* If we hit here, we should have 3 or more ops left. */
3906 gcc_assert (opindex
+ 2 < ops
.length ());
3908 /* Rewrite the next operator. */
3911 /* If the stmt that defines operand has to be inserted, insert it
3913 if (oe
->stmt_to_insert
)
3914 insert_stmt_before_use (stmt
, oe
->stmt_to_insert
);
3916 /* Recurse on the LHS of the binary operator, which is guaranteed to
3917 be the non-leaf side. */
3919 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), opindex
+ 1, ops
,
3920 changed
|| oe
->op
!= rhs2
);
3922 if (oe
->op
!= rhs2
|| new_rhs1
!= rhs1
)
3924 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3926 fprintf (dump_file
, "Transforming ");
3927 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3930 /* If changed is false, this is either opindex == 0
3931 or all outer rhs2's were equal to corresponding oe->op,
3932 and powi_result is NULL.
3933 That means lhs is equivalent before and after reassociation.
3934 Otherwise ensure the old lhs SSA_NAME is not reused and
3935 create a new stmt as well, so that any debug stmts will be
3936 properly adjusted. */
3939 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3940 unsigned int uid
= gimple_uid (stmt
);
3941 gimple
*insert_point
= find_insert_point (stmt
, new_rhs1
, oe
->op
);
3943 lhs
= make_ssa_name (TREE_TYPE (lhs
));
3944 stmt
= gimple_build_assign (lhs
, gimple_assign_rhs_code (stmt
),
3946 gimple_set_uid (stmt
, uid
);
3947 gimple_set_visited (stmt
, true);
3948 if (insert_point
== gsi_stmt (gsi
))
3949 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3951 insert_stmt_after (stmt
, insert_point
);
3955 gcc_checking_assert (find_insert_point (stmt
, new_rhs1
, oe
->op
)
3957 gimple_assign_set_rhs1 (stmt
, new_rhs1
);
3958 gimple_assign_set_rhs2 (stmt
, oe
->op
);
3962 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3964 fprintf (dump_file
, " into ");
3965 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3971 /* Find out how many cycles we need to compute statements chain.
3972 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
3973 maximum number of independent statements we may execute per cycle. */
3976 get_required_cycles (int ops_num
, int cpu_width
)
3982 /* While we have more than 2 * cpu_width operands
3983 we may reduce number of operands by cpu_width
3985 res
= ops_num
/ (2 * cpu_width
);
3987 /* Remained operands count may be reduced twice per cycle
3988 until we have only one operand. */
3989 rest
= (unsigned)(ops_num
- res
* cpu_width
);
3990 elog
= exact_log2 (rest
);
3994 res
+= floor_log2 (rest
) + 1;
3999 /* Returns an optimal number of registers to use for computation of
4000 given statements. */
4003 get_reassociation_width (int ops_num
, enum tree_code opc
,
4006 int param_width
= PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH
);
4011 if (param_width
> 0)
4012 width
= param_width
;
4014 width
= targetm
.sched
.reassociation_width (opc
, mode
);
4019 /* Get the minimal time required for sequence computation. */
4020 cycles_best
= get_required_cycles (ops_num
, width
);
4022 /* Check if we may use less width and still compute sequence for
4023 the same time. It will allow us to reduce registers usage.
4024 get_required_cycles is monotonically increasing with lower width
4025 so we can perform a binary search for the minimal width that still
4026 results in the optimal cycle count. */
4028 while (width
> width_min
)
4030 int width_mid
= (width
+ width_min
) / 2;
4032 if (get_required_cycles (ops_num
, width_mid
) == cycles_best
)
4034 else if (width_min
< width_mid
)
4035 width_min
= width_mid
;
4043 /* Recursively rewrite our linearized statements so that the operators
4044 match those in OPS[OPINDEX], putting the computation in rank
4045 order and trying to allow operations to be executed in
4049 rewrite_expr_tree_parallel (gassign
*stmt
, int width
,
4050 vec
<operand_entry
*> ops
)
4052 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
4053 int op_num
= ops
.length ();
4054 int stmt_num
= op_num
- 1;
4055 gimple
**stmts
= XALLOCAVEC (gimple
*, stmt_num
);
4056 int op_index
= op_num
- 1;
4058 int ready_stmts_end
= 0;
4060 gimple
*stmt1
= NULL
, *stmt2
= NULL
;
4061 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
4063 /* We start expression rewriting from the top statements.
4064 So, in this loop we create a full list of statements
4065 we will work with. */
4066 stmts
[stmt_num
- 1] = stmt
;
4067 for (i
= stmt_num
- 2; i
>= 0; i
--)
4068 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
4070 for (i
= 0; i
< stmt_num
; i
++)
4074 /* Determine whether we should use results of
4075 already handled statements or not. */
4076 if (ready_stmts_end
== 0
4077 && (i
- stmt_index
>= width
|| op_index
< 1))
4078 ready_stmts_end
= i
;
4080 /* Now we choose operands for the next statement. Non zero
4081 value in ready_stmts_end means here that we should use
4082 the result of already generated statements as new operand. */
4083 if (ready_stmts_end
> 0)
4085 op1
= gimple_assign_lhs (stmts
[stmt_index
++]);
4086 if (ready_stmts_end
> stmt_index
)
4087 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
4088 else if (op_index
>= 0)
4090 operand_entry
*oe
= ops
[op_index
--];
4091 stmt2
= oe
->stmt_to_insert
;
4096 gcc_assert (stmt_index
< i
);
4097 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
4100 if (stmt_index
>= ready_stmts_end
)
4101 ready_stmts_end
= 0;
4106 swap_ops_for_binary_stmt (ops
, op_index
- 2, NULL
);
4107 operand_entry
*oe2
= ops
[op_index
--];
4108 operand_entry
*oe1
= ops
[op_index
--];
4110 stmt2
= oe2
->stmt_to_insert
;
4112 stmt1
= oe1
->stmt_to_insert
;
4115 /* If we emit the last statement then we should put
4116 operands into the last statement. It will also
4118 if (op_index
< 0 && stmt_index
== i
)
4121 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4123 fprintf (dump_file
, "Transforming ");
4124 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
4127 /* If the stmt that defines operand has to be inserted, insert it
4130 insert_stmt_before_use (stmts
[i
], stmt1
);
4132 insert_stmt_before_use (stmts
[i
], stmt2
);
4133 stmt1
= stmt2
= NULL
;
4135 /* We keep original statement only for the last one. All
4136 others are recreated. */
4137 if (i
== stmt_num
- 1)
4139 gimple_assign_set_rhs1 (stmts
[i
], op1
);
4140 gimple_assign_set_rhs2 (stmts
[i
], op2
);
4141 update_stmt (stmts
[i
]);
4145 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
), op1
, op2
, opcode
);
4147 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4149 fprintf (dump_file
, " into ");
4150 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
4154 remove_visited_stmt_chain (last_rhs1
);
4157 /* Transform STMT, which is really (A +B) + (C + D) into the left
4158 linear form, ((A+B)+C)+D.
4159 Recurse on D if necessary. */
4162 linearize_expr (gimple
*stmt
)
4164 gimple_stmt_iterator gsi
;
4165 gimple
*binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
4166 gimple
*binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
4167 gimple
*oldbinrhs
= binrhs
;
4168 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
4169 gimple
*newbinrhs
= NULL
;
4170 struct loop
*loop
= loop_containing_stmt (stmt
);
4171 tree lhs
= gimple_assign_lhs (stmt
);
4173 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
4174 && is_reassociable_op (binrhs
, rhscode
, loop
));
4176 gsi
= gsi_for_stmt (stmt
);
4178 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
4179 binrhs
= gimple_build_assign (make_ssa_name (TREE_TYPE (lhs
)),
4180 gimple_assign_rhs_code (binrhs
),
4181 gimple_assign_lhs (binlhs
),
4182 gimple_assign_rhs2 (binrhs
));
4183 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
4184 gsi_insert_before (&gsi
, binrhs
, GSI_SAME_STMT
);
4185 gimple_set_uid (binrhs
, gimple_uid (stmt
));
4187 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
4188 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
4190 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4192 fprintf (dump_file
, "Linearized: ");
4193 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4196 reassociate_stats
.linearized
++;
4199 gsi
= gsi_for_stmt (oldbinrhs
);
4200 reassoc_remove_stmt (&gsi
);
4201 release_defs (oldbinrhs
);
4203 gimple_set_visited (stmt
, true);
4204 gimple_set_visited (binlhs
, true);
4205 gimple_set_visited (binrhs
, true);
4207 /* Tail recurse on the new rhs if it still needs reassociation. */
4208 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
4209 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4210 want to change the algorithm while converting to tuples. */
4211 linearize_expr (stmt
);
4214 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4215 it. Otherwise, return NULL. */
4218 get_single_immediate_use (tree lhs
)
4220 use_operand_p immuse
;
4223 if (TREE_CODE (lhs
) == SSA_NAME
4224 && single_imm_use (lhs
, &immuse
, &immusestmt
)
4225 && is_gimple_assign (immusestmt
))
4231 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4232 representing the negated value. Insertions of any necessary
4233 instructions go before GSI.
4234 This function is recursive in that, if you hand it "a_5" as the
4235 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4236 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
4239 negate_value (tree tonegate
, gimple_stmt_iterator
*gsip
)
4241 gimple
*negatedefstmt
= NULL
;
4242 tree resultofnegate
;
4243 gimple_stmt_iterator gsi
;
4246 /* If we are trying to negate a name, defined by an add, negate the
4247 add operands instead. */
4248 if (TREE_CODE (tonegate
) == SSA_NAME
)
4249 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
4250 if (TREE_CODE (tonegate
) == SSA_NAME
4251 && is_gimple_assign (negatedefstmt
)
4252 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
4253 && has_single_use (gimple_assign_lhs (negatedefstmt
))
4254 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
4256 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
4257 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
4258 tree lhs
= gimple_assign_lhs (negatedefstmt
);
4261 gsi
= gsi_for_stmt (negatedefstmt
);
4262 rhs1
= negate_value (rhs1
, &gsi
);
4264 gsi
= gsi_for_stmt (negatedefstmt
);
4265 rhs2
= negate_value (rhs2
, &gsi
);
4267 gsi
= gsi_for_stmt (negatedefstmt
);
4268 lhs
= make_ssa_name (TREE_TYPE (lhs
));
4269 gimple_set_visited (negatedefstmt
, true);
4270 g
= gimple_build_assign (lhs
, PLUS_EXPR
, rhs1
, rhs2
);
4271 gimple_set_uid (g
, gimple_uid (negatedefstmt
));
4272 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4276 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
4277 resultofnegate
= force_gimple_operand_gsi (gsip
, tonegate
, true,
4278 NULL_TREE
, true, GSI_SAME_STMT
);
4280 uid
= gimple_uid (gsi_stmt (gsi
));
4281 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
4283 gimple
*stmt
= gsi_stmt (gsi
);
4284 if (gimple_uid (stmt
) != 0)
4286 gimple_set_uid (stmt
, uid
);
4288 return resultofnegate
;
4291 /* Return true if we should break up the subtract in STMT into an add
4292 with negate. This is true when we the subtract operands are really
4293 adds, or the subtract itself is used in an add expression. In
4294 either case, breaking up the subtract into an add with negate
4295 exposes the adds to reassociation. */
4298 should_break_up_subtract (gimple
*stmt
)
4300 tree lhs
= gimple_assign_lhs (stmt
);
4301 tree binlhs
= gimple_assign_rhs1 (stmt
);
4302 tree binrhs
= gimple_assign_rhs2 (stmt
);
4304 struct loop
*loop
= loop_containing_stmt (stmt
);
4306 if (TREE_CODE (binlhs
) == SSA_NAME
4307 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs
), PLUS_EXPR
, loop
))
4310 if (TREE_CODE (binrhs
) == SSA_NAME
4311 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
), PLUS_EXPR
, loop
))
4314 if (TREE_CODE (lhs
) == SSA_NAME
4315 && (immusestmt
= get_single_immediate_use (lhs
))
4316 && is_gimple_assign (immusestmt
)
4317 && (gimple_assign_rhs_code (immusestmt
) == PLUS_EXPR
4318 || gimple_assign_rhs_code (immusestmt
) == MULT_EXPR
))
4323 /* Transform STMT from A - B into A + -B. */
4326 break_up_subtract (gimple
*stmt
, gimple_stmt_iterator
*gsip
)
4328 tree rhs1
= gimple_assign_rhs1 (stmt
);
4329 tree rhs2
= gimple_assign_rhs2 (stmt
);
4331 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4333 fprintf (dump_file
, "Breaking up subtract ");
4334 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4337 rhs2
= negate_value (rhs2
, gsip
);
4338 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
4342 /* Determine whether STMT is a builtin call that raises an SSA name
4343 to an integer power and has only one use. If so, and this is early
4344 reassociation and unsafe math optimizations are permitted, place
4345 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4346 If any of these conditions does not hold, return FALSE. */
4349 acceptable_pow_call (gcall
*stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
4352 REAL_VALUE_TYPE c
, cint
;
4354 switch (gimple_call_combined_fn (stmt
))
4357 if (flag_errno_math
)
4360 *base
= gimple_call_arg (stmt
, 0);
4361 arg1
= gimple_call_arg (stmt
, 1);
4363 if (TREE_CODE (arg1
) != REAL_CST
)
4366 c
= TREE_REAL_CST (arg1
);
4368 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
4371 *exponent
= real_to_integer (&c
);
4372 real_from_integer (&cint
, VOIDmode
, *exponent
, SIGNED
);
4373 if (!real_identical (&c
, &cint
))
4379 *base
= gimple_call_arg (stmt
, 0);
4380 arg1
= gimple_call_arg (stmt
, 1);
4382 if (!tree_fits_shwi_p (arg1
))
4385 *exponent
= tree_to_shwi (arg1
);
4392 /* Expanding negative exponents is generally unproductive, so we don't
4393 complicate matters with those. Exponents of zero and one should
4394 have been handled by expression folding. */
4395 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
4401 /* Try to derive and add operand entry for OP to *OPS. Return false if
4405 try_special_add_to_ops (vec
<operand_entry
*> *ops
,
4406 enum tree_code code
,
4407 tree op
, gimple
* def_stmt
)
4409 tree base
= NULL_TREE
;
4410 HOST_WIDE_INT exponent
= 0;
4412 if (TREE_CODE (op
) != SSA_NAME
4413 || ! has_single_use (op
))
4416 if (code
== MULT_EXPR
4417 && reassoc_insert_powi_p
4418 && flag_unsafe_math_optimizations
4419 && is_gimple_call (def_stmt
)
4420 && acceptable_pow_call (as_a
<gcall
*> (def_stmt
), &base
, &exponent
))
4422 add_repeat_to_ops_vec (ops
, base
, exponent
);
4423 gimple_set_visited (def_stmt
, true);
4426 else if (code
== MULT_EXPR
4427 && is_gimple_assign (def_stmt
)
4428 && gimple_assign_rhs_code (def_stmt
) == NEGATE_EXPR
4429 && !HONOR_SNANS (TREE_TYPE (op
))
4430 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op
))
4431 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op
))))
4433 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
4434 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
4435 add_to_ops_vec (ops
, rhs1
);
4436 add_to_ops_vec (ops
, cst
);
4437 gimple_set_visited (def_stmt
, true);
4444 /* Recursively linearize a binary expression that is the RHS of STMT.
4445 Place the operands of the expression tree in the vector named OPS. */
4448 linearize_expr_tree (vec
<operand_entry
*> *ops
, gimple
*stmt
,
4449 bool is_associative
, bool set_visited
)
4451 tree binlhs
= gimple_assign_rhs1 (stmt
);
4452 tree binrhs
= gimple_assign_rhs2 (stmt
);
4453 gimple
*binlhsdef
= NULL
, *binrhsdef
= NULL
;
4454 bool binlhsisreassoc
= false;
4455 bool binrhsisreassoc
= false;
4456 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
4457 struct loop
*loop
= loop_containing_stmt (stmt
);
4460 gimple_set_visited (stmt
, true);
4462 if (TREE_CODE (binlhs
) == SSA_NAME
)
4464 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
4465 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
4466 && !stmt_could_throw_p (binlhsdef
));
4469 if (TREE_CODE (binrhs
) == SSA_NAME
)
4471 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
4472 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
4473 && !stmt_could_throw_p (binrhsdef
));
4476 /* If the LHS is not reassociable, but the RHS is, we need to swap
4477 them. If neither is reassociable, there is nothing we can do, so
4478 just put them in the ops vector. If the LHS is reassociable,
4479 linearize it. If both are reassociable, then linearize the RHS
4482 if (!binlhsisreassoc
)
4484 /* If this is not a associative operation like division, give up. */
4485 if (!is_associative
)
4487 add_to_ops_vec (ops
, binrhs
);
4491 if (!binrhsisreassoc
)
4493 if (!try_special_add_to_ops (ops
, rhscode
, binrhs
, binrhsdef
))
4494 add_to_ops_vec (ops
, binrhs
);
4496 if (!try_special_add_to_ops (ops
, rhscode
, binlhs
, binlhsdef
))
4497 add_to_ops_vec (ops
, binlhs
);
4502 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4504 fprintf (dump_file
, "swapping operands of ");
4505 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4508 swap_ssa_operands (stmt
,
4509 gimple_assign_rhs1_ptr (stmt
),
4510 gimple_assign_rhs2_ptr (stmt
));
4513 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4515 fprintf (dump_file
, " is now ");
4516 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4519 /* We want to make it so the lhs is always the reassociative op,
4521 std::swap (binlhs
, binrhs
);
4523 else if (binrhsisreassoc
)
4525 linearize_expr (stmt
);
4526 binlhs
= gimple_assign_rhs1 (stmt
);
4527 binrhs
= gimple_assign_rhs2 (stmt
);
4530 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
4531 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
4533 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
4534 is_associative
, set_visited
);
4536 if (!try_special_add_to_ops (ops
, rhscode
, binrhs
, binrhsdef
))
4537 add_to_ops_vec (ops
, binrhs
);
4540 /* Repropagate the negates back into subtracts, since no other pass
4541 currently does it. */
4544 repropagate_negates (void)
4549 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
4551 gimple
*user
= get_single_immediate_use (negate
);
4553 if (!user
|| !is_gimple_assign (user
))
4556 /* The negate operand can be either operand of a PLUS_EXPR
4557 (it can be the LHS if the RHS is a constant for example).
4559 Force the negate operand to the RHS of the PLUS_EXPR, then
4560 transform the PLUS_EXPR into a MINUS_EXPR. */
4561 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
4563 /* If the negated operand appears on the LHS of the
4564 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4565 to force the negated operand to the RHS of the PLUS_EXPR. */
4566 if (gimple_assign_rhs1 (user
) == negate
)
4568 swap_ssa_operands (user
,
4569 gimple_assign_rhs1_ptr (user
),
4570 gimple_assign_rhs2_ptr (user
));
4573 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4574 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4575 if (gimple_assign_rhs2 (user
) == negate
)
4577 tree rhs1
= gimple_assign_rhs1 (user
);
4578 tree rhs2
= gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate
));
4579 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
4580 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
, rhs2
);
4584 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
4586 if (gimple_assign_rhs1 (user
) == negate
)
4591 which we transform into
4594 This pushes down the negate which we possibly can merge
4595 into some other operation, hence insert it into the
4596 plus_negates vector. */
4597 gimple
*feed
= SSA_NAME_DEF_STMT (negate
);
4598 tree a
= gimple_assign_rhs1 (feed
);
4599 tree b
= gimple_assign_rhs2 (user
);
4600 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
);
4601 gimple_stmt_iterator gsi2
= gsi_for_stmt (user
);
4602 tree x
= make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed
)));
4603 gimple
*g
= gimple_build_assign (x
, PLUS_EXPR
, a
, b
);
4604 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
4605 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, x
);
4606 user
= gsi_stmt (gsi2
);
4608 reassoc_remove_stmt (&gsi
);
4609 release_defs (feed
);
4610 plus_negates
.safe_push (gimple_assign_lhs (user
));
4614 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4615 rid of one operation. */
4616 gimple
*feed
= SSA_NAME_DEF_STMT (negate
);
4617 tree a
= gimple_assign_rhs1 (feed
);
4618 tree rhs1
= gimple_assign_rhs1 (user
);
4619 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
4620 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, a
);
4621 update_stmt (gsi_stmt (gsi
));
4627 /* Returns true if OP is of a type for which we can do reassociation.
4628 That is for integral or non-saturating fixed-point types, and for
4629 floating point type when associative-math is enabled. */
4632 can_reassociate_p (tree op
)
4634 tree type
= TREE_TYPE (op
);
4635 if ((ANY_INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_WRAPS (type
))
4636 || NON_SAT_FIXED_POINT_TYPE_P (type
)
4637 || (flag_associative_math
&& FLOAT_TYPE_P (type
)))
4642 /* Break up subtract operations in block BB.
4644 We do this top down because we don't know whether the subtract is
4645 part of a possible chain of reassociation except at the top.
4654 we want to break up k = t - q, but we won't until we've transformed q
4655 = b - r, which won't be broken up until we transform b = c - d.
4657 En passant, clear the GIMPLE visited flag on every statement
4658 and set UIDs within each basic block. */
4661 break_up_subtract_bb (basic_block bb
)
4663 gimple_stmt_iterator gsi
;
4665 unsigned int uid
= 1;
4667 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4669 gimple
*stmt
= gsi_stmt (gsi
);
4670 gimple_set_visited (stmt
, false);
4671 gimple_set_uid (stmt
, uid
++);
4673 if (!is_gimple_assign (stmt
)
4674 || !can_reassociate_p (gimple_assign_lhs (stmt
)))
4677 /* Look for simple gimple subtract operations. */
4678 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
4680 if (!can_reassociate_p (gimple_assign_rhs1 (stmt
))
4681 || !can_reassociate_p (gimple_assign_rhs2 (stmt
)))
4684 /* Check for a subtract used only in an addition. If this
4685 is the case, transform it into add of a negate for better
4686 reassociation. IE transform C = A-B into C = A + -B if C
4687 is only used in an addition. */
4688 if (should_break_up_subtract (stmt
))
4689 break_up_subtract (stmt
, &gsi
);
4691 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
4692 && can_reassociate_p (gimple_assign_rhs1 (stmt
)))
4693 plus_negates
.safe_push (gimple_assign_lhs (stmt
));
4695 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
4697 son
= next_dom_son (CDI_DOMINATORS
, son
))
4698 break_up_subtract_bb (son
);
4701 /* Used for repeated factor analysis. */
4702 struct repeat_factor
4704 /* An SSA name that occurs in a multiply chain. */
4707 /* Cached rank of the factor. */
4710 /* Number of occurrences of the factor in the chain. */
4711 HOST_WIDE_INT count
;
4713 /* An SSA name representing the product of this factor and
4714 all factors appearing later in the repeated factor vector. */
4719 static vec
<repeat_factor
> repeat_factor_vec
;
4721 /* Used for sorting the repeat factor vector. Sort primarily by
4722 ascending occurrence count, secondarily by descending rank. */
4725 compare_repeat_factors (const void *x1
, const void *x2
)
4727 const repeat_factor
*rf1
= (const repeat_factor
*) x1
;
4728 const repeat_factor
*rf2
= (const repeat_factor
*) x2
;
4730 if (rf1
->count
!= rf2
->count
)
4731 return rf1
->count
- rf2
->count
;
4733 return rf2
->rank
- rf1
->rank
;
4736 /* Look for repeated operands in OPS in the multiply tree rooted at
4737 STMT. Replace them with an optimal sequence of multiplies and powi
4738 builtin calls, and remove the used operands from OPS. Return an
4739 SSA name representing the value of the replacement sequence. */
4742 attempt_builtin_powi (gimple
*stmt
, vec
<operand_entry
*> *ops
)
4744 unsigned i
, j
, vec_len
;
4747 repeat_factor
*rf1
, *rf2
;
4748 repeat_factor rfnew
;
4749 tree result
= NULL_TREE
;
4750 tree target_ssa
, iter_result
;
4751 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
4752 tree powi_fndecl
= mathfn_built_in (type
, BUILT_IN_POWI
);
4753 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4754 gimple
*mul_stmt
, *pow_stmt
;
4756 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4761 /* Allocate the repeated factor vector. */
4762 repeat_factor_vec
.create (10);
4764 /* Scan the OPS vector for all SSA names in the product and build
4765 up a vector of occurrence counts for each factor. */
4766 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
4768 if (TREE_CODE (oe
->op
) == SSA_NAME
)
4770 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4772 if (rf1
->factor
== oe
->op
)
4774 rf1
->count
+= oe
->count
;
4779 if (j
>= repeat_factor_vec
.length ())
4781 rfnew
.factor
= oe
->op
;
4782 rfnew
.rank
= oe
->rank
;
4783 rfnew
.count
= oe
->count
;
4784 rfnew
.repr
= NULL_TREE
;
4785 repeat_factor_vec
.safe_push (rfnew
);
4790 /* Sort the repeated factor vector by (a) increasing occurrence count,
4791 and (b) decreasing rank. */
4792 repeat_factor_vec
.qsort (compare_repeat_factors
);
4794 /* It is generally best to combine as many base factors as possible
4795 into a product before applying __builtin_powi to the result.
4796 However, the sort order chosen for the repeated factor vector
4797 allows us to cache partial results for the product of the base
4798 factors for subsequent use. When we already have a cached partial
4799 result from a previous iteration, it is best to make use of it
4800 before looking for another __builtin_pow opportunity.
4802 As an example, consider x * x * y * y * y * z * z * z * z.
4803 We want to first compose the product x * y * z, raise it to the
4804 second power, then multiply this by y * z, and finally multiply
4805 by z. This can be done in 5 multiplies provided we cache y * z
4806 for use in both expressions:
4814 If we instead ignored the cached y * z and first multiplied by
4815 the __builtin_pow opportunity z * z, we would get the inferior:
4824 vec_len
= repeat_factor_vec
.length ();
4826 /* Repeatedly look for opportunities to create a builtin_powi call. */
4829 HOST_WIDE_INT power
;
4831 /* First look for the largest cached product of factors from
4832 preceding iterations. If found, create a builtin_powi for
4833 it if the minimum occurrence count for its factors is at
4834 least 2, or just use this cached product as our next
4835 multiplicand if the minimum occurrence count is 1. */
4836 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4838 if (rf1
->repr
&& rf1
->count
> 0)
4848 iter_result
= rf1
->repr
;
4850 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4854 fputs ("Multiplying by cached product ", dump_file
);
4855 for (elt
= j
; elt
< vec_len
; elt
++)
4857 rf
= &repeat_factor_vec
[elt
];
4858 print_generic_expr (dump_file
, rf
->factor
, 0);
4859 if (elt
< vec_len
- 1)
4860 fputs (" * ", dump_file
);
4862 fputs ("\n", dump_file
);
4867 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4868 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
4869 build_int_cst (integer_type_node
,
4871 gimple_call_set_lhs (pow_stmt
, iter_result
);
4872 gimple_set_location (pow_stmt
, gimple_location (stmt
));
4873 gimple_set_uid (pow_stmt
, gimple_uid (stmt
));
4874 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
4876 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4880 fputs ("Building __builtin_pow call for cached product (",
4882 for (elt
= j
; elt
< vec_len
; elt
++)
4884 rf
= &repeat_factor_vec
[elt
];
4885 print_generic_expr (dump_file
, rf
->factor
, 0);
4886 if (elt
< vec_len
- 1)
4887 fputs (" * ", dump_file
);
4889 fprintf (dump_file
, ")^" HOST_WIDE_INT_PRINT_DEC
"\n",
4896 /* Otherwise, find the first factor in the repeated factor
4897 vector whose occurrence count is at least 2. If no such
4898 factor exists, there are no builtin_powi opportunities
4900 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4902 if (rf1
->count
>= 2)
4911 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4915 fputs ("Building __builtin_pow call for (", dump_file
);
4916 for (elt
= j
; elt
< vec_len
; elt
++)
4918 rf
= &repeat_factor_vec
[elt
];
4919 print_generic_expr (dump_file
, rf
->factor
, 0);
4920 if (elt
< vec_len
- 1)
4921 fputs (" * ", dump_file
);
4923 fprintf (dump_file
, ")^" HOST_WIDE_INT_PRINT_DEC
"\n", power
);
4926 reassociate_stats
.pows_created
++;
4928 /* Visit each element of the vector in reverse order (so that
4929 high-occurrence elements are visited first, and within the
4930 same occurrence count, lower-ranked elements are visited
4931 first). Form a linear product of all elements in this order
4932 whose occurrence count is at least that of element J.
4933 Record the SSA name representing the product of each element
4934 with all subsequent elements in the vector. */
4935 if (j
== vec_len
- 1)
4936 rf1
->repr
= rf1
->factor
;
4939 for (ii
= vec_len
- 2; ii
>= (int)j
; ii
--)
4943 rf1
= &repeat_factor_vec
[ii
];
4944 rf2
= &repeat_factor_vec
[ii
+ 1];
4946 /* Init the last factor's representative to be itself. */
4948 rf2
->repr
= rf2
->factor
;
4953 target_ssa
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4954 mul_stmt
= gimple_build_assign (target_ssa
, MULT_EXPR
,
4956 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4957 gimple_set_uid (mul_stmt
, gimple_uid (stmt
));
4958 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
4959 rf1
->repr
= target_ssa
;
4961 /* Don't reprocess the multiply we just introduced. */
4962 gimple_set_visited (mul_stmt
, true);
4966 /* Form a call to __builtin_powi for the maximum product
4967 just formed, raised to the power obtained earlier. */
4968 rf1
= &repeat_factor_vec
[j
];
4969 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4970 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
4971 build_int_cst (integer_type_node
,
4973 gimple_call_set_lhs (pow_stmt
, iter_result
);
4974 gimple_set_location (pow_stmt
, gimple_location (stmt
));
4975 gimple_set_uid (pow_stmt
, gimple_uid (stmt
));
4976 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
4979 /* If we previously formed at least one other builtin_powi call,
4980 form the product of this one and those others. */
4983 tree new_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4984 mul_stmt
= gimple_build_assign (new_result
, MULT_EXPR
,
4985 result
, iter_result
);
4986 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4987 gimple_set_uid (mul_stmt
, gimple_uid (stmt
));
4988 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
4989 gimple_set_visited (mul_stmt
, true);
4990 result
= new_result
;
4993 result
= iter_result
;
4995 /* Decrement the occurrence count of each element in the product
4996 by the count found above, and remove this many copies of each
4998 for (i
= j
; i
< vec_len
; i
++)
5003 rf1
= &repeat_factor_vec
[i
];
5004 rf1
->count
-= power
;
5006 FOR_EACH_VEC_ELT_REVERSE (*ops
, n
, oe
)
5008 if (oe
->op
== rf1
->factor
)
5012 ops
->ordered_remove (n
);
5028 /* At this point all elements in the repeated factor vector have a
5029 remaining occurrence count of 0 or 1, and those with a count of 1
5030 don't have cached representatives. Re-sort the ops vector and
5032 ops
->qsort (sort_by_operand_rank
);
5033 repeat_factor_vec
.release ();
5035 /* Return the final product computed herein. Note that there may
5036 still be some elements with single occurrence count left in OPS;
5037 those will be handled by the normal reassociation logic. */
5041 /* Attempt to optimize
5042 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5043 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
5046 attempt_builtin_copysign (vec
<operand_entry
*> *ops
)
5050 unsigned int length
= ops
->length ();
5051 tree cst
= ops
->last ()->op
;
5053 if (length
== 1 || TREE_CODE (cst
) != REAL_CST
)
5056 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
5058 if (TREE_CODE (oe
->op
) == SSA_NAME
5059 && has_single_use (oe
->op
))
5061 gimple
*def_stmt
= SSA_NAME_DEF_STMT (oe
->op
);
5062 if (gcall
*old_call
= dyn_cast
<gcall
*> (def_stmt
))
5065 switch (gimple_call_combined_fn (old_call
))
5068 arg0
= gimple_call_arg (old_call
, 0);
5069 arg1
= gimple_call_arg (old_call
, 1);
5070 /* The first argument of copysign must be a constant,
5071 otherwise there's nothing to do. */
5072 if (TREE_CODE (arg0
) == REAL_CST
)
5074 tree type
= TREE_TYPE (arg0
);
5075 tree mul
= const_binop (MULT_EXPR
, type
, cst
, arg0
);
5076 /* If we couldn't fold to a single constant, skip it.
5077 That happens e.g. for inexact multiplication when
5079 if (mul
== NULL_TREE
)
5081 /* Instead of adjusting OLD_CALL, let's build a new
5082 call to not leak the LHS and prevent keeping bogus
5083 debug statements. DCE will clean up the old call. */
5085 if (gimple_call_internal_p (old_call
))
5086 new_call
= gimple_build_call_internal
5087 (IFN_COPYSIGN
, 2, mul
, arg1
);
5089 new_call
= gimple_build_call
5090 (gimple_call_fndecl (old_call
), 2, mul
, arg1
);
5091 tree lhs
= make_ssa_name (type
);
5092 gimple_call_set_lhs (new_call
, lhs
);
5093 gimple_set_location (new_call
,
5094 gimple_location (old_call
));
5095 insert_stmt_after (new_call
, old_call
);
5096 /* We've used the constant, get rid of it. */
5098 bool cst1_neg
= real_isneg (TREE_REAL_CST_PTR (cst
));
5099 /* Handle the CST1 < 0 case by negating the result. */
5102 tree negrhs
= make_ssa_name (TREE_TYPE (lhs
));
5104 = gimple_build_assign (negrhs
, NEGATE_EXPR
, lhs
);
5105 insert_stmt_after (negate_stmt
, new_call
);
5110 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5112 fprintf (dump_file
, "Optimizing copysign: ");
5113 print_generic_expr (dump_file
, cst
, 0);
5114 fprintf (dump_file
, " * COPYSIGN (");
5115 print_generic_expr (dump_file
, arg0
, 0);
5116 fprintf (dump_file
, ", ");
5117 print_generic_expr (dump_file
, arg1
, 0);
5118 fprintf (dump_file
, ") into %sCOPYSIGN (",
5119 cst1_neg
? "-" : "");
5120 print_generic_expr (dump_file
, mul
, 0);
5121 fprintf (dump_file
, ", ");
5122 print_generic_expr (dump_file
, arg1
, 0);
5123 fprintf (dump_file
, "\n");
5136 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
5139 transform_stmt_to_copy (gimple_stmt_iterator
*gsi
, gimple
*stmt
, tree new_rhs
)
5143 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5145 fprintf (dump_file
, "Transforming ");
5146 print_gimple_stmt (dump_file
, stmt
, 0, 0);
5149 rhs1
= gimple_assign_rhs1 (stmt
);
5150 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
5152 remove_visited_stmt_chain (rhs1
);
5154 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5156 fprintf (dump_file
, " into ");
5157 print_gimple_stmt (dump_file
, stmt
, 0, 0);
5161 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
5164 transform_stmt_to_multiply (gimple_stmt_iterator
*gsi
, gimple
*stmt
,
5165 tree rhs1
, tree rhs2
)
5167 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5169 fprintf (dump_file
, "Transforming ");
5170 print_gimple_stmt (dump_file
, stmt
, 0, 0);
5173 gimple_assign_set_rhs_with_ops (gsi
, MULT_EXPR
, rhs1
, rhs2
);
5174 update_stmt (gsi_stmt (*gsi
));
5175 remove_visited_stmt_chain (rhs1
);
5177 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5179 fprintf (dump_file
, " into ");
5180 print_gimple_stmt (dump_file
, stmt
, 0, 0);
5184 /* Reassociate expressions in basic block BB and its post-dominator as
5187 Bubble up return status from maybe_optimize_range_tests. */
5190 reassociate_bb (basic_block bb
)
5192 gimple_stmt_iterator gsi
;
5194 gimple
*stmt
= last_stmt (bb
);
5195 bool cfg_cleanup_needed
= false;
5197 if (stmt
&& !gimple_visited_p (stmt
))
5198 cfg_cleanup_needed
|= maybe_optimize_range_tests (stmt
);
5200 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
5202 stmt
= gsi_stmt (gsi
);
5204 if (is_gimple_assign (stmt
)
5205 && !stmt_could_throw_p (stmt
))
5207 tree lhs
, rhs1
, rhs2
;
5208 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
5210 /* If this is not a gimple binary expression, there is
5211 nothing for us to do with it. */
5212 if (get_gimple_rhs_class (rhs_code
) != GIMPLE_BINARY_RHS
)
5215 /* If this was part of an already processed statement,
5216 we don't need to touch it again. */
5217 if (gimple_visited_p (stmt
))
5219 /* This statement might have become dead because of previous
5221 if (has_zero_uses (gimple_get_lhs (stmt
)))
5223 reassoc_remove_stmt (&gsi
);
5224 release_defs (stmt
);
5225 /* We might end up removing the last stmt above which
5226 places the iterator to the end of the sequence.
5227 Reset it to the last stmt in this case which might
5228 be the end of the sequence as well if we removed
5229 the last statement of the sequence. In which case
5230 we need to bail out. */
5231 if (gsi_end_p (gsi
))
5233 gsi
= gsi_last_bb (bb
);
5234 if (gsi_end_p (gsi
))
5241 lhs
= gimple_assign_lhs (stmt
);
5242 rhs1
= gimple_assign_rhs1 (stmt
);
5243 rhs2
= gimple_assign_rhs2 (stmt
);
5245 /* For non-bit or min/max operations we can't associate
5246 all types. Verify that here. */
5247 if (rhs_code
!= BIT_IOR_EXPR
5248 && rhs_code
!= BIT_AND_EXPR
5249 && rhs_code
!= BIT_XOR_EXPR
5250 && rhs_code
!= MIN_EXPR
5251 && rhs_code
!= MAX_EXPR
5252 && (!can_reassociate_p (lhs
)
5253 || !can_reassociate_p (rhs1
)
5254 || !can_reassociate_p (rhs2
)))
5257 if (associative_tree_code (rhs_code
))
5259 auto_vec
<operand_entry
*> ops
;
5260 tree powi_result
= NULL_TREE
;
5261 bool is_vector
= VECTOR_TYPE_P (TREE_TYPE (lhs
));
5263 /* There may be no immediate uses left by the time we
5264 get here because we may have eliminated them all. */
5265 if (TREE_CODE (lhs
) == SSA_NAME
&& has_zero_uses (lhs
))
5268 gimple_set_visited (stmt
, true);
5269 linearize_expr_tree (&ops
, stmt
, true, true);
5270 ops
.qsort (sort_by_operand_rank
);
5271 optimize_ops_list (rhs_code
, &ops
);
5272 if (undistribute_ops_list (rhs_code
, &ops
,
5273 loop_containing_stmt (stmt
)))
5275 ops
.qsort (sort_by_operand_rank
);
5276 optimize_ops_list (rhs_code
, &ops
);
5279 if (rhs_code
== PLUS_EXPR
5280 && transform_add_to_multiply (&ops
))
5281 ops
.qsort (sort_by_operand_rank
);
5283 if (rhs_code
== BIT_IOR_EXPR
|| rhs_code
== BIT_AND_EXPR
)
5286 optimize_vec_cond_expr (rhs_code
, &ops
);
5288 optimize_range_tests (rhs_code
, &ops
);
5291 if (rhs_code
== MULT_EXPR
&& !is_vector
)
5293 attempt_builtin_copysign (&ops
);
5295 if (reassoc_insert_powi_p
5296 && flag_unsafe_math_optimizations
)
5297 powi_result
= attempt_builtin_powi (stmt
, &ops
);
5300 operand_entry
*last
;
5301 bool negate_result
= false;
5302 if (ops
.length () > 1
5303 && rhs_code
== MULT_EXPR
)
5306 if ((integer_minus_onep (last
->op
)
5307 || real_minus_onep (last
->op
))
5308 && !HONOR_SNANS (TREE_TYPE (lhs
))
5309 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs
))
5310 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs
))))
5313 negate_result
= true;
5317 /* If the operand vector is now empty, all operands were
5318 consumed by the __builtin_powi optimization. */
5319 if (ops
.length () == 0)
5320 transform_stmt_to_copy (&gsi
, stmt
, powi_result
);
5321 else if (ops
.length () == 1)
5323 tree last_op
= ops
.last ()->op
;
5325 /* If the stmt that defines operand has to be inserted, insert it
5327 if (ops
.last ()->stmt_to_insert
)
5328 insert_stmt_before_use (stmt
, ops
.last ()->stmt_to_insert
);
5330 transform_stmt_to_multiply (&gsi
, stmt
, last_op
,
5333 transform_stmt_to_copy (&gsi
, stmt
, last_op
);
5337 machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
5338 int ops_num
= ops
.length ();
5339 int width
= get_reassociation_width (ops_num
, rhs_code
, mode
);
5342 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5344 "Width = %d was chosen for reassociation\n", width
);
5347 && ops
.length () > 3)
5348 rewrite_expr_tree_parallel (as_a
<gassign
*> (stmt
),
5352 /* When there are three operands left, we want
5353 to make sure the ones that get the double
5354 binary op are chosen wisely. */
5355 int len
= ops
.length ();
5357 swap_ops_for_binary_stmt (ops
, len
- 3, stmt
);
5359 new_lhs
= rewrite_expr_tree (stmt
, 0, ops
,
5360 powi_result
!= NULL
);
5363 /* If we combined some repeated factors into a
5364 __builtin_powi call, multiply that result by the
5365 reassociated operands. */
5368 gimple
*mul_stmt
, *lhs_stmt
= SSA_NAME_DEF_STMT (lhs
);
5369 tree type
= TREE_TYPE (lhs
);
5370 tree target_ssa
= make_temp_ssa_name (type
, NULL
,
5372 gimple_set_lhs (lhs_stmt
, target_ssa
);
5373 update_stmt (lhs_stmt
);
5375 target_ssa
= new_lhs
;
5376 mul_stmt
= gimple_build_assign (lhs
, MULT_EXPR
,
5377 powi_result
, target_ssa
);
5378 gimple_set_location (mul_stmt
, gimple_location (stmt
));
5379 gimple_set_uid (mul_stmt
, gimple_uid (stmt
));
5380 gsi_insert_after (&gsi
, mul_stmt
, GSI_NEW_STMT
);
5386 stmt
= SSA_NAME_DEF_STMT (lhs
);
5387 tree tmp
= make_ssa_name (TREE_TYPE (lhs
));
5388 gimple_set_lhs (stmt
, tmp
);
5389 gassign
*neg_stmt
= gimple_build_assign (lhs
, NEGATE_EXPR
,
5391 gimple_set_uid (neg_stmt
, gimple_uid (stmt
));
5392 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
5393 gsi_insert_after (&gsi
, neg_stmt
, GSI_NEW_STMT
);
5399 for (son
= first_dom_son (CDI_POST_DOMINATORS
, bb
);
5401 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
5402 cfg_cleanup_needed
|= reassociate_bb (son
);
5404 return cfg_cleanup_needed
;
5407 /* Add jumps around shifts for range tests turned into bit tests.
5408 For each SSA_NAME VAR we have code like:
5409 VAR = ...; // final stmt of range comparison
5410 // bit test here...;
5411 OTHERVAR = ...; // final stmt of the bit test sequence
5412 RES = VAR | OTHERVAR;
5413 Turn the above into:
5420 // bit test here...;
5423 # RES = PHI<1(l1), OTHERVAR(l2)>; */
5431 FOR_EACH_VEC_ELT (reassoc_branch_fixups
, i
, var
)
5433 gimple
*def_stmt
= SSA_NAME_DEF_STMT (var
);
5436 bool ok
= single_imm_use (var
, &use
, &use_stmt
);
5438 && is_gimple_assign (use_stmt
)
5439 && gimple_assign_rhs_code (use_stmt
) == BIT_IOR_EXPR
5440 && gimple_bb (def_stmt
) == gimple_bb (use_stmt
));
5442 basic_block cond_bb
= gimple_bb (def_stmt
);
5443 basic_block then_bb
= split_block (cond_bb
, def_stmt
)->dest
;
5444 basic_block merge_bb
= split_block (then_bb
, use_stmt
)->dest
;
5446 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
5447 gimple
*g
= gimple_build_cond (NE_EXPR
, var
,
5448 build_zero_cst (TREE_TYPE (var
)),
5449 NULL_TREE
, NULL_TREE
);
5450 location_t loc
= gimple_location (use_stmt
);
5451 gimple_set_location (g
, loc
);
5452 gsi_insert_after (&gsi
, g
, GSI_NEW_STMT
);
5454 edge etrue
= make_edge (cond_bb
, merge_bb
, EDGE_TRUE_VALUE
);
5455 etrue
->probability
= REG_BR_PROB_BASE
/ 2;
5456 etrue
->count
= cond_bb
->count
/ 2;
5457 edge efalse
= find_edge (cond_bb
, then_bb
);
5458 efalse
->flags
= EDGE_FALSE_VALUE
;
5459 efalse
->probability
-= etrue
->probability
;
5460 efalse
->count
-= etrue
->count
;
5461 then_bb
->count
-= etrue
->count
;
5463 tree othervar
= NULL_TREE
;
5464 if (gimple_assign_rhs1 (use_stmt
) == var
)
5465 othervar
= gimple_assign_rhs2 (use_stmt
);
5466 else if (gimple_assign_rhs2 (use_stmt
) == var
)
5467 othervar
= gimple_assign_rhs1 (use_stmt
);
5470 tree lhs
= gimple_assign_lhs (use_stmt
);
5471 gphi
*phi
= create_phi_node (lhs
, merge_bb
);
5472 add_phi_arg (phi
, build_one_cst (TREE_TYPE (lhs
)), etrue
, loc
);
5473 add_phi_arg (phi
, othervar
, single_succ_edge (then_bb
), loc
);
5474 gsi
= gsi_for_stmt (use_stmt
);
5475 gsi_remove (&gsi
, true);
5477 set_immediate_dominator (CDI_DOMINATORS
, merge_bb
, cond_bb
);
5478 set_immediate_dominator (CDI_POST_DOMINATORS
, cond_bb
, merge_bb
);
5480 reassoc_branch_fixups
.release ();
5483 void dump_ops_vector (FILE *file
, vec
<operand_entry
*> ops
);
5484 void debug_ops_vector (vec
<operand_entry
*> ops
);
5486 /* Dump the operand entry vector OPS to FILE. */
5489 dump_ops_vector (FILE *file
, vec
<operand_entry
*> ops
)
5494 FOR_EACH_VEC_ELT (ops
, i
, oe
)
5496 fprintf (file
, "Op %d -> rank: %d, tree: ", i
, oe
->rank
);
5497 print_generic_expr (file
, oe
->op
, 0);
5498 fprintf (file
, "\n");
5502 /* Dump the operand entry vector OPS to STDERR. */
5505 debug_ops_vector (vec
<operand_entry
*> ops
)
5507 dump_ops_vector (stderr
, ops
);
5510 /* Bubble up return status from reassociate_bb. */
5515 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
5516 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun
));
5519 /* Initialize the reassociation pass. */
5526 int *bbs
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
);
5528 /* Find the loops, so that we can prevent moving calculations in
5530 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
5532 memset (&reassociate_stats
, 0, sizeof (reassociate_stats
));
5534 next_operand_entry_id
= 0;
5536 /* Reverse RPO (Reverse Post Order) will give us something where
5537 deeper loops come later. */
5538 pre_and_rev_post_order_compute (NULL
, bbs
, false);
5539 bb_rank
= XCNEWVEC (long, last_basic_block_for_fn (cfun
));
5540 operand_rank
= new hash_map
<tree
, long>;
5542 /* Give each default definition a distinct rank. This includes
5543 parameters and the static chain. Walk backwards over all
5544 SSA names so that we get proper rank ordering according
5545 to tree_swap_operands_p. */
5546 for (i
= num_ssa_names
- 1; i
> 0; --i
)
5548 tree name
= ssa_name (i
);
5549 if (name
&& SSA_NAME_IS_DEFAULT_DEF (name
))
5550 insert_operand_rank (name
, ++rank
);
5553 /* Set up rank for each BB */
5554 for (i
= 0; i
< n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
; i
++)
5555 bb_rank
[bbs
[i
]] = ++rank
<< 16;
5558 calculate_dominance_info (CDI_POST_DOMINATORS
);
5559 plus_negates
= vNULL
;
5562 /* Cleanup after the reassociation pass, and print stats if
5568 statistics_counter_event (cfun
, "Linearized",
5569 reassociate_stats
.linearized
);
5570 statistics_counter_event (cfun
, "Constants eliminated",
5571 reassociate_stats
.constants_eliminated
);
5572 statistics_counter_event (cfun
, "Ops eliminated",
5573 reassociate_stats
.ops_eliminated
);
5574 statistics_counter_event (cfun
, "Statements rewritten",
5575 reassociate_stats
.rewritten
);
5576 statistics_counter_event (cfun
, "Built-in pow[i] calls encountered",
5577 reassociate_stats
.pows_encountered
);
5578 statistics_counter_event (cfun
, "Built-in powi calls created",
5579 reassociate_stats
.pows_created
);
5581 delete operand_rank
;
5582 operand_entry_pool
.release ();
5584 plus_negates
.release ();
5585 free_dominance_info (CDI_POST_DOMINATORS
);
5586 loop_optimizer_finalize ();
5589 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
5590 insertion of __builtin_powi calls.
5592 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
5593 optimization of a gimple conditional. Otherwise returns zero. */
5596 execute_reassoc (bool insert_powi_p
)
5598 reassoc_insert_powi_p
= insert_powi_p
;
5602 bool cfg_cleanup_needed
;
5603 cfg_cleanup_needed
= do_reassoc ();
5604 repropagate_negates ();
5608 return cfg_cleanup_needed
? TODO_cleanup_cfg
: 0;
5613 const pass_data pass_data_reassoc
=
5615 GIMPLE_PASS
, /* type */
5616 "reassoc", /* name */
5617 OPTGROUP_NONE
, /* optinfo_flags */
5618 TV_TREE_REASSOC
, /* tv_id */
5619 ( PROP_cfg
| PROP_ssa
), /* properties_required */
5620 0, /* properties_provided */
5621 0, /* properties_destroyed */
5622 0, /* todo_flags_start */
5623 TODO_update_ssa_only_virtuals
, /* todo_flags_finish */
5626 class pass_reassoc
: public gimple_opt_pass
5629 pass_reassoc (gcc::context
*ctxt
)
5630 : gimple_opt_pass (pass_data_reassoc
, ctxt
), insert_powi_p (false)
5633 /* opt_pass methods: */
5634 opt_pass
* clone () { return new pass_reassoc (m_ctxt
); }
5635 void set_pass_param (unsigned int n
, bool param
)
5637 gcc_assert (n
== 0);
5638 insert_powi_p
= param
;
5640 virtual bool gate (function
*) { return flag_tree_reassoc
!= 0; }
5641 virtual unsigned int execute (function
*)
5642 { return execute_reassoc (insert_powi_p
); }
5645 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
5646 point 3a in the pass header comment. */
5648 }; // class pass_reassoc
5653 make_pass_reassoc (gcc::context
*ctxt
)
5655 return new pass_reassoc (ctxt
);