/* Reassociation for trees.
   Copyright (C) 2005-2016 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop.h"
#include "langhooks.h"
#include "case-cfn-macros.h"
/* This is a simple global reassociation pass.  It is, in part, based
   on the LLVM pass of the same name (they do some things more or less
   than we do, in different orders, etc).

   It consists of five steps:

   1. Breaking up subtract operations into addition + negate, where
   it would promote the reassociation of adds.

   2. Left linearization of the expression trees, so that (A+B)+(C+D)
   becomes (((A+B)+C)+D), which is easier for us to rewrite later.
   During linearization, we place the operands of the binary
   expressions into a vector of operand_entry_*

   3. Optimization of the operand lists, eliminating things like a +
   -a, a & a, etc.

   3a. Combine repeated factors with the same occurrence counts
   into a __builtin_powi call that will later be optimized into
   an optimal number of multiplies.

   4. Rewrite the expression trees we linearized and optimized so
   they are in proper rank order.

   5. Repropagate negates, as nothing else will clean them up ATM.

   A bit of theory on #4, since nobody seems to write anything down
   about why it makes sense to do it the way they do it:

   We could do this much nicer theoretically, but don't (for reasons
   explained after how to do it theoretically nice :P).

   In order to promote the most redundancy elimination, you want
   binary expressions whose operands are the same rank (or
   preferably, the same value) exposed to the redundancy eliminator,
   for possible elimination.

   So the way to do this if we really cared, is to build the new op
   tree from the leaves to the roots, merging as you go, and putting the
   new op on the end of the worklist, until you are left with one
   thing on the worklist.

   IE if you have to rewrite the following set of operands (listed with
   rank in parentheses), with opcode PLUS_EXPR:

   a (1),  b (1),  c (1),  d (2), e (2)

   We start with our merge worklist empty, and the ops list with all of
   those on it.

   You want to first merge all leaves of the same rank, as much as
   possible.

   So first build a binary op of

   mergetmp = a + b, and put "mergetmp" on the merge worklist.

   Because there is no three operand form of PLUS_EXPR, c is not going to
   be exposed to redundancy elimination as a rank 1 operand.

   So you might as well throw it on the merge worklist (you could also
   consider it to now be a rank two operand, and merge it with d and e,
   but in this case, you then have evicted e from a binary op.  So at
   least in this situation, you can't win.)

   Then build a binary op of d + e
   mergetmp2 = d + e

   and put mergetmp2 on the merge worklist.

   so merge worklist = {mergetmp, c, mergetmp2}

   Continue building binary ops of these operations until you have only
   one operation left on the worklist.

   So we have

   build binary op
   mergetmp3 = mergetmp + c

   worklist = {mergetmp2, mergetmp3}

   mergetmp4 = mergetmp2 + mergetmp3

   worklist = {mergetmp4}

   because we have one operation left, we can now just set the original
   statement equal to the result of that operation.

   This will at least expose a + b and d + e to redundancy elimination
   as binary operations.

   For extra points, you can reuse the old statements to build the
   mergetmps, since you shouldn't run out.

   So why don't we do this?

   Because it's expensive, and rarely will help.  Most trees we are
   reassociating have 3 or less ops.  If they have 2 ops, they already
   will be written into a nice single binary op.  If you have 3 ops, a
   single simple check suffices to tell you whether the first two are of the
   same rank.  If so, you know to order it

   mergetmp = op1 + op2
   newstmt = mergetmp + op3

   instead of
   mergetmp = op2 + op3
   newstmt = mergetmp + op1

   If all three are of the same rank, you can't expose them all in a
   single binary operator anyway, so the above is *still* the best you
   can do.

   Thus, this is what we do.  When we have three ops left, we check to see
   what order to put them in, and call it a day.  As a nod to vector sum
   reduction, we check if any of the ops are really a phi node that is a
   destructive update for the associating op, and keep the destructive
   update together for vector sum reduction recognition.  */
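
/* An illustrative sketch of the whole pipeline (not part of the pass;
   the names are hypothetical).  Given GIMPLE like

     t1 = a + b;
     t2 = t1 + c;
     t3 = -a;
     t4 = t2 + t3;

   step 2 linearizes the PLUS_EXPR chain feeding t4 into the operand
   list {a, b, c, -a}, step 3 cancels the a/-a pair (see
   eliminate_plus_minus_pair below), and step 4 rewrites what is left,
   b + c + 0, back into binary statements in rank order.  */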
/* Enable insertion of __builtin_powi calls during execute_reassoc.  See
   point 3a in the pass header comment.  */
static bool reassoc_insert_powi_p;

/* Statistics */
static struct
{
  int constants_eliminated;
  int ops_eliminated;
  int pows_encountered;
} reassociate_stats;
/* Operator, rank pair.  */
struct operand_entry
{
  unsigned rank;
  int id;
  tree op;
  HOST_WIDE_INT count;
};

static object_allocator<operand_entry> operand_entry_pool
  ("operand entry pool");

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static int next_operand_entry_id;
/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;

/* Operand->rank hashtable.  */
static hash_map<tree, long> *operand_rank;

/* Vector of SSA_NAMEs on which after reassociate_bb is done with
   all basic blocks the CFG should be adjusted - basic blocks
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;

/* Forward decls.  */
static long get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

static bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple *end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}
/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)
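
/* A numeric sketch of that constraint (the block ranks below are made
   up, not taken from this pass): if consecutive basic blocks receive
   ranks spaced 1 << 16 apart, a qualifying accumulator phi whose loop
   latch block has rank 3 << 16 is ranked 3 * 65536 + 32768.  That sits
   above any expression computed inside the loop body (reassociation
   tree depth is far below 1 << 15) yet still below the next block's
   rank of 4 << 16, so the bias reorders operands within the loop
   without perturbing ordering across blocks.  */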
/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */

static long
phi_rank (gimple *stmt)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple *use_stmt;

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
	  if (gimple_bb (def_stmt)->loop_father == father)
	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
	}
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}
/* If EXP is an SSA_NAME defined by a PHI statement that represents a
   loop-carried dependence of an innermost loop, return TRUE; else
   return FALSE.  */

static bool
loop_carried_phi (tree exp)
{
  gimple *phi_stmt;
  long block_rank;

  if (TREE_CODE (exp) != SSA_NAME
      || SSA_NAME_IS_DEFAULT_DEF (exp))
    return false;

  phi_stmt = SSA_NAME_DEF_STMT (exp);

  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
    return false;

  /* Non-loop-carried phis have block rank.  Loop-carried phis have
     an additional bias added in.  If this phi doesn't have block rank,
     it's biased and should not be propagated.  */
  block_rank = bb_rank[gimple_bb (phi_stmt)->index];

  if (phi_rank (phi_stmt) != block_rank)
    return true;

  return false;
}
/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */

static long
propagate_rank (long rank, tree op)
{
  long op_rank;

  if (loop_carried_phi (op))
    return rank;

  op_rank = get_rank (op);

  return MAX (rank, op_rank);
}
/* Look up the operand rank structure for expression E.  */

static long
find_operand_rank (tree e)
{
  long *slot = operand_rank->get (e);
  return slot ? *slot : -1;
}

/* Insert {E,RANK} into the operand rank hashtable.  */

static void
insert_operand_rank (tree e, long rank)
{
  gcc_assert (rank > 0);
  gcc_assert (!operand_rank->put (e, rank));
}
/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      ssa_op_iter iter;
      tree op;
      long rank;
      gimple *stmt;

      if (SSA_NAME_IS_DEFAULT_DEF (e))
	return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
	return phi_rank (stmt);

      if (!is_gimple_assign (stmt))
	return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
	return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
	 exception, remove the bias from loop-carried phis when propagating
	 the rank so that dependent operations are not also biased.  */
      /* Simply walk over all SSA uses - this takes advantage of the
	 fact that non-SSA operands are is_gimple_min_invariant and
	 thus have rank 0.  */
      rank = 0;
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
	rank = propagate_rank (rank, op);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Rank for ");
	  print_generic_expr (dump_file, e, 0);
	  fprintf (dump_file, " is %ld\n", (rank + 1));
	}

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Constants, globals, etc., are rank 0 */
  return 0;
}
/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */

static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    return FLOAT_CONST_TYPE;
  else
    return OTHER_CONST_TYPE;
}
/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */

static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry *oea = *(const operand_entry *const *)pa;
  const operand_entry *oeb = *(const operand_entry *const *)pb;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oeb->rank == 0 && oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
	return constant_type (oeb->op) - constant_type (oea->op);
      else
	/* To make sorting result stable, we use unique IDs to determine
	   order.  */
	return oeb->id - oea->id;
    }

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if ((oeb->rank - oea->rank == 0)
      && TREE_CODE (oea->op) == SSA_NAME
      && TREE_CODE (oeb->op) == SSA_NAME)
    {
      /* SSA_NAME_VERSION is assigned pretty randomly, as we reuse
	 versions of removed SSA_NAMEs, so if possible prefer to sort
	 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.  */
      if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
	  && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
	  && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
	{
	  gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
	  gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
	  basic_block bba = gimple_bb (stmta);
	  basic_block bbb = gimple_bb (stmtb);
	  if (bbb != bba)
	    {
	      if (bb_rank[bbb->index] != bb_rank[bba->index])
		return bb_rank[bbb->index] - bb_rank[bba->index];
	    }
	  else
	    {
	      bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
	      bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
	      if (da != db)
		return da ? 1 : -1;
	    }
	}

      if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
	return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
      else
	return oeb->id - oea->id;
    }

  if (oeb->rank != oea->rank)
    return oeb->rank - oea->rank;
  else
    return oeb->id - oea->id;
}
/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry *> *ops, tree op)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  ops->safe_push (oe);
}

/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
		       HOST_WIDE_INT repeat)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}
/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    return true;

  return false;
}
/* Return true if STMT is a nop-conversion.  */

static bool
gimple_nop_conversion_p (gimple *stmt)
{
  if (gassign *ass = dyn_cast <gassign *> (stmt))
    {
      if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
	  && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
				    TREE_TYPE (gimple_assign_rhs1 (ass))))
	return true;
    }
  return false;
}
/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of that operation.  Otherwise, return NULL.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple *stmt = SSA_NAME_DEF_STMT (name);

  /* Look through nop conversions (sign changes).  */
  if (gimple_nop_conversion_p (stmt)
      && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
    stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);

  return NULL_TREE;
}
/* Return true if OP1 and OP2 have the same value if casted to either type.  */

static bool
ops_equal_values_p (tree op1, tree op2)
{
  if (op1 == op2)
    return true;

  tree orig_op1 = op1;
  if (TREE_CODE (op1) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op1);
      if (gimple_nop_conversion_p (stmt))
	{
	  op1 = gimple_assign_rhs1 (stmt);
	  if (op1 == op2)
	    return true;
	}
    }

  if (TREE_CODE (op2) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op2);
      if (gimple_nop_conversion_p (stmt))
	{
	  op2 = gimple_assign_rhs1 (stmt);
	  if (op2 == op1
	      || op2 == orig_op1)
	    return true;
	}
    }

  return false;
}
/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
			  vec<operand_entry *> *ops,
			  bool *all_done,
			  unsigned int i,
			  operand_entry *curr,
			  operand_entry *last)
{
  /* If we have two of the same op, and the opcode is & |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
	{
	case MAX_EXPR:
	case MIN_EXPR:
	case BIT_IOR_EXPR:
	case BIT_AND_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op, 0);
	      fprintf (dump_file, " [&|minmax] ");
	      print_generic_expr (dump_file, last->op, 0);
	      fprintf (dump_file, " -> ");
	      print_generic_stmt (dump_file, last->op, 0);
	    }

	  ops->ordered_remove (i);
	  reassociate_stats.ops_eliminated ++;

	  return true;

	case BIT_XOR_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op, 0);
	      fprintf (dump_file, " ^ ");
	      print_generic_expr (dump_file, last->op, 0);
	      fprintf (dump_file, " -> nothing\n");
	    }

	  reassociate_stats.ops_eliminated += 2;

	  if (ops->length () == 2)
	    {
	      ops->truncate (0);
	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
	      *all_done = true;
	    }
	  else
	    {
	      ops->ordered_remove (i-1);
	      ops->ordered_remove (i-1);
	    }

	  return true;

	default:
	  break;
	}
    }
  return false;
}
static vec<tree> plus_negates;

/* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
   expression, look in OPS for a corresponding positive operation to cancel
   it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
			   vec<operand_entry *> *ops,
			   unsigned int currindex,
			   operand_entry *curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1 ;
       i++)
    {
      if (negateop != NULL_TREE
	  && ops_equal_values_p (oe->op, negateop))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, negateop, 0);
	      fprintf (dump_file, " + -");
	      print_generic_expr (dump_file, oe->op, 0);
	      fprintf (dump_file, " -> 0\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
      else if (notop != NULL_TREE
	       && ops_equal_values_p (oe->op, notop))
	{
	  tree op_type = TREE_TYPE (oe->op);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop, 0);
	      fprintf (dump_file, " + ~");
	      print_generic_expr (dump_file, oe->op, 0);
	      fprintf (dump_file, " -> -1\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
    }

  /* If CURR->OP is a negate expr without nop conversion in a plus expr:
     save it for later inspection in repropagate_negates().  */
  if (negateop != NULL_TREE
      && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
    plus_negates.safe_push (curr->op);

  return false;
}
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, collapse OPS to just 0 (for AND) or
   -1 (for IOR) and return true.  Otherwise, return false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
		     vec<operand_entry *> *ops,
		     unsigned int currindex,
		     operand_entry *curr)
{
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop, 0);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " & ~");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " | ~");
	      print_generic_expr (dump_file, oe->op, 0);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " -> 0\n");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " -> -1\n");
	    }

	  if (opcode == BIT_AND_EXPR)
	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
	  else if (opcode == BIT_IOR_EXPR)
	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));

	  reassociate_stats.ops_eliminated += ops->length () - 1;
	  ops->truncate (0);
	  ops->quick_push (oe);
	  return true;
	}
    }

  return false;
}
/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */
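
/* A sketch of the identities handled below: in a BIT_AND_EXPR chain a
   constant 0 absorbs everything (x & y & 0 -> 0) and a -1 is dropped
   (x & -1 -> x); in a BIT_IOR_EXPR chain -1 absorbs and 0 is dropped;
   in a MULT_EXPR chain 0 absorbs and 1 is dropped (for floats only
   when the NaN/signed-zero/sNaN flags permit); and in PLUS_EXPR,
   MINUS_EXPR and BIT_XOR_EXPR chains a 0 is dropped.  */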
static void
eliminate_using_constants (enum tree_code opcode,
			   vec<operand_entry *> *ops)
{
  operand_entry *oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
	{
	case BIT_AND_EXPR:
	  if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & -1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case BIT_IOR_EXPR:
	  if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | -1, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case MULT_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && !HONOR_NANS (type)
		  && !HONOR_SIGNED_ZEROS (type)
		  && real_zerop (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;
		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_onep (oelast->op)
		   || (FLOAT_TYPE_P (type)
		       && !HONOR_SNANS (type)
		       && real_onep (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	case BIT_XOR_EXPR:
	case PLUS_EXPR:
	case MINUS_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
		  && fold_real_zero_addition_p (type, oelast->op,
						opcode == MINUS_EXPR)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found [|^+] 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	default:
	  break;
	}
    }
}


static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
				 bool, bool);
/* Structure for tracking and counting operands.  */
struct oecount {
  int cnt;
  int id;
  enum tree_code oecode;
  tree op;
};


/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;


/* Oecount hashtable helpers.  */

struct oecount_hasher : int_hash <int, 0, 1>
{
  static inline hashval_t hash (int);
  static inline bool equal (int, int);
};
1063 oecount_hasher::hash (int p
)
1065 const oecount
*c
= &cvec
[p
- 42];
1066 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
1069 /* Comparison function for oecount. */
1072 oecount_hasher::equal (int p1
, int p2
)
1074 const oecount
*c1
= &cvec
[p1
- 42];
1075 const oecount
*c2
= &cvec
[p2
- 42];
1076 return (c1
->oecode
== c2
->oecode
1077 && c1
->op
== c2
->op
);
/* Comparison function for qsort sorting oecount elements by count.  */

static int
oecount_cmp (const void *p1, const void *p2)
{
  const oecount *c1 = (const oecount *)p1;
  const oecount *c2 = (const oecount *)p2;
  if (c1->cnt != c2->cnt)
    return c1->cnt - c2->cnt;
  else
    /* If counts are identical, use unique IDs to stabilize qsort.  */
    return c1->id - c2->id;
}
/* Return TRUE iff STMT represents a builtin call that raises OP
   to some exponent.  */

static bool
stmt_is_power_of_op (gimple *stmt, tree op)
{
  if (!is_gimple_call (stmt))
    return false;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));

    default:
      return false;
    }
}
/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */
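
/* For example (hypothetical GIMPLE): given

     t = __builtin_powi (x, 5);

   this rewrites the call in place to __builtin_powi (x, 4) and returns
   4.  A caller such as zero_one_operation below treats a return value
   of 1 as the signal that the call has become pow (x, 1) and can be
   replaced by its first argument.  */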
static HOST_WIDE_INT
decrement_power (gimple *stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, SIGNED);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_CFN_POWI:
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      gcc_unreachable ();
    }
}
/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
{
  tree lhs;
  gimple *use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  reassoc_remove_stmt (&gsi);
  release_defs (stmt);
}
/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  gimple *stmt = SSA_NAME_DEF_STMT (*def);

  do
    {
      tree name;

      if (opcode == MULT_EXPR
	  && stmt_is_power_of_op (stmt, op))
	{
	  if (decrement_power (stmt) == 1)
	    propagate_op_to_single_use (op, stmt, def);
	  return;
	}

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
	 is ours simply propagate the other operand into the stmts
	 single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
	  && (name == op
	      || gimple_assign_rhs2 (stmt) == op))
	{
	  if (name == op)
	    name = gimple_assign_rhs2 (stmt);
	  propagate_op_to_single_use (name, stmt, def);
	  return;
	}

      /* We might have a multiply of two __builtin_pow* calls, and
	 the operand might be hiding in the rightmost one.  */
      if (opcode == MULT_EXPR
	  && gimple_assign_rhs_code (stmt) == opcode
	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
	  && has_single_use (gimple_assign_rhs2 (stmt)))
	{
	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
	  if (stmt_is_power_of_op (stmt2, op))
	    {
	      if (decrement_power (stmt2) == 1)
		propagate_op_to_single_use (op, stmt2, def);
	      return;
	    }
	}

      /* Continue walking the chain.  */
      gcc_assert (name != op
		  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
    }
  while (1);
}
/* Returns true if statement S1 dominates statement S2.  Like
   stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */

static bool
reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     SSA_NAME.  Assume it lives at the beginning of function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 == bb2)
    {
      /* PHIs in the same basic block are assumed to be
	 executed all in parallel, if only one stmt is a PHI,
	 it dominates the other stmt in the same basic block.  */
      if (gimple_code (s1) == GIMPLE_PHI)
	return true;

      if (gimple_code (s2) == GIMPLE_PHI)
	return false;

      gcc_assert (gimple_uid (s1) && gimple_uid (s2));

      if (gimple_uid (s1) < gimple_uid (s2))
	return true;

      if (gimple_uid (s1) > gimple_uid (s2))
	return false;

      gimple_stmt_iterator gsi = gsi_for_stmt (s1);
      unsigned int uid = gimple_uid (s1);
      for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *s = gsi_stmt (gsi);
	  if (gimple_uid (s) != uid)
	    break;
	  if (s == s2)
	    return true;
	}

      return false;
    }

  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}
/* Insert STMT after INSERT_POINT.  */

static void
insert_stmt_after (gimple *stmt, gimple *insert_point)
{
  gimple_stmt_iterator gsi;
  basic_block bb;

  if (gimple_code (insert_point) == GIMPLE_PHI)
    bb = gimple_bb (insert_point);
  else if (!stmt_ends_bb_p (insert_point))
    {
      gsi = gsi_for_stmt (insert_point);
      gimple_set_uid (stmt, gimple_uid (insert_point));
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
      return;
    }
  else
    /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
       thus if it must end a basic block, it should be a call that can
       throw, or some assignment that can throw.  If it throws, the LHS
       of it will not be initialized though, so only valid places using
       the SSA_NAME should be dominated by the fallthru edge.  */
    bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
  gsi = gsi_after_labels (bb);
  if (gsi_end_p (gsi))
    {
      gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
      gimple_set_uid (stmt,
		      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
    }
  else
    gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}
/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
   the result.  Places the statement after the definition of either
   OP1 or OP2.  Returns the new statement.  */

static gimple *
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
{
  gimple *op1def = NULL, *op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gassign *sum;

  /* Create the addition statement.  */
  op = make_ssa_name (type);
  sum = gimple_build_assign (op, opcode, op1, op2);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
      if (gsi_end_p (gsi))
	{
	  gimple_stmt_iterator gsi2
	    = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	  gimple_set_uid (sum,
			  gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
	}
      else
	gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else
    {
      gimple *insert_point;
      if ((!op1def || gimple_nop_p (op1def))
	  || (op2def && !gimple_nop_p (op2def)
	      && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
	insert_point = op2def;
      else
	insert_point = op1def;
      insert_stmt_after (sum, insert_point);
    }
  update_stmt (sum);

  return sum;
}
/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
	second candidate bitmap noting all multiplication/division chains
	that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
	candidates with one (operand, code) operation removed from their
	multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
	multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
	processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b) * x * y in one invocation of the reassociation pass.  */
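
/* A small worked instance of the above (the names are hypothetical):
   for the addition chain

     t1 = a * x;
     t2 = b * x;
     s  = t1 + t2 + c;

   the (x, MULT_EXPR) pair occurs in two candidate chains, so one
   multiplication by x is stripped from each candidate (see
   zero_one_operation), the alternate chain a + b is built, it is
   multiplied by x once, and the addition list becomes
   (a + b) * x + c.  */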
static bool
undistribute_ops_list (enum tree_code opcode,
		       vec<operand_entry *> *ops, struct loop *loop)
{
  unsigned int length = ops->length ();
  operand_entry *oe1;
  unsigned i, j;
  sbitmap candidates, candidates2;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  vec<operand_entry *> *subops;
  bool changed = false;
  int next_oecount_id = 0;

  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process.  */
  candidates = sbitmap_alloc (length);
  bitmap_clear (candidates);
  nr_candidates = 0;
  FOR_EACH_VEC_ELT (*ops, i, oe1)
    {
      enum tree_code dcode;
      gimple *oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
	continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
	continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
	   && dcode != RDIV_EXPR)
	  || !is_reassociable_op (oe1def, dcode, loop))
	continue;

      bitmap_set_bit (candidates, i);
      nr_candidates++;
    }

  if (nr_candidates < 2)
    {
      sbitmap_free (candidates);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
	(*ops)[bitmap_first_set_bit (candidates)]->op, 0);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.  */
  cvec.create (0);

  hash_table<oecount_hasher> ctable (15);

  /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to workaround that limitation.  */
  typedef vec<operand_entry *> vec_operand_entry_t_heap;
  subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
    {
      gimple *oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
      oecode = gimple_assign_rhs_code (oedef);
      linearize_expr_tree (&subops[i], oedef,
			   associative_tree_code (oecode), false);

      FOR_EACH_VEC_ELT (subops[i], j, oe1)
	{
	  oecount c;
	  int *slot;
	  int idx;
	  c.oecode = oecode;
	  c.cnt = 1;
	  c.id = next_oecount_id++;
	  c.op = oe1->op;
	  cvec.safe_push (c);
	  idx = cvec.length () + 41;
	  slot = ctable.find_slot (idx, INSERT);
	  if (!*slot)
	    *slot = idx;
	  else
	    {
	      cvec.pop ();
	      cvec[*slot - 42].cnt++;
	    }
	}
    }

  /* Sort the counting table.  */
  cvec.qsort (oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      FOR_EACH_VEC_ELT (cvec, j, c)
	{
	  fprintf (dump_file, "  %u %s: ", c->cnt,
		   c->oecode == MULT_EXPR
		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
	  print_generic_expr (dump_file, c->op, 0);
	  fprintf (dump_file, "\n");
	}
    }

  /* Process the (operand, code) pairs in order of most occurrence.  */
  candidates2 = sbitmap_alloc (length);
  while (!cvec.is_empty ())
    {
      oecount *c = &cvec.last ();
      if (c->cnt < 2)
	break;

      /* Now collect the operands in the outer chain that contain
	 the common operand in their inner chain.  */
      bitmap_clear (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
	{
	  gimple *oedef;
	  enum tree_code oecode;
	  unsigned j;
	  tree op = (*ops)[i]->op;

	  /* If we undistributed in this chain already this may be
	     a constant.  */
	  if (TREE_CODE (op) != SSA_NAME)
	    continue;

	  oedef = SSA_NAME_DEF_STMT (op);
	  oecode = gimple_assign_rhs_code (oedef);
	  if (oecode != c->oecode)
	    continue;

	  FOR_EACH_VEC_ELT (subops[i], j, oe1)
	    {
	      if (oe1->op == c->op)
		{
		  bitmap_set_bit (candidates2, i);
		  ++nr_candidates2;
		  break;
		}
	    }
	}

      if (nr_candidates2 >= 2)
	{
	  operand_entry *oe1, *oe2;
	  gimple *prod;
	  int first = bitmap_first_set_bit (candidates2);

	  /* Build the new addition chain.  */
	  oe1 = (*ops)[first];
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Building (");
	      print_generic_expr (dump_file, oe1->op, 0);
	    }
	  zero_one_operation (&oe1->op, c->oecode, c->op);
	  EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
	    {
	      gimple *sum;
	      oe2 = (*ops)[i];
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " + ");
		  print_generic_expr (dump_file, oe2->op, 0);
		}
	      zero_one_operation (&oe2->op, c->oecode, c->op);
	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
				       oe1->op, oe2->op, opcode);
	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
	      oe2->rank = 0;
	      oe1->op = gimple_get_lhs (sum);
	    }

	  /* Apply the multiplication/division.  */
	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
				    oe1->op, c->op, c->oecode);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
	      print_generic_expr (dump_file, c->op, 0);
	      fprintf (dump_file, "\n");
	    }

	  /* Record it in the addition chain and disable further
	     undistribution with this op.  */
	  oe1->op = gimple_assign_lhs (prod);
	  oe1->rank = get_rank (oe1->op);
	  subops[first].release ();

	  changed = true;
	}

      cvec.pop ();
    }

  for (i = 0; i < ops->length (); ++i)
    subops[i].release ();
  free (subops);
  cvec.release ();

  sbitmap_free (candidates);
  sbitmap_free (candidates2);

  return changed;
}
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
   expression, examine the other OPS to see if any of them are comparisons
   of the same values, which we may be able to combine or eliminate.
   For example, we can rewrite (a < b) | (a == b) as (a <= b).  */

static bool
eliminate_redundant_comparison (enum tree_code opcode,
				vec<operand_entry *> *ops,
				unsigned int currindex,
				operand_entry *curr)
{
  tree op1, op2;
  enum tree_code lcode, rcode;
  gimple *def1, *def2;
  int i;
  operand_entry *oe;

  if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
    return false;

  /* Check that CURR is a comparison.  */
  if (TREE_CODE (curr->op) != SSA_NAME)
    return false;
  def1 = SSA_NAME_DEF_STMT (curr->op);
  if (!is_gimple_assign (def1))
    return false;
  lcode = gimple_assign_rhs_code (def1);
  if (TREE_CODE_CLASS (lcode) != tcc_comparison)
    return false;
  op1 = gimple_assign_rhs1 (def1);
  op2 = gimple_assign_rhs2 (def1);

  /* Now look for a similar comparison in the remaining OPS.  */
  for (i = currindex + 1; ops->iterate (i, &oe); i++)
    {
      tree t;

      if (TREE_CODE (oe->op) != SSA_NAME)
	continue;
      def2 = SSA_NAME_DEF_STMT (oe->op);
      if (!is_gimple_assign (def2))
	continue;
      rcode = gimple_assign_rhs_code (def2);
      if (TREE_CODE_CLASS (rcode) != tcc_comparison)
	continue;

      /* If we got here, we have a match.  See if we can combine the
	 two comparisons.  */
      if (opcode == BIT_IOR_EXPR)
	t = maybe_fold_or_comparisons (lcode, op1, op2,
				       rcode, gimple_assign_rhs1 (def2),
				       gimple_assign_rhs2 (def2));
      else
	t = maybe_fold_and_comparisons (lcode, op1, op2,
					rcode, gimple_assign_rhs1 (def2),
					gimple_assign_rhs2 (def2));
      if (!t)
	continue;

      /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
	 always give us a boolean_type_node value back.  If the original
	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
	 we need to convert.  */
      if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
	t = fold_convert (TREE_TYPE (curr->op), t);

      if (TREE_CODE (t) != INTEGER_CST
	  && !operand_equal_p (t, curr->op, 0))
	{
	  enum tree_code subcode;
	  tree newop1, newop2;
	  if (!COMPARISON_CLASS_P (t))
	    continue;
	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
	  STRIP_USELESS_TYPE_CONVERSION (newop1);
	  STRIP_USELESS_TYPE_CONVERSION (newop2);
	  if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
	    continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Equivalence: ");
	  print_generic_expr (dump_file, curr->op, 0);
	  fprintf (dump_file, " %s ", op_symbol_code (opcode));
	  print_generic_expr (dump_file, oe->op, 0);
	  fprintf (dump_file, " -> ");
	  print_generic_expr (dump_file, t, 0);
	  fprintf (dump_file, "\n");
	}

      /* Now we can delete oe, as it has been subsumed by the new combined
	 expression t.  */
      ops->ordered_remove (i);
      reassociate_stats.ops_eliminated ++;

      /* If t is the same as curr->op, we're done.  Otherwise we must
	 replace curr->op with t.  Special case is if we got a constant
	 back, in which case we add it to the end instead of in place of
	 the current entry.  */
      if (TREE_CODE (t) == INTEGER_CST)
	{
	  ops->ordered_remove (currindex);
	  add_to_ops_vec (ops, t);
	}
      else if (!operand_equal_p (t, curr->op, 0))
	{
	  gimple *sum;
	  enum tree_code subcode;
	  tree newop1;
	  tree newop2;
	  gcc_assert (COMPARISON_CLASS_P (t));
	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
	  STRIP_USELESS_TYPE_CONVERSION (newop1);
	  STRIP_USELESS_TYPE_CONVERSION (newop2);
	  gcc_checking_assert (is_gimple_val (newop1)
			       && is_gimple_val (newop2));
	  sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
	  curr->op = gimple_get_lhs (sum);
	}
      return true;
    }

  return false;
}
/* Perform various identities and other optimizations on the list of
   operand entries, stored in OPS.  The tree code for the binary
   operation between all the operands is OPCODE.  */

static void
optimize_ops_list (enum tree_code opcode,
		   vec<operand_entry *> *ops)
{
  unsigned int length = ops->length ();
  unsigned int i;
  operand_entry *oe;
  operand_entry *oelast = NULL;
  bool iterate = false;

  if (length == 1)
    return;

  oelast = ops->last ();

  /* If the last two are constants, pop the constants off, merge them
     and try the next two.  */
  if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
    {
      operand_entry *oelm1 = (*ops)[length - 2];

      if (oelm1->rank == 0
	  && is_gimple_min_invariant (oelm1->op)
	  && useless_type_conversion_p (TREE_TYPE (oelm1->op),
					TREE_TYPE (oelast->op)))
	{
	  tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
				     oelm1->op, oelast->op);

	  if (folded && is_gimple_min_invariant (folded))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Merging constants\n");

	      ops->pop ();
	      ops->pop ();

	      add_to_ops_vec (ops, folded);
	      reassociate_stats.constants_eliminated++;

	      optimize_ops_list (opcode, ops);
	      return;
	    }
	}
    }

  eliminate_using_constants (opcode, ops);
  oelast = NULL;

  for (i = 0; ops->iterate (i, &oe);)
    {
      bool done = false;

      if (eliminate_not_pairs (opcode, ops, i, oe))
	return;
      if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
	  || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
	  || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
	{
	  if (done)
	    return;
	  iterate = true;
	  oelast = NULL;
	  continue;
	}
      oelast = oe;
      i++;
    }

  length = ops->length ();
  oelast = ops->last ();

  if (iterate)
    optimize_ops_list (opcode, ops);
}
/* The following functions are subroutines to optimize_range_tests and allow
   it to try to change a logical combination of comparisons into a range
   test.  For example,

     X == 2 || X == 5 || X == 3 || X == 4

   is converted to

     (unsigned) (X - 2) <= 3

   For more information see comments above fold_test_range in fold-const.c,
   this implementation is for GIMPLE.  */
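
/* To see why the running example holds (a sketch): X is 2, 3, 4 or 5
   iff X - 2 is 0, 1, 2 or 3; doing the subtraction in an unsigned type
   makes any X < 2 wrap around to a huge value, so the whole
   disjunction collapses to the single test (unsigned) (X - 2) <= 3.  */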
struct range_entry
{
  tree exp;
  tree low;
  tree high;
  bool in_p;
  bool strict_overflow_p;
  unsigned int idx, next;
};
/* This is similar to make_range in fold-const.c, but on top of
   GIMPLE instead of trees.  If EXP is non-NULL, it should be
   an SSA_NAME and STMT argument is ignored, otherwise STMT
   argument should be a GIMPLE_COND.  */

static void
init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
{
  int in_p;
  tree low, high;
  bool is_bool, strict_overflow_p;

  r->exp = NULL_TREE;
  r->in_p = false;
  r->strict_overflow_p = false;
  r->low = NULL_TREE;
  r->high = NULL_TREE;
  if (exp != NULL_TREE
      && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
    return;

  /* Start with simply saying "EXP != 0" and then look at the code of EXP
     and see if we can refine the range.  Some of the cases below may not
     happen, but it doesn't seem worth worrying about this.  We "continue"
     the outer loop when we've changed something; otherwise we "break"
     the switch, which will "break" the while.  */
  low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
  high = low;
  in_p = 0;
  strict_overflow_p = false;
  is_bool = false;
  if (exp == NULL_TREE)
    is_bool = true;
  else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
    {
      if (TYPE_UNSIGNED (TREE_TYPE (exp)))
	is_bool = true;
      else
	return;
    }
  else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
    is_bool = true;

  while (1)
    {
      enum tree_code code;
      tree arg0, arg1, exp_type;
      tree nexp;
      location_t loc;

      if (exp != NULL_TREE)
	{
	  if (TREE_CODE (exp) != SSA_NAME
	      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
	    break;

	  stmt = SSA_NAME_DEF_STMT (exp);
	  if (!is_gimple_assign (stmt))
	    break;

	  code = gimple_assign_rhs_code (stmt);
	  arg0 = gimple_assign_rhs1 (stmt);
	  arg1 = gimple_assign_rhs2 (stmt);
	  exp_type = TREE_TYPE (exp);
	}
      else
	{
	  code = gimple_cond_code (stmt);
	  arg0 = gimple_cond_lhs (stmt);
	  arg1 = gimple_cond_rhs (stmt);
	  exp_type = boolean_type_node;
	}

      if (TREE_CODE (arg0) != SSA_NAME)
	break;
      loc = gimple_location (stmt);
      switch (code)
	{
	case BIT_NOT_EXPR:
	  if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
	      /* Ensure the range is either +[-,0], +[0,0],
		 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
		 -[1,1].  If it is e.g. +[-,-] or -[-,-]
		 or similar expression of unconditional true or
		 false, it should not be negated.  */
	      && ((high && integer_zerop (high))
		  || (low && integer_onep (low))))
	    {
	      in_p = !in_p;
	      exp = arg0;
	      continue;
	    }
	  break;
	case SSA_NAME:
	  exp = arg0;
	  continue;
	CASE_CONVERT:
	  if (is_bool)
	    goto do_default;
	  if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
	    {
	      if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
		is_bool = true;
	      else
		return;
	    }
	  else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
	    is_bool = true;
	  goto do_default;
	case EQ_EXPR:
	case NE_EXPR:
	case LT_EXPR:
	case LE_EXPR:
	case GE_EXPR:
	case GT_EXPR:
	  is_bool = true;
	  /* FALLTHRU */
	default:
	  if (!is_bool)
	    return;
	do_default:
	  nexp = make_range_step (loc, code, arg0, arg1, exp_type,
				  &low, &high, &in_p,
				  &strict_overflow_p);
	  if (nexp != NULL_TREE)
	    {
	      exp = nexp;
	      gcc_assert (TREE_CODE (exp) == SSA_NAME);
	      continue;
	    }
	  break;
	}
      break;
    }
  if (is_bool)
    {
      r->exp = exp;
      r->in_p = in_p;
      r->low = low;
      r->high = high;
      r->strict_overflow_p = strict_overflow_p;
    }
}
/* Comparison function for qsort.  Sort entries
   without SSA_NAME exp first, then with SSA_NAMEs sorted
   by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
   by increasing ->low and if ->low is the same, by increasing
   ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
   maximum.  */

static int
range_entry_cmp (const void *a, const void *b)
{
  const struct range_entry *p = (const struct range_entry *) a;
  const struct range_entry *q = (const struct range_entry *) b;

  if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
    {
      if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
	{
	  /* Group range_entries for the same SSA_NAME together.  */
	  if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
	    return -1;
	  else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
	    return 1;
	  /* If ->low is different, NULL low goes first, then by
	     ascending low.  */
	  if (p->low != NULL_TREE)
	    {
	      if (q->low != NULL_TREE)
		{
		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
					  p->low, q->low);
		  if (tem && integer_onep (tem))
		    return -1;
		  tem = fold_binary (GT_EXPR, boolean_type_node,
				     p->low, q->low);
		  if (tem && integer_onep (tem))
		    return 1;
		}
	      else
		return 1;
	    }
	  else if (q->low != NULL_TREE)
	    return -1;
	  /* If ->high is different, NULL high goes last, before that by
	     ascending high.  */
	  if (p->high != NULL_TREE)
	    {
	      if (q->high != NULL_TREE)
		{
		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
					  p->high, q->high);
		  if (tem && integer_onep (tem))
		    return -1;
		  tem = fold_binary (GT_EXPR, boolean_type_node,
				     p->high, q->high);
		  if (tem && integer_onep (tem))
		    return 1;
		}
	      else
		return -1;
	    }
	  else if (q->high != NULL_TREE)
	    return 1;
	  /* If both ranges are the same, sort below by ascending idx.  */
	}
      else
	return 1;
    }
  else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
    return -1;

  if (p->idx < q->idx)
    return -1;
  else
    {
      gcc_checking_assert (p->idx > q->idx);
      return 1;
    }
}
/* Helper routine of optimize_range_test.
   [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
   RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
   OPCODE and OPS are arguments of optimize_range_tests.  If OTHERRANGE
   is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
   an array of COUNT pointers to other ranges.  Return
   true if the range merge has been successful.
   If OPCODE is ERROR_MARK, this is called from within
   maybe_optimize_range_tests and is performing inter-bb range optimization.
   In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
   oe->rank.  */

static bool
update_range_test (struct range_entry *range, struct range_entry *otherrange,
		   struct range_entry **otherrangep,
		   unsigned int count, enum tree_code opcode,
		   vec<operand_entry *> *ops, tree exp, gimple_seq seq,
		   bool in_p, tree low, tree high, bool strict_overflow_p)
{
  operand_entry *oe = (*ops)[range->idx];
  tree op = oe->op;
  gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
    : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
  location_t loc = gimple_location (stmt);
  tree optype = op ? TREE_TYPE (op) : boolean_type_node;
  tree tem = build_range_check (loc, optype, unshare_expr (exp),
				in_p, low, high);
  enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
  gimple_stmt_iterator gsi;
  unsigned int i, uid;

  if (tem == NULL_TREE)
    return false;

  /* If op is default def SSA_NAME, there is no place to insert the
     new comparison.  Give up, unless we can use OP itself as the
     range test.  */
  if (op && SSA_NAME_IS_DEFAULT_DEF (op))
    {
      if (op == range->exp
	  && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
	      || TREE_CODE (optype) == BOOLEAN_TYPE)
	  && (op == tem
	      || (TREE_CODE (tem) == EQ_EXPR
		  && TREE_OPERAND (tem, 0) == op
		  && integer_onep (TREE_OPERAND (tem, 1))))
	  && opcode != BIT_IOR_EXPR
	  && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
	{
	  stmt = NULL;
	  tem = op;
	}
      else
	return false;
    }

  if (strict_overflow_p && issue_strict_overflow_warning (wc))
    warning_at (loc, OPT_Wstrict_overflow,
		"assuming signed overflow does not occur "
		"when simplifying range test");

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct range_entry *r;
      fprintf (dump_file, "Optimizing range tests ");
      print_generic_expr (dump_file, range->exp, 0);
      fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
      print_generic_expr (dump_file, range->low, 0);
      fprintf (dump_file, ", ");
      print_generic_expr (dump_file, range->high, 0);
      fprintf (dump_file, "]");
      for (i = 0; i < count; i++)
	{
	  if (otherrange)
	    r = otherrange + i;
	  else
	    r = otherrangep[i];
	  fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
	  print_generic_expr (dump_file, r->low, 0);
	  fprintf (dump_file, ", ");
	  print_generic_expr (dump_file, r->high, 0);
	  fprintf (dump_file, "]");
	}
      fprintf (dump_file, "\n into ");
      print_generic_expr (dump_file, tem, 0);
      fprintf (dump_file, "\n");
    }

  if (opcode == BIT_IOR_EXPR
      || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
    tem = invert_truthvalue_loc (loc, tem);

  tem = fold_convert_loc (loc, optype, tem);
  if (stmt)
    {
      gsi = gsi_for_stmt (stmt);
      uid = gimple_uid (stmt);
    }
  else
    {
      gsi = gsi_none ();
      uid = 0;
    }
  if (stmt == NULL)
    gcc_checking_assert (tem == op);
  /* In rare cases range->exp can be equal to lhs of stmt.
     In that case we have to insert after the stmt rather than before
     it.  If stmt is a PHI, insert it at the start of the basic block.  */
  else if (op != range->exp)
    {
      gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
				      GSI_SAME_STMT);
      gsi_prev (&gsi);
    }
  else if (gimple_code (stmt) != GIMPLE_PHI)
    {
      gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
				      GSI_CONTINUE_LINKING);
    }
  else
    {
      gsi = gsi_after_labels (gimple_bb (stmt));
      if (!gsi_end_p (gsi))
	uid = gimple_uid (gsi_stmt (gsi));
      else
	{
	  gsi = gsi_start_bb (gimple_bb (stmt));
	  uid = 1;
	  while (!gsi_end_p (gsi))
	    {
	      uid = gimple_uid (gsi_stmt (gsi));
	      gsi_next (&gsi);
	    }
	}
      gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
				      GSI_SAME_STMT);
      if (gsi_end_p (gsi))
	gsi = gsi_last_bb (gimple_bb (stmt));
      else
	gsi_prev (&gsi);
    }
  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
    if (gimple_uid (gsi_stmt (gsi)))
      break;
    else
      gimple_set_uid (gsi_stmt (gsi), uid);

  oe->op = tem;
  range->exp = exp;
  range->low = low;
  range->high = high;
  range->in_p = in_p;
  range->strict_overflow_p = false;

  for (i = 0; i < count; i++)
    {
      if (otherrange)
	range = otherrange + i;
      else
	range = otherrangep[i];
      oe = (*ops)[range->idx];
      /* Now change all the other range test immediate uses, so that
	 those tests will be optimized away.  */
      if (opcode == ERROR_MARK)
	{
	  if (oe->op)
	    oe->op = build_int_cst (TREE_TYPE (oe->op),
				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
	  else
	    oe->op = (oe->rank == BIT_IOR_EXPR
		      ? boolean_false_node : boolean_true_node);
	}
      else
	oe->op = error_mark_node;
      range->exp = NULL_TREE;
    }
  return true;
}
/* Optimize X == CST1 || X == CST2
   if popcount (CST1 ^ CST2) == 1 into
   (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
   Similarly for ranges.  E.g.
   X != 2 && X != 3 && X != 10 && X != 11
   will be transformed by the previous optimization into
   !((X - 2U) <= 1U || (X - 10U) <= 1U)
   and this loop can transform that into
   !(((X & ~8) - 2U) <= 1U).  */
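
/* A worked instance (a sketch with made-up constants): for
   X == 8 || X == 10 we have CST1 ^ CST2 = 2, a power of two, so the
   combined test becomes (X & ~2) == 8, which accepts exactly 8 and
   10.  */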
static bool
optimize_range_tests_xor (enum tree_code opcode, tree type,
			  tree lowi, tree lowj, tree highi, tree highj,
			  vec<operand_entry *> *ops,
			  struct range_entry *rangei,
			  struct range_entry *rangej)
{
  tree lowxor, highxor, tem, exp;
  /* Check lowi ^ lowj == highi ^ highj and
     popcount (lowi ^ lowj) == 1.  */
  lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
  if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
    return false;
  if (!integer_pow2p (lowxor))
    return false;
  highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
  if (!tree_int_cst_equal (lowxor, highxor))
    return false;

  tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
  exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
  lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
  highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
  if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
			 NULL, rangei->in_p, lowj, highj,
			 rangei->strict_overflow_p
			 || rangej->strict_overflow_p))
    return true;
  return false;
}
2309 /* Optimize X == CST1 || X == CST2
2310 if popcount (CST2 - CST1) == 1 into
2311 ((X - CST1) & ~(CST2 - CST1)) == 0.
2312 Similarly for ranges. E.g.
2313 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2314 || X == 75 || X == 45
2315 will be transformed by the previous optimization into
2316 (X - 43U) <= 3U || (X - 75U) <= 3U
2317 and this loop can transform that into
2318 ((X - 43U) & ~(75U - 43U)) <= 3U. */
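/* An illustrative, standalone sketch of the transform above -- not part
   of the pass itself, and the function name is hypothetical.  For
   X == 43 || X == 75, 75 - 43 == 32 has popcount 1, so the pair becomes
   one subtract-and-mask compare: ((X - 43) & ~(75 - 43)) == 0.  */
static inline bool
example_diff_merged_test (unsigned int x)
{
  /* Original: x == 43 || x == 75.  Merged: ((x - 43U) & ~32U) == 0.  */
  return ((x - 43U) & ~32U) == 0;
}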
2320 optimize_range_tests_diff (enum tree_code opcode, tree type,
2321 tree lowi, tree lowj, tree highi, tree highj,
2322 vec<operand_entry *> *ops,
2323 struct range_entry *rangei,
2324 struct range_entry *rangej)
2326 tree tem1, tem2, mask;
2327 /* Check highi - lowi == highj - lowj. */
2328 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2329 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2331 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2332 if (!tree_int_cst_equal (tem1, tem2))
2334 /* Check popcount (lowj - lowi) == 1. */
2335 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2336 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2338 if (!integer_pow2p (tem1))
2341 type = unsigned_type_for (type);
2342 tem1 = fold_convert (type, tem1);
2343 tem2 = fold_convert (type, tem2);
2344 lowi = fold_convert (type, lowi);
2345 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2346 tem1 = fold_binary (MINUS_EXPR, type,
2347 fold_convert (type, rangei->exp), lowi);
2348 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2349 lowj = build_int_cst (type, 0);
2350 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2351 NULL, rangei->in_p, lowj, tem2,
2352 rangei->strict_overflow_p
2353 || rangej->strict_overflow_p))
2358 /* Perform the checks common to optimize_range_tests_xor and
2359 optimize_range_tests_diff.
2360 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
2361 otherwise call optimize_range_tests_diff. */
2364 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2365 bool optimize_xor, vec<operand_entry *> *ops,
2366 struct range_entry *ranges)
2369 bool any_changes = false;
2370 for (i = first; i < length; i++)
2372 tree lowi, highi, lowj, highj, type, tem;
2374 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2376 type = TREE_TYPE (ranges[i].exp);
2377 if (!INTEGRAL_TYPE_P (type))
2379 lowi = ranges[i].low;
2380 if (lowi == NULL_TREE)
2381 lowi = TYPE_MIN_VALUE (type);
2382 highi = ranges[i].high;
2383 if (highi == NULL_TREE)
2385 for (j = i + 1; j < length && j < i + 64; j++)
2388 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2390 lowj = ranges[j].low;
2391 if (lowj == NULL_TREE)
2393 highj = ranges[j].high;
2394 if (highj == NULL_TREE)
2395 highj = TYPE_MAX_VALUE (type);
2396 /* Check lowj > highi. */
2397 tem = fold_binary (GT_EXPR, boolean_type_node,
2398 lowj, highi);
2399 if (tem == NULL_TREE || !integer_onep (tem))
2402 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2403 highi, highj, ops,
2404 ranges + i, ranges + j);
2406 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2407 highi, highj, ops,
2408 ranges + i, ranges + j);
2419 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2420 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2421 EXP on success, NULL otherwise. */
2424 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2425 wide_int *mask, tree *totallowp)
2427 tree tem = int_const_binop (MINUS_EXPR, high, low);
2428 if (tem == NULL_TREE
2429 || TREE_CODE (tem) != INTEGER_CST
2430 || TREE_OVERFLOW (tem)
2431 || tree_int_cst_sgn (tem) == -1
2432 || compare_tree_int (tem, prec) != -1)
2435 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2436 *mask = wi::shifted_mask (0, max, false, prec);
2437 if (TREE_CODE (exp) == BIT_AND_EXPR
2438 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2440 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2441 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2442 if (wi::popcount (msk) == 1
2443 && wi::ltu_p (msk, prec - max))
2445 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2446 max += msk.to_uhwi ();
2447 exp = TREE_OPERAND (exp, 0);
2448 if (integer_zerop (low)
2449 && TREE_CODE (exp) == PLUS_EXPR
2450 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2452 tree ret = TREE_OPERAND (exp, 0);
2454 widest_int bias
2455 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2456 TYPE_PRECISION (TREE_TYPE (low))));
2457 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2463 else if (!tree_int_cst_lt (totallow, tbias))
2465 bias = wi::to_widest (tbias);
2466 bias -= wi::to_widest (totallow);
2467 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2469 *mask = wi::lshift (*mask, bias);
2477 if (!tree_int_cst_lt (totallow, low))
2479 tem = int_const_binop (MINUS_EXPR, low, totallow);
2480 if (tem == NULL_TREE
2481 || TREE_CODE (tem) != INTEGER_CST
2482 || TREE_OVERFLOW (tem)
2483 || compare_tree_int (tem, prec - max) == 1)
2486 *mask = wi::lshift (*mask, wi::to_widest (tem));
2490 /* Attempt to optimize small range tests using bit test.
2492 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2493 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2494 has been optimized by earlier passes into:
2495 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2496 Since the whole 43 through 82 range spans fewer than 64 values,
2497 optimize that for 64-bit word targets into:
2498 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
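/* An illustrative, standalone sketch of the bit-test form above -- not
   part of the pass itself, and the function name is hypothetical.  It
   tests membership of X in { 43, 44, 45, 46, 49, 75, 76, 77, 78, 82 }:
   rebased at 43 the whole set fits in one 64-bit word, so a single shift
   and mask replace ten comparisons.  */
static inline bool
example_bit_test_membership (unsigned int x)
{
  /* Bit (x - 43) of the constant is set exactly for the values above;
     the range check also keeps the shift count in bounds.  */
  if (x - 43U > 39U)
    return false;
  return ((1ULL << (x - 43U)) & 0x8F0000004FULL) != 0;
}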
2501 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2502 vec<operand_entry *> *ops,
2503 struct range_entry *ranges)
2506 bool any_changes = false;
2507 int prec = GET_MODE_BITSIZE (word_mode);
2508 auto_vec<struct range_entry *, 64> candidates;
2510 for (i = first; i < length - 2; i++)
2512 tree lowi, highi, lowj, highj, type;
2514 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2516 type = TREE_TYPE (ranges[i].exp);
2517 if (!INTEGRAL_TYPE_P (type))
2519 lowi = ranges[i].low;
2520 if (lowi == NULL_TREE)
2521 lowi = TYPE_MIN_VALUE (type);
2522 highi = ranges[i].high;
2523 if (highi == NULL_TREE)
2526 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2527 highi, &mask, &lowi);
2528 if (exp == NULL_TREE)
2530 bool strict_overflow_p = ranges[i].strict_overflow_p;
2531 candidates.truncate (0);
2532 int end = MIN (i + 64, length);
2533 for (j = i + 1; j < end; j++)
2536 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2538 if (ranges[j].exp == exp)
2540 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2542 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2545 else if (TREE_CODE (exp2) == PLUS_EXPR)
2547 exp2 = TREE_OPERAND (exp2, 0);
2557 lowj = ranges[j].low;
2558 if (lowj == NULL_TREE)
2560 highj = ranges[j].high;
2561 if (highj == NULL_TREE)
2562 highj = TYPE_MAX_VALUE (type);
2564 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2565 highj, &mask2, NULL);
2569 strict_overflow_p |= ranges[j].strict_overflow_p;
2570 candidates.safe_push (&ranges[j]);
2573 /* If we need otherwise 3 or more comparisons, use a bit test. */
2574 if (candidates.length () >= 2)
2576 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2577 wi::to_widest (lowi)
2578 + prec - 1 - wi::clz (mask));
2579 operand_entry *oe = (*ops)[ranges[i].idx];
2581 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2582 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2583 location_t loc = gimple_location (stmt);
2584 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2586 /* See if it isn't cheaper to pretend the minimum value of the
2587 range is 0, if maximum value is small enough.
2588 We can avoid then subtraction of the minimum value, but the
2589 mask constant could be perhaps more expensive. */
2590 if (compare_tree_int (lowi, 0) > 0
2591 && compare_tree_int (high, prec) < 0)
2594 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2595 rtx reg = gen_raw_REG (word_mode, 10000);
2596 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2597 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2598 GEN_INT (-m)), speed_p);
2599 rtx r = immed_wide_int_const (mask, word_mode);
2600 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2601 word_mode, speed_p);
2602 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2603 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2604 word_mode, speed_p);
2607 mask = wi::lshift (mask, m);
2608 lowi = build_zero_cst (TREE_TYPE (lowi));
2612 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2613 false, lowi, high);
2614 if (tem == NULL_TREE || is_gimple_val (tem))
2616 tree etype = unsigned_type_for (TREE_TYPE (exp));
2617 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2618 fold_convert_loc (loc, etype, exp),
2619 fold_convert_loc (loc, etype, lowi));
2620 exp = fold_convert_loc (loc, integer_type_node, exp);
2621 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2622 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2623 build_int_cst (word_type, 1), exp);
2624 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2625 wide_int_to_tree (word_type, mask));
2626 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2627 build_zero_cst (word_type));
2628 if (is_gimple_val (exp))
2631 /* The shift might have undefined behavior if TEM is true,
2632 but reassociate_bb isn't prepared to have basic blocks
2633 split when it is running. So, temporarily emit code
2634 with BIT_IOR_EXPR instead of &&, and fix it up in
2635 branch_fixup. */
2637 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2638 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2639 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2641 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2642 gimple_seq_add_seq_without_update (&seq, seq2);
2643 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2644 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2645 gimple *g = gimple_build_assign (make_ssa_name (optype),
2646 BIT_IOR_EXPR, tem, exp);
2647 gimple_set_location (g, loc);
2648 gimple_seq_add_stmt_without_update (&seq, g);
2649 exp = gimple_assign_lhs (g);
2650 tree val = build_zero_cst (optype);
2651 if (update_range_test (&ranges[i], NULL, candidates.address (),
2652 candidates.length (), opcode, ops, exp,
2653 seq, false, val, val, strict_overflow_p))
2656 reassoc_branch_fixups.safe_push (tem);
2659 gimple_seq_discard (seq);
2665 /* Optimize range tests, similarly to how fold_range_test optimizes
2666 them on trees. The tree code for the binary
2667 operation between all the operands is OPCODE.
2668 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2669 maybe_optimize_range_tests for inter-bb range optimization.
2670 In that case if oe->op is NULL, oe->id is bb->index whose
2671 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2672 the actual opcode. */
2675 optimize_range_tests (enum tree_code opcode,
2676 vec<operand_entry *> *ops)
2678 unsigned int length = ops->length (), i, j, first;
2680 struct range_entry *ranges;
2681 bool any_changes = false;
2686 ranges = XNEWVEC (struct range_entry, length);
2687 for (i = 0; i < length; i++)
2691 init_range_entry (ranges + i, oe->op,
2694 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2695 /* For | invert it now, we will invert it again before emitting
2696 the optimized expression. */
2697 if (opcode == BIT_IOR_EXPR
2698 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2699 ranges[i].in_p = !ranges[i].in_p;
2702 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2703 for (i = 0; i < length; i++)
2704 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2707 /* Try to merge ranges. */
2708 for (first = i; i < length; i++)
2710 tree low = ranges[i].low;
2711 tree high = ranges[i].high;
2712 int in_p = ranges[i].in_p;
2713 bool strict_overflow_p = ranges[i].strict_overflow_p;
2714 int update_fail_count = 0;
2716 for (j = i + 1; j < length; j++)
2718 if (ranges[i].exp != ranges[j].exp)
2720 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2721 ranges[j].in_p, ranges[j].low, ranges[j].high))
2723 strict_overflow_p |= ranges[j].strict_overflow_p;
2729 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2730 opcode, ops, ranges[i].exp, NULL, in_p,
2731 low, high, strict_overflow_p))
2736 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2737 while update_range_test would fail. */
2738 else if (update_fail_count == 64)
2741 ++update_fail_count;
2744 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2745 ops, ranges);
2747 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2748 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2749 ops, ranges);
2750 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2751 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2752 ops, ranges);
2754 if (any_changes && opcode != ERROR_MARK)
2757 FOR_EACH_VEC_ELT (*ops, i, oe)
2759 if (oe->op == error_mark_node)
2768 XDELETEVEC (ranges);
2772 /* Return true if STMT is a cast like:
2778 # _345 = PHI <_123(N), 1(...), 1(...)>
2779 where _234 has bool type, _123 has single use and
2780 bb N has a single successor M. This is commonly used in
2781 the last block of a range test. */
2784 final_range_test_p (gimple *stmt)
2786 basic_block bb, rhs_bb;
2789 use_operand_p use_p;
2792 if (!gimple_assign_cast_p (stmt))
2794 bb = gimple_bb (stmt);
2795 if (!single_succ_p (bb))
2797 e = single_succ_edge (bb);
2798 if (e->flags & EDGE_COMPLEX)
2801 lhs = gimple_assign_lhs (stmt);
2802 rhs = gimple_assign_rhs1 (stmt);
2803 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2804 || TREE_CODE (rhs) != SSA_NAME
2805 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2808 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2809 if (!single_imm_use (lhs, &use_p, &use_stmt))
2812 if (gimple_code (use_stmt) != GIMPLE_PHI
2813 || gimple_bb (use_stmt) != e->dest)
2816 /* And that the rhs is defined in the same loop. */
2817 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2819 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2825 /* Return true if BB is suitable basic block for inter-bb range test
2826 optimization. If BACKWARD is true, BB should be the only predecessor
2827 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2828 or compared with to find a common basic block to which all conditions
2829 branch to if true resp. false. If BACKWARD is false, TEST_BB should
2830 be the only predecessor of BB. */
2833 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2836 edge_iterator ei, ei2;
2840 bool other_edge_seen = false;
2845 /* Check last stmt first. */
2846 stmt = last_stmt (bb);
2848 || (gimple_code (stmt) != GIMPLE_COND
2849 && (backward || !final_range_test_p (stmt)))
2850 || gimple_visited_p (stmt)
2851 || stmt_could_throw_p (stmt)
2854 is_cond = gimple_code (stmt) == GIMPLE_COND;
2857 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2858 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2859 to *OTHER_BB (if not set yet, try to find it out). */
2860 if (EDGE_COUNT (bb->succs) != 2)
2862 FOR_EACH_EDGE (e, ei, bb->succs)
2864 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2866 if (e->dest == test_bb)
2875 if (*other_bb == NULL)
2877 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2878 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2880 else if (e->dest == e2->dest)
2881 *other_bb = e->dest;
2882 if (*other_bb == NULL)
2885 if (e->dest == *other_bb)
2886 other_edge_seen = true;
2890 if (*other_bb == NULL || !other_edge_seen)
2893 else if (single_succ (bb) != *other_bb)
2896 /* Now check all PHIs of *OTHER_BB. */
2897 e = find_edge (bb, *other_bb);
2898 e2 = find_edge (test_bb, *other_bb);
2899 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2901 gphi *phi = gsi.phi ();
2902 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2903 corresponding to BB and TEST_BB predecessor must be the same. */
2904 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2905 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2907 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2908 one of the PHIs should have the lhs of the last stmt in
2909 that block as PHI arg and that PHI should have 0 or 1
2910 corresponding to it in all other range test basic blocks
2914 if (gimple_phi_arg_def (phi, e->dest_idx)
2915 == gimple_assign_lhs (stmt)
2916 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2917 || integer_onep (gimple_phi_arg_def (phi,
2918 e2->dest_idx))))
2923 gimple *test_last = last_stmt (test_bb);
2924 if (gimple_code (test_last) != GIMPLE_COND
2925 && gimple_phi_arg_def (phi, e2->dest_idx)
2926 == gimple_assign_lhs (test_last)
2927 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2928 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2938 /* Return true if BB doesn't have side-effects that would disallow
2939 range test optimization, all SSA_NAMEs set in the bb are consumed
2940 in the bb and there are no PHIs. */
2943 no_side_effect_bb (basic_block bb)
2945 gimple_stmt_iterator gsi;
2948 if (!gimple_seq_empty_p (phi_nodes (bb)))
2950 last = last_stmt (bb);
2951 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2953 gimple *stmt = gsi_stmt (gsi);
2955 imm_use_iterator imm_iter;
2956 use_operand_p use_p;
2958 if (is_gimple_debug (stmt))
2960 if (gimple_has_side_effects (stmt))
2964 if (!is_gimple_assign (stmt))
2966 lhs = gimple_assign_lhs (stmt);
2967 if (TREE_CODE (lhs) != SSA_NAME)
2969 if (gimple_assign_rhs_could_trap_p (stmt))
2971 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2973 gimple *use_stmt = USE_STMT (use_p);
2974 if (is_gimple_debug (use_stmt))
2976 if (gimple_bb (use_stmt) != bb)
2983 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2984 return true and fill in *OPS recursively. */
2987 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
2990 gimple *stmt = SSA_NAME_DEF_STMT (var);
2994 if (!is_reassociable_op (stmt, code, loop))
2997 rhs[0] = gimple_assign_rhs1 (stmt);
2998 rhs[1] = gimple_assign_rhs2 (stmt);
2999 gimple_set_visited (stmt, true);
3000 for (i = 0; i < 2; i++)
3001 if (TREE_CODE (rhs[i]) == SSA_NAME
3002 && !get_ops (rhs[i], code, ops, loop)
3003 && has_single_use (rhs[i]))
3005 operand_entry *oe = operand_entry_pool.allocate ();
3011 ops->safe_push (oe);
3016 /* Find the ops that were added by get_ops starting from VAR, see if
3017 they were changed during update_range_test and if yes, create new
3018 stmts. */
3021 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3022 unsigned int *pidx, struct loop *loop)
3024 gimple *stmt = SSA_NAME_DEF_STMT (var);
3028 if (!is_reassociable_op (stmt, code, loop))
3031 rhs[0] = gimple_assign_rhs1 (stmt);
3032 rhs[1] = gimple_assign_rhs2 (stmt);
3035 for (i = 0; i < 2; i++)
3036 if (TREE_CODE (rhs[i]) == SSA_NAME)
3038 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3039 if (rhs[2 + i] == NULL_TREE)
3041 if (has_single_use (rhs[i]))
3042 rhs[2 + i] = ops[(*pidx)++]->op;
3044 rhs[2 + i] = rhs[i];
3047 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3048 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3050 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3051 var = make_ssa_name (TREE_TYPE (var));
3052 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3053 rhs[2], rhs[3]);
3054 gimple_set_uid (g, gimple_uid (stmt));
3055 gimple_set_visited (g, true);
3056 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3061 /* Structure to track the initial value passed to get_ops and
3062 the range in the ops vector for each basic block. */
3064 struct inter_bb_range_test_entry
3067 unsigned int first_idx, last_idx;
3070 /* Inter-bb range test optimization.
3072 Return TRUE if a gimple conditional is optimized to a constant
3073 true/false, otherwise return FALSE.
3075 This indicates to the caller that it should run a CFG cleanup pass
3076 once reassociation is completed. */
3079 maybe_optimize_range_tests (gimple *stmt)
3081 basic_block first_bb = gimple_bb (stmt);
3082 basic_block last_bb = first_bb;
3083 basic_block other_bb = NULL;
3087 auto_vec<operand_entry *> ops;
3088 auto_vec<inter_bb_range_test_entry> bbinfo;
3089 bool any_changes = false;
3090 bool cfg_cleanup_needed = false;
3092 /* Consider only basic blocks that end with GIMPLE_COND or
3093 a cast statement satisfying final_range_test_p. All
3094 but the last bb in the first_bb .. last_bb range
3095 should end with GIMPLE_COND. */
3096 if (gimple_code (stmt) == GIMPLE_COND)
3098 if (EDGE_COUNT (first_bb->succs) != 2)
3099 return cfg_cleanup_needed;
3101 else if (final_range_test_p (stmt))
3102 other_bb = single_succ (first_bb);
3104 return cfg_cleanup_needed;
3106 if (stmt_could_throw_p (stmt))
3107 return cfg_cleanup_needed;
3109 /* As relative ordering of post-dominator sons isn't fixed,
3110 maybe_optimize_range_tests can be called first on any
3111 bb in the range we want to optimize. So, start searching
3112 backwards, if first_bb can be set to a predecessor. */
3113 while (single_pred_p (first_bb))
3115 basic_block pred_bb = single_pred (first_bb);
3116 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3118 if (!no_side_effect_bb (first_bb))
3122 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3123 Before starting forward search in last_bb successors, find
3124 out the other_bb. */
3125 if (first_bb == last_bb)
3128 /* As non-GIMPLE_COND last stmt always terminates the range,
3129 if forward search didn't discover anything, just give up. */
3130 if (gimple_code (stmt) != GIMPLE_COND)
3131 return cfg_cleanup_needed;
3132 /* Look at both successors. Either it ends with a GIMPLE_COND
3133 and satisfies suitable_cond_bb, or ends with a cast and
3134 other_bb is that cast's successor. */
3135 FOR_EACH_EDGE (e, ei, first_bb->succs)
3136 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3137 || e->dest == first_bb)
3138 return cfg_cleanup_needed;
3139 else if (single_pred_p (e->dest))
3141 stmt = last_stmt (e->dest);
3143 && gimple_code (stmt) == GIMPLE_COND
3144 && EDGE_COUNT (e->dest->succs) == 2)
3146 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3152 && final_range_test_p (stmt)
3153 && find_edge (first_bb, single_succ (e->dest)))
3155 other_bb = single_succ (e->dest);
3156 if (other_bb == first_bb)
3160 if (other_bb == NULL)
3161 return cfg_cleanup_needed;
3163 /* Now do the forward search, moving last_bb to successor bbs
3164 that aren't other_bb. */
3165 while (EDGE_COUNT (last_bb->succs) == 2)
3167 FOR_EACH_EDGE (e, ei, last_bb->succs)
3168 if (e->dest != other_bb)
3172 if (!single_pred_p (e->dest))
3174 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3176 if (!no_side_effect_bb (e->dest))
3180 if (first_bb == last_bb)
3181 return cfg_cleanup_needed;
3182 /* Here basic blocks first_bb through last_bb's predecessor
3183 end with GIMPLE_COND, all of them have one of the edges to
3184 other_bb and another to another block in the range,
3185 all blocks except first_bb don't have side-effects and
3186 last_bb ends with either GIMPLE_COND, or cast satisfying
3187 final_range_test_p. */
3188 for (bb = last_bb; ; bb = single_pred (bb))
3190 enum tree_code code;
3192 inter_bb_range_test_entry bb_ent;
3194 bb_ent.op = NULL_TREE;
3195 bb_ent.first_idx = ops.length ();
3196 bb_ent.last_idx = bb_ent.first_idx;
3197 e = find_edge (bb, other_bb);
3198 stmt = last_stmt (bb);
3199 gimple_set_visited (stmt, true);
3200 if (gimple_code (stmt) != GIMPLE_COND)
3202 use_operand_p use_p;
3207 lhs = gimple_assign_lhs (stmt);
3208 rhs = gimple_assign_rhs1 (stmt);
3209 gcc_assert (bb == last_bb);
3216 # _345 = PHI <_123(N), 1(...), 1(...)>
3218 or 0 instead of 1. If it is 0, the _234
3219 range test is anded together with all the
3220 other range tests, if it is 1, it is ored with
3222 single_imm_use (lhs, &use_p, &phi);
3223 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3224 e2 = find_edge (first_bb, other_bb);
3226 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3227 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3228 code = BIT_AND_EXPR;
3231 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3232 code = BIT_IOR_EXPR;
3235 /* If _234 SSA_NAME_DEF_STMT is
3237 (or &, corresponding to 1/0 in the phi arguments,
3238 push into ops the individual range test arguments
3239 of the bitwise or resp. and, recursively. */
3240 if (!get_ops (rhs, code, &ops,
3241 loop_containing_stmt (stmt))
3242 && has_single_use (rhs))
3244 /* Otherwise, push the _234 range test itself. */
3245 operand_entry *oe = operand_entry_pool.allocate ();
3255 bb_ent.last_idx = ops.length ();
3257 bbinfo.safe_push (bb_ent);
3260 /* Otherwise stmt is GIMPLE_COND. */
3261 code = gimple_cond_code (stmt);
3262 lhs = gimple_cond_lhs (stmt);
3263 rhs = gimple_cond_rhs (stmt);
3264 if (TREE_CODE (lhs) == SSA_NAME
3265 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3266 && ((code != EQ_EXPR && code != NE_EXPR)
3267 || rhs != boolean_false_node
3268 /* Either push into ops the individual bitwise
3269 or resp. and operands, depending on which
3270 edge is other_bb. */
3271 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3272 ^ (code == EQ_EXPR))
3273 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3274 loop_containing_stmt (stmt))))
3276 /* Or push the GIMPLE_COND stmt itself. */
3277 operand_entry *oe = operand_entry_pool.allocate ();
3280 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3281 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3282 /* oe->op = NULL signals that there is no SSA_NAME
3283 for the range test, and oe->id instead is the
3284 basic block number, at whose end the GIMPLE_COND
3292 else if (ops.length () > bb_ent.first_idx)
3295 bb_ent.last_idx = ops.length ();
3297 bbinfo.safe_push (bb_ent);
3301 if (ops.length () > 1)
3302 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3305 unsigned int idx, max_idx = 0;
3306 /* update_ops relies on has_single_use predicates returning the
3307 same values as it did during get_ops earlier. Additionally it
3308 never removes statements, only adds new ones and it should walk
3309 from the single imm use and check the predicate already before
3310 making those changes.
3311 On the other side, the handling of GIMPLE_COND directly can turn
3312 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3313 it needs to be done in a separate loop afterwards. */
3314 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3316 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3317 && bbinfo[idx].op != NULL_TREE)
3322 stmt = last_stmt (bb);
3323 new_op = update_ops (bbinfo[idx].op,
3325 ops[bbinfo[idx].first_idx]->rank,
3326 ops, &bbinfo[idx].first_idx,
3327 loop_containing_stmt (stmt));
3328 if (new_op == NULL_TREE)
3330 gcc_assert (bb == last_bb);
3331 new_op = ops[bbinfo[idx].first_idx++]->op;
3333 if (bbinfo[idx].op != new_op)
3335 imm_use_iterator iter;
3336 use_operand_p use_p;
3337 gimple *use_stmt, *cast_stmt = NULL;
3339 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3340 if (is_gimple_debug (use_stmt))
3342 else if (gimple_code (use_stmt) == GIMPLE_COND
3343 || gimple_code (use_stmt) == GIMPLE_PHI)
3344 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3345 SET_USE (use_p, new_op);
3346 else if (gimple_assign_cast_p (use_stmt))
3347 cast_stmt = use_stmt;
3352 gcc_assert (bb == last_bb);
3353 tree lhs = gimple_assign_lhs (cast_stmt);
3354 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3355 enum tree_code rhs_code
3356 = gimple_assign_rhs_code (cast_stmt);
3358 if (is_gimple_min_invariant (new_op))
3360 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3361 g = gimple_build_assign (new_lhs, new_op);
3364 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3365 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3366 gimple_set_uid (g, gimple_uid (cast_stmt));
3367 gimple_set_visited (g, true);
3368 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3369 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3370 if (is_gimple_debug (use_stmt))
3372 else if (gimple_code (use_stmt) == GIMPLE_COND
3373 || gimple_code (use_stmt) == GIMPLE_PHI)
3374 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3375 SET_USE (use_p, new_lhs);
3384 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3386 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3387 && bbinfo[idx].op == NULL_TREE
3388 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3390 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3395 /* If we collapse the conditional to a true/false
3396 condition, then bubble that knowledge up to our caller. */
3397 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3399 gimple_cond_make_false (cond_stmt);
3400 cfg_cleanup_needed = true;
3402 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3404 gimple_cond_make_true (cond_stmt);
3405 cfg_cleanup_needed = true;
3409 gimple_cond_set_code (cond_stmt, NE_EXPR);
3410 gimple_cond_set_lhs (cond_stmt,
3411 ops[bbinfo[idx].first_idx]->op);
3412 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3414 update_stmt (cond_stmt);
3420 /* The above changes could result in basic blocks after the first
3421 modified one, up to and including last_bb, to be executed even if
3422 they would not be in the original program. If the value ranges of
3423 assignment lhs' in those bbs were dependent on the conditions
3424 guarding those basic blocks which now can change, the VRs might
3425 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
3426 are only used within the same bb, it should be not a big deal if
3427 we just reset all the VRs in those bbs. See PR68671. */
3428 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
3429 reset_flow_sensitive_info_in_bb (bb);
3431 return cfg_cleanup_needed;
3434 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3435 of STMT in its operands. This is also known as a "destructive
3436 update" operation. */
3439 is_phi_for_stmt (gimple *stmt, tree operand)
3444 use_operand_p arg_p;
3447 if (TREE_CODE (operand) != SSA_NAME)
3450 lhs = gimple_assign_lhs (stmt);
3452 def_stmt = SSA_NAME_DEF_STMT (operand);
3453 def_phi = dyn_cast <gphi *> (def_stmt);
3457 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3458 if (lhs == USE_FROM_PTR (arg_p))
3463 /* Remove def stmt of VAR if VAR has zero uses and recurse
3464 on rhs1 operand if so. */
3467 remove_visited_stmt_chain (tree var)
3470 gimple_stmt_iterator gsi;
3474 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3476 stmt = SSA_NAME_DEF_STMT (var);
3477 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3479 var = gimple_assign_rhs1 (stmt);
3480 gsi = gsi_for_stmt (stmt);
3481 reassoc_remove_stmt (&gsi);
3482 release_defs (stmt);
3489 /* This function checks three consecutive operands in
3490 passed operands vector OPS starting from OPINDEX and
3491 swaps two operands if it is profitable for binary operation
3492 consuming OPINDEX + 1 and OPINDEX + 2 operands.
3494 We pair ops with the same rank if possible.
3496 The alternative we try is to see if STMT is a destructive
3497 update style statement, which is like:
3500 In that case, we want to use the destructive update form to
3501 expose the possible vectorizer sum reduction opportunity.
3502 In that case, the third operand will be the phi node. This
3503 check is not performed if STMT is null.
3505 We could, of course, try to be better as noted above, and do a
3506 lot of work to try to find these opportunities in >3 operand
3507 cases, but it is unlikely to be worth it. */
3510 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
3511 unsigned int opindex, gimple *stmt)
3513 operand_entry *oe1, *oe2, *oe3;
3516 oe2 = ops[opindex + 1];
3517 oe3 = ops[opindex + 2];
3519 if ((oe1->rank == oe2->rank
3520 && oe2->rank != oe3->rank)
3521 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3522 && !is_phi_for_stmt (stmt, oe1->op)
3523 && !is_phi_for_stmt (stmt, oe2->op)))
3525 operand_entry temp = *oe3;
3527 oe3->rank = oe1->rank;
3529 oe1->rank = temp.rank;
3531 else if ((oe1->rank == oe3->rank
3532 && oe2->rank != oe3->rank)
3533 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3534 && !is_phi_for_stmt (stmt, oe1->op)
3535 && !is_phi_for_stmt (stmt, oe3->op)))
3537 operand_entry temp = *oe2;
3539 oe2->rank = oe1->rank;
3541 oe1->rank = temp.rank;
3545 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3546 two definitions, otherwise return STMT. */
3548 static inline gimple *
3549 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
3551 if (TREE_CODE (rhs1) == SSA_NAME
3552 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3553 stmt = SSA_NAME_DEF_STMT (rhs1);
3554 if (TREE_CODE (rhs2) == SSA_NAME
3555 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3556 stmt = SSA_NAME_DEF_STMT (rhs2);
3560 /* Recursively rewrite our linearized statements so that the operators
3561 match those in OPS[OPINDEX], putting the computation in rank
3562 order. Return new lhs. */
3565 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
3566 vec<operand_entry *> ops, bool changed)
3568 tree rhs1 = gimple_assign_rhs1 (stmt);
3569 tree rhs2 = gimple_assign_rhs2 (stmt);
3570 tree lhs = gimple_assign_lhs (stmt);
3573 /* The final recursion case for this function is that you have
3574 exactly two operations left.
3575 If we had exactly one op in the entire list to start with, we
3576 would have never called this function, and the tail recursion
3577 rewrites them one at a time. */
3578 if (opindex + 2 == ops.length ())
3580 operand_entry *oe1, *oe2;
3583 oe2 = ops[opindex + 1];
3585 if (rhs1 != oe1->op || rhs2 != oe2->op)
3587 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3588 unsigned int uid = gimple_uid (stmt);
3590 if (dump_file && (dump_flags & TDF_DETAILS))
3592 fprintf (dump_file, "Transforming ");
3593 print_gimple_stmt (dump_file, stmt, 0, 0);
3596 /* Even when changed is false, reassociation could have e.g. removed
3597 some redundant operations, so unless we are just swapping the
3598 arguments or unless there is no change at all (then we just
3599 return lhs), force creation of a new SSA_NAME. */
3600 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3602 gimple *insert_point
3603 = find_insert_point (stmt, oe1->op, oe2->op);
3604 lhs = make_ssa_name (TREE_TYPE (lhs));
3605 stmt
3606 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3607 oe1->op, oe2->op);
3608 gimple_set_uid (stmt, uid);
3609 gimple_set_visited (stmt, true);
3610 if (insert_point == gsi_stmt (gsi))
3611 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3613 insert_stmt_after (stmt, insert_point);
3617 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3618 == stmt);
3619 gimple_assign_set_rhs1 (stmt, oe1->op);
3620 gimple_assign_set_rhs2 (stmt, oe2->op);
3624 if (rhs1 != oe1->op && rhs1 != oe2->op)
3625 remove_visited_stmt_chain (rhs1);
3627 if (dump_file && (dump_flags & TDF_DETAILS))
3629 fprintf (dump_file, " into ");
3630 print_gimple_stmt (dump_file, stmt, 0, 0);
3636 /* If we hit here, we should have 3 or more ops left. */
3637 gcc_assert (opindex + 2 < ops.length ());
3639 /* Rewrite the next operator. */
3642 /* Recurse on the LHS of the binary operator, which is guaranteed to
3643 be the non-leaf side. */
3644 tree new_rhs1
3645 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3646 changed || oe->op != rhs2);
3648 if (oe->op != rhs2 || new_rhs1 != rhs1)
3650 if (dump_file && (dump_flags & TDF_DETAILS))
3652 fprintf (dump_file, "Transforming ");
3653 print_gimple_stmt (dump_file, stmt, 0, 0);
3656 /* If changed is false, this is either opindex == 0
3657 or all outer rhs2's were equal to corresponding oe->op,
3658 and powi_result is NULL.
3659 That means lhs is equivalent before and after reassociation.
3660 Otherwise ensure the old lhs SSA_NAME is not reused and
3661 create a new stmt as well, so that any debug stmts will be
3662 properly adjusted. */
3665 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3666 unsigned int uid = gimple_uid (stmt);
3667 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3669 lhs = make_ssa_name (TREE_TYPE (lhs));
3670 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3671 new_rhs1, oe->op);
3672 gimple_set_uid (stmt, uid);
3673 gimple_set_visited (stmt, true);
3674 if (insert_point == gsi_stmt (gsi))
3675 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3677 insert_stmt_after (stmt, insert_point);
3681 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3682 == stmt);
3683 gimple_assign_set_rhs1 (stmt, new_rhs1);
3684 gimple_assign_set_rhs2 (stmt, oe->op);
3688 if (dump_file && (dump_flags & TDF_DETAILS))
3690 fprintf (dump_file, " into ");
3691 print_gimple_stmt (dump_file, stmt, 0, 0);
3697 /* Find out how many cycles we need to compute a statement chain.
3698 OPS_NUM holds the number of statements in a chain. CPU_WIDTH is the
3699 maximum number of independent statements we may execute per cycle. */
3702 get_required_cycles (int ops_num, int cpu_width)
3708 /* While we have more than 2 * cpu_width operands
3709 we may reduce number of operands by cpu_width
3710 per cycle. */
3711 res = ops_num / (2 * cpu_width);
3713 /* Remained operands count may be reduced twice per cycle
3714 until we have only one operand. */
3715 rest = (unsigned)(ops_num - res * cpu_width);
3716 elog = exact_log2 (rest);
3720 res += floor_log2 (rest) + 1;
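/* An illustrative, standalone mirror of the estimate above -- not part
   of the pass itself, and the function name is hypothetical.  Phase one
   retires cpu_width operands per cycle while more than 2 * cpu_width
   remain; the remainder then halves once per cycle, i.e.
   ceil (log2 (rest)) more cycles.  E.g. ops_num == 10, cpu_width == 2
   gives 10 / 4 = 2 cycles, rest = 10 - 2 * 2 = 6, plus 3 halving
   cycles: 5 in total.  */
static inline int
example_required_cycles (int ops_num, int cpu_width)
{
  int res = ops_num / (2 * cpu_width);
  unsigned int rest = (unsigned int) (ops_num - res * cpu_width);
  /* ceil (log2 (rest)), computed by repeated halving.  */
  while (rest > 1)
    {
      rest = (rest + 1) / 2;
      res++;
    }
  return res;
}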
3725 /* Returns an optimal number of registers to use for computation of
3726 given statements. */
3729 get_reassociation_width (int ops_num, enum tree_code opc,
3732 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3737 if (param_width > 0)
3738 width = param_width;
3740 width = targetm.sched.reassociation_width (opc, mode);
3745 /* Get the minimal time required for sequence computation. */
3746 cycles_best = get_required_cycles (ops_num, width);
3748 /* Check if we may use less width and still compute sequence for
3749 the same time. It will allow us to reduce registers usage.
3750 get_required_cycles is monotonically increasing with lower width
3751 so we can perform a binary search for the minimal width that still
3752 results in the optimal cycle count. */
3754 while (width > width_min)
3756 int width_mid = (width + width_min) / 2;
3758 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3760 else if (width_min < width_mid)
3761 width_min = width_mid;
3769 /* Recursively rewrite our linearized statements so that the operators
3770 match those in OPS[OPINDEX], putting the computation in rank
3771 order and trying to allow operations to be executed in
3772 parallel. */
3775 rewrite_expr_tree_parallel (gassign *stmt, int width,
3776 vec<operand_entry *> ops)
3778 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3779 int op_num = ops.length ();
3780 int stmt_num = op_num - 1;
3781 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
3782 int op_index = op_num - 1;
3784 int ready_stmts_end = 0;
3786 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3788 /* We start expression rewriting from the top statements.
3789 So, in this loop we create a full list of statements
3790 we will work with. */
3791 stmts[stmt_num - 1] = stmt;
3792 for (i = stmt_num - 2; i >= 0; i--)
3793 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3795 for (i = 0; i < stmt_num; i++)
3799 /* Determine whether we should use results of
3800 already handled statements or not. */
3801 if (ready_stmts_end == 0
3802 && (i - stmt_index >= width || op_index < 1))
3803 ready_stmts_end = i;
3805 /* Now we choose operands for the next statement. Non zero
3806 value in ready_stmts_end means here that we should use
3807 the result of already generated statements as new operand. */
3808 if (ready_stmts_end > 0)
3810 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3811 if (ready_stmts_end > stmt_index)
3812 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3813 else if (op_index >= 0)
3814 op2 = ops[op_index--]->op;
3817 gcc_assert (stmt_index < i);
3818 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3821 if (stmt_index >= ready_stmts_end)
3822 ready_stmts_end = 0;
3827 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3828 op2 = ops[op_index--]->op;
3829 op1 = ops[op_index--]->op;
3832 /* If we emit the last statement then we should put
3833 operands into the last statement. It will also
3834 break the loop. */
3835 if (op_index < 0 && stmt_index == i)
3838 if (dump_file && (dump_flags & TDF_DETAILS))
3840 fprintf (dump_file, "Transforming ");
3841 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3844 /* We keep original statement only for the last one. All
3845 others are recreated. */
3846 if (i == stmt_num - 1)
3848 gimple_assign_set_rhs1 (stmts[i], op1);
3849 gimple_assign_set_rhs2 (stmts[i], op2);
3850 update_stmt (stmts[i]);
3853 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3855 if (dump_file && (dump_flags & TDF_DETAILS))
3857 fprintf (dump_file, " into ");
3858 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3862 remove_visited_stmt_chain (last_rhs1);
3865 /* Transform STMT, which is really (A + B) + (C + D), into the left
3866 linear form, ((A+B)+C)+D.
3867 Recurse on D if necessary. */
3870 linearize_expr (gimple *stmt)
3872 gimple_stmt_iterator gsi;
3873 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3874 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3875 gimple *oldbinrhs = binrhs;
3876 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3877 gimple *newbinrhs = NULL;
3878 struct loop *loop = loop_containing_stmt (stmt);
3879 tree lhs = gimple_assign_lhs (stmt);
3881 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3882 && is_reassociable_op (binrhs, rhscode, loop));
3884 gsi = gsi_for_stmt (stmt);
3886 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3887 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
3888 gimple_assign_rhs_code (binrhs),
3889 gimple_assign_lhs (binlhs),
3890 gimple_assign_rhs2 (binrhs));
3891 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3892 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3893 gimple_set_uid (binrhs, gimple_uid (stmt));
3895 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3896 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3898 if (dump_file && (dump_flags & TDF_DETAILS))
3900 fprintf (dump_file, "Linearized: ");
3901 print_gimple_stmt (dump_file, stmt, 0, 0);
3904 reassociate_stats.linearized++;
3907 gsi = gsi_for_stmt (oldbinrhs);
3908 reassoc_remove_stmt (&gsi);
3909 release_defs (oldbinrhs);
3911 gimple_set_visited (stmt, true);
3912 gimple_set_visited (binlhs, true);
3913 gimple_set_visited (binrhs, true);
3915 /* Tail recurse on the new rhs if it still needs reassociation. */
3916 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3917 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3918 want to change the algorithm while converting to tuples. */
3919 linearize_expr (stmt);
3922 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3923 it. Otherwise, return NULL. */
3926 get_single_immediate_use (tree lhs)
3928 use_operand_p immuse;
3931 if (TREE_CODE (lhs) == SSA_NAME
3932 && single_imm_use (lhs, &immuse, &immusestmt)
3933 && is_gimple_assign (immusestmt))
3939 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3940 representing the negated value. Insertions of any necessary
3941 instructions go before GSI.
3942 This function is recursive in that, if you hand it "a_5" as the
3943 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3944 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3947 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3949 gimple *negatedefstmt = NULL;
3950 tree resultofnegate;
3951 gimple_stmt_iterator gsi;
3954 /* If we are trying to negate a name, defined by an add, negate the
3955 add operands instead. */
3956 if (TREE_CODE (tonegate) == SSA_NAME)
3957 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3958 if (TREE_CODE (tonegate) == SSA_NAME
3959 && is_gimple_assign (negatedefstmt)
3960 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3961 && has_single_use (gimple_assign_lhs (negatedefstmt))
3962 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3964 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3965 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3966 tree lhs = gimple_assign_lhs (negatedefstmt);
3969 gsi = gsi_for_stmt (negatedefstmt);
3970 rhs1 = negate_value (rhs1, &gsi);
3972 gsi = gsi_for_stmt (negatedefstmt);
3973 rhs2 = negate_value (rhs2, &gsi);
3975 gsi = gsi_for_stmt (negatedefstmt);
3976 lhs = make_ssa_name (TREE_TYPE (lhs));
3977 gimple_set_visited (negatedefstmt, true);
3978 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
3979 gimple_set_uid (g, gimple_uid (negatedefstmt));
3980 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3984 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3985 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3986 NULL_TREE, true, GSI_SAME_STMT);
3988 uid = gimple_uid (gsi_stmt (gsi));
3989 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3991 gimple *stmt = gsi_stmt (gsi);
3992 if (gimple_uid (stmt) != 0)
3994 gimple_set_uid (stmt, uid);
3996 return resultofnegate;
3999 /* Return true if we should break up the subtract in STMT into an add
4000 with negate. This is true when the subtract operands are really
4001 adds, or the subtract itself is used in an add expression. In
4002 either case, breaking up the subtract into an add with negate
4003 exposes the adds to reassociation. */
4006 should_break_up_subtract (gimple *stmt)
4008 tree lhs = gimple_assign_lhs (stmt);
4009 tree binlhs = gimple_assign_rhs1 (stmt);
4010 tree binrhs = gimple_assign_rhs2 (stmt);
4012 struct loop *loop = loop_containing_stmt (stmt);
4014 if (TREE_CODE (binlhs) == SSA_NAME
4015 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4018 if (TREE_CODE (binrhs) == SSA_NAME
4019 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4022 if (TREE_CODE (lhs) == SSA_NAME
4023 && (immusestmt = get_single_immediate_use (lhs))
4024 && is_gimple_assign (immusestmt)
4025 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4026 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4031 /* Transform STMT from A - B into A + -B. */
4034 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4036 tree rhs1 = gimple_assign_rhs1 (stmt);
4037 tree rhs2 = gimple_assign_rhs2 (stmt);
4039 if (dump_file && (dump_flags & TDF_DETAILS))
4041 fprintf (dump_file, "Breaking up subtract ");
4042 print_gimple_stmt (dump_file, stmt, 0, 0);
4045 rhs2 = negate_value (rhs2, gsip);
4046 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4050 /* Determine whether STMT is a builtin call that raises an SSA name
4051 to an integer power and has only one use. If so, and this is early
4052 reassociation and unsafe math optimizations are permitted, place
4053 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4054 If any of these conditions does not hold, return FALSE. */
4057 acceptable_pow_call (gimple *stmt, tree *base, HOST_WIDE_INT *exponent)
4060 REAL_VALUE_TYPE c, cint;
4062 if (!reassoc_insert_powi_p
4063 || !flag_unsafe_math_optimizations
4064 || !is_gimple_call (stmt)
4065 || !has_single_use (gimple_call_lhs (stmt)))
4068 switch (gimple_call_combined_fn (stmt))
4071 if (flag_errno_math)
4074 *base = gimple_call_arg (stmt, 0);
4075 arg1 = gimple_call_arg (stmt, 1);
4077 if (TREE_CODE (arg1) != REAL_CST)
4080 c = TREE_REAL_CST (arg1);
4082 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4085 *exponent = real_to_integer (&c);
4086 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4087 if (!real_identical (&c, &cint))
4093 *base = gimple_call_arg (stmt, 0);
4094 arg1 = gimple_call_arg (stmt, 1);
4096 if (!tree_fits_shwi_p (arg1))
4099 *exponent = tree_to_shwi (arg1);
4106 /* Expanding negative exponents is generally unproductive, so we don't
4107 complicate matters with those. Exponents of zero and one should
4108 have been handled by expression folding. */
4109 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4115 /* Recursively linearize a binary expression that is the RHS of STMT.
4116 Place the operands of the expression tree in the vector named OPS. */
4119 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
4120 bool is_associative, bool set_visited)
4122 tree binlhs = gimple_assign_rhs1 (stmt);
4123 tree binrhs = gimple_assign_rhs2 (stmt);
4124 gimple *binlhsdef = NULL, *binrhsdef = NULL;
4125 bool binlhsisreassoc = false;
4126 bool binrhsisreassoc = false;
4127 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4128 struct loop *loop = loop_containing_stmt (stmt);
4129 tree base = NULL_TREE;
4130 HOST_WIDE_INT exponent = 0;
4133 gimple_set_visited (stmt, true);
4135 if (TREE_CODE (binlhs) == SSA_NAME)
4137 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4138 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4139 && !stmt_could_throw_p (binlhsdef));
4142 if (TREE_CODE (binrhs) == SSA_NAME)
4144 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4145 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4146 && !stmt_could_throw_p (binrhsdef));
4149 /* If the LHS is not reassociable, but the RHS is, we need to swap
4150 them. If neither is reassociable, there is nothing we can do, so
4151 just put them in the ops vector. If the LHS is reassociable,
4152 linearize it. If both are reassociable, then linearize the RHS
4155 if (!binlhsisreassoc)
4157 /* If this is not an associative operation like division, give up. */
4158 if (!is_associative)
4160 add_to_ops_vec (ops, binrhs);
4164 if (!binrhsisreassoc)
4166 if (rhscode == MULT_EXPR
4167 && TREE_CODE (binrhs) == SSA_NAME
4168 && acceptable_pow_call (binrhsdef, &base, &exponent))
4170 add_repeat_to_ops_vec (ops, base, exponent);
4171 gimple_set_visited (binrhsdef, true);
4174 add_to_ops_vec (ops, binrhs);
4176 if (rhscode == MULT_EXPR
4177 && TREE_CODE (binlhs) == SSA_NAME
4178 && acceptable_pow_call (binlhsdef, &base, &exponent))
4180 add_repeat_to_ops_vec (ops, base, exponent);
4181 gimple_set_visited (binlhsdef, true);
4184 add_to_ops_vec (ops, binlhs);
4189 if (dump_file && (dump_flags & TDF_DETAILS))
4191 fprintf (dump_file, "swapping operands of ");
4192 print_gimple_stmt (dump_file, stmt, 0, 0);
4195 swap_ssa_operands (stmt,
4196 gimple_assign_rhs1_ptr (stmt),
4197 gimple_assign_rhs2_ptr (stmt));
4200 if (dump_file && (dump_flags & TDF_DETAILS))
4202 fprintf (dump_file, " is now ");
4203 print_gimple_stmt (dump_file, stmt, 0, 0);
4206 /* We want to make it so the lhs is always the reassociative op,
4207 so swap. */
4208 std::swap (binlhs, binrhs);
4210 else if (binrhsisreassoc)
4212 linearize_expr (stmt);
4213 binlhs = gimple_assign_rhs1 (stmt);
4214 binrhs = gimple_assign_rhs2 (stmt);
4217 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4218 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4220 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4221 is_associative, set_visited);
4223 if (rhscode == MULT_EXPR
4224 && TREE_CODE (binrhs) == SSA_NAME
4225 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4227 add_repeat_to_ops_vec (ops, base, exponent);
4228 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4231 add_to_ops_vec (ops, binrhs);
4234 /* Repropagate the negates back into subtracts, since no other pass
4235 currently does it. */
4238 repropagate_negates (void)
4243 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4245 gimple *user = get_single_immediate_use (negate);
4247 if (!user || !is_gimple_assign (user))
4250 /* The negate operand can be either operand of a PLUS_EXPR
4251 (it can be the LHS if the RHS is a constant for example).
4253 Force the negate operand to the RHS of the PLUS_EXPR, then
4254 transform the PLUS_EXPR into a MINUS_EXPR. */
4255 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4257 /* If the negated operand appears on the LHS of the
4258 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4259 to force the negated operand to the RHS of the PLUS_EXPR. */
4260 if (gimple_assign_rhs1 (user) == negate)
4262 swap_ssa_operands (user,
4263 gimple_assign_rhs1_ptr (user),
4264 gimple_assign_rhs2_ptr (user));
4267 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4268 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4269 if (gimple_assign_rhs2 (user) == negate)
4271 tree rhs1 = gimple_assign_rhs1 (user);
4272 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
4273 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4274 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4278 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4280 if (gimple_assign_rhs1 (user) == negate)
4285 which we transform into
4288 This pushes down the negate which we possibly can merge
4289 into some other operation, hence insert it into the
4290 plus_negates vector. */
4291 gimple *feed = SSA_NAME_DEF_STMT (negate);
4292 tree a = gimple_assign_rhs1 (feed);
4293 tree b = gimple_assign_rhs2 (user);
4294 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4295 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4296 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4297 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
4298 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4299 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4300 user = gsi_stmt (gsi2);
4302 reassoc_remove_stmt (&gsi);
4303 release_defs (feed);
4304 plus_negates.safe_push (gimple_assign_lhs (user));
4308 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4309 rid of one operation. */
4310 gimple *feed = SSA_NAME_DEF_STMT (negate);
4311 tree a = gimple_assign_rhs1 (feed);
4312 tree rhs1 = gimple_assign_rhs1 (user);
4313 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4314 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4315 update_stmt (gsi_stmt (gsi));
4321 /* Returns true if OP is of a type for which we can do reassociation.
4322 That is for integral or non-saturating fixed-point types, and for
4323 floating point type when associative-math is enabled. */
4326 can_reassociate_p (tree op)
4328 tree type = TREE_TYPE (op);
4329 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4330 || NON_SAT_FIXED_POINT_TYPE_P (type)
4331 || (flag_associative_math && FLOAT_TYPE_P (type)))
4336 /* Break up subtract operations in block BB.
4338 We do this top down because we don't know whether the subtract is
4339 part of a possible chain of reassociation except at the top.
4348 we want to break up k = t - q, but we won't until we've transformed q
4349 = b - r, which won't be broken up until we transform b = c - d.
4351 En passant, clear the GIMPLE visited flag on every statement
4352 and set UIDs within each basic block. */
4355 break_up_subtract_bb (basic_block bb)
4357 gimple_stmt_iterator gsi;
4359 unsigned int uid = 1;
4361 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4363 gimple *stmt = gsi_stmt (gsi);
4364 gimple_set_visited (stmt, false);
4365 gimple_set_uid (stmt, uid++);
4367 if (!is_gimple_assign (stmt)
4368 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4371 /* Look for simple gimple subtract operations. */
4372 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4374 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4375 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4378 /* Check for a subtract used only in an addition. If this
4379 is the case, transform it into add of a negate for better
4380 reassociation. IE transform C = A-B into C = A + -B if C
4381 is only used in an addition. */
4382 if (should_break_up_subtract (stmt))
4383 break_up_subtract (stmt, &gsi);
4385 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4386 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4387 plus_negates.safe_push (gimple_assign_lhs (stmt));
4389 for (son = first_dom_son (CDI_DOMINATORS, bb);
4391 son = next_dom_son (CDI_DOMINATORS, son))
4392 break_up_subtract_bb (son);
/* Used for repeated factor analysis.  */
struct repeat_factor
{
  /* An SSA name that occurs in a multiply chain.  */
  tree factor;

  /* Cached rank of the factor.  */
  unsigned rank;

  /* Number of occurrences of the factor in the chain.  */
  HOST_WIDE_INT count;

  /* An SSA name representing the product of this factor and
     all factors appearing later in the repeated factor vector.  */
  tree repr;
};

static vec<repeat_factor> repeat_factor_vec;
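
/* Illustration (hypothetical chain): scanning the product x * x * y
   in attempt_builtin_powi below yields two entries, {factor = x,
   count = 2} and {factor = y, count = 1}, each with repr initially
   NULL_TREE until a partial product is cached for it.  */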
/* Used for sorting the repeat factor vector.  Sort primarily by
   ascending occurrence count, secondarily by descending rank.  */

static int
compare_repeat_factors (const void *x1, const void *x2)
{
  const repeat_factor *rf1 = (const repeat_factor *) x1;
  const repeat_factor *rf2 = (const repeat_factor *) x2;

  if (rf1->count != rf2->count)
    return rf1->count - rf2->count;

  return rf2->rank - rf1->rank;
}
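
/* Worked example (assumed counts and ranks): entries x:{count 3},
   y:{count 2}, z:{count 2} sort so that the lower counts come first;
   between y and z, whichever has the *higher* rank precedes, since
   equal-count entries are ordered by descending rank.  */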
/* Look for repeated operands in OPS in the multiply tree rooted at
   STMT.  Replace them with an optimal sequence of multiplies and powi
   builtin calls, and remove the used operands from OPS.  Return an
   SSA name representing the value of the replacement sequence.  */

static tree
attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
{
  unsigned i, j, vec_len;
  int ii;
  operand_entry *oe;
  repeat_factor *rf1, *rf2;
  repeat_factor rfnew;
  tree result = NULL_TREE;
  tree target_ssa, iter_result;
  tree type = TREE_TYPE (gimple_get_lhs (stmt));
  tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gimple *mul_stmt, *pow_stmt;

  /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
     target.  */
  if (!powi_fndecl)
    return NULL_TREE;

  /* Allocate the repeated factor vector.  */
  repeat_factor_vec.create (10);

  /* Scan the OPS vector for all SSA names in the product and build
     up a vector of occurrence counts for each factor.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
        {
          FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
            {
              if (rf1->factor == oe->op)
                {
                  rf1->count += oe->count;
                  break;
                }
            }

          if (j >= repeat_factor_vec.length ())
            {
              rfnew.factor = oe->op;
              rfnew.rank = oe->rank;
              rfnew.count = oe->count;
              rfnew.repr = NULL_TREE;
              repeat_factor_vec.safe_push (rfnew);
            }
        }
    }

  /* Sort the repeated factor vector by (a) increasing occurrence count,
     and (b) decreasing rank.  */
  repeat_factor_vec.qsort (compare_repeat_factors);

  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_pow opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions:

        t1 = y * z
        t2 = t1 * x
        t3 = t2 * t2
        t4 = t1 * t3
        result = t4 * z

     If we instead ignored the cached y * z and first multiplied by
     the __builtin_pow opportunity z * z, we would get the inferior:

        t1 = y * z
        t2 = t1 * x
        t3 = t2 * t2
        t4 = z * z
        t5 = t3 * t4
        result = t5 * y  */

  vec_len = repeat_factor_vec.length ();
  /* Repeatedly look for opportunities to create a builtin_powi call.  */
  while (true)
    {
      HOST_WIDE_INT power;

      /* First look for the largest cached product of factors from
         preceding iterations.  If found, create a builtin_powi for
         it if the minimum occurrence count for its factors is at
         least 2, or just use this cached product as our next
         multiplicand if the minimum occurrence count is 1.  */
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
        {
          if (rf1->repr && rf1->count > 0)
            break;
        }

      if (j < vec_len)
        {
          power = rf1->count;

          if (power == 1)
            {
              iter_result = rf1->repr;

              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  unsigned elt;
                  repeat_factor *rf;
                  fputs ("Multiplying by cached product ", dump_file);
                  for (elt = j; elt < vec_len; elt++)
                    {
                      rf = &repeat_factor_vec[elt];
                      print_generic_expr (dump_file, rf->factor, 0);
                      if (elt < vec_len - 1)
                        fputs (" * ", dump_file);
                    }
                  fputs ("\n", dump_file);
                }
            }
          else
            {
              iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
              pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                            build_int_cst (integer_type_node,
                                                           power));
              gimple_call_set_lhs (pow_stmt, iter_result);
              gimple_set_location (pow_stmt, gimple_location (stmt));
              gimple_set_uid (pow_stmt, gimple_uid (stmt));
              gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);

              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  unsigned elt;
                  repeat_factor *rf;
                  fputs ("Building __builtin_pow call for cached product (",
                         dump_file);
                  for (elt = j; elt < vec_len; elt++)
                    {
                      rf = &repeat_factor_vec[elt];
                      print_generic_expr (dump_file, rf->factor, 0);
                      if (elt < vec_len - 1)
                        fputs (" * ", dump_file);
                    }
                  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n",
                           power);
                }
            }
        }
      else
        {
          /* Otherwise, find the first factor in the repeated factor
             vector whose occurrence count is at least 2.  If no such
             factor exists, there are no builtin_powi opportunities
             remaining.  */
          FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
            {
              if (rf1->count >= 2)
                break;
            }

          if (j >= vec_len)
            break;

          power = rf1->count;

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              unsigned elt;
              repeat_factor *rf;
              fputs ("Building __builtin_pow call for (", dump_file);
              for (elt = j; elt < vec_len; elt++)
                {
                  rf = &repeat_factor_vec[elt];
                  print_generic_expr (dump_file, rf->factor, 0);
                  if (elt < vec_len - 1)
                    fputs (" * ", dump_file);
                }
              fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n", power);
            }

          reassociate_stats.pows_created++;

          /* Visit each element of the vector in reverse order (so that
             high-occurrence elements are visited first, and within the
             same occurrence count, lower-ranked elements are visited
             first).  Form a linear product of all elements in this order
             whose occurrence count is at least that of element J.
             Record the SSA name representing the product of each element
             with all subsequent elements in the vector.  */
          if (j == vec_len - 1)
            rf1->repr = rf1->factor;
          else
            {
              for (ii = vec_len - 2; ii >= (int)j; ii--)
                {
                  tree op1, op2;

                  rf1 = &repeat_factor_vec[ii];
                  rf2 = &repeat_factor_vec[ii + 1];

                  /* Init the last factor's representative to be itself.  */
                  if (!rf2->repr)
                    rf2->repr = rf2->factor;

                  op1 = rf1->factor;
                  op2 = rf2->repr;

                  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
                  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
                                                  op1, op2);
                  gimple_set_location (mul_stmt, gimple_location (stmt));
                  gimple_set_uid (mul_stmt, gimple_uid (stmt));
                  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
                  rf1->repr = target_ssa;

                  /* Don't reprocess the multiply we just introduced.  */
                  gimple_set_visited (mul_stmt, true);
                }
            }

          /* Form a call to __builtin_powi for the maximum product
             just formed, raised to the power obtained earlier.  */
          rf1 = &repeat_factor_vec[j];
          iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
          pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                        build_int_cst (integer_type_node,
                                                       power));
          gimple_call_set_lhs (pow_stmt, iter_result);
          gimple_set_location (pow_stmt, gimple_location (stmt));
          gimple_set_uid (pow_stmt, gimple_uid (stmt));
          gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
        }

      /* If we previously formed at least one other builtin_powi call,
         form the product of this one and those others.  */
      if (result)
        {
          tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
          mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
                                          result, iter_result);
          gimple_set_location (mul_stmt, gimple_location (stmt));
          gimple_set_uid (mul_stmt, gimple_uid (stmt));
          gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
          gimple_set_visited (mul_stmt, true);
          result = new_result;
        }
      else
        result = iter_result;

      /* Decrement the occurrence count of each element in the product
         by the count found above, and remove this many copies of each
         factor from OPS.  */
      for (i = j; i < vec_len; i++)
        {
          unsigned k = power;
          unsigned n;

          rf1 = &repeat_factor_vec[i];
          rf1->count -= power;

          FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
            {
              if (oe->op == rf1->factor)
                {
                  if (oe->count <= k)
                    {
                      ops->ordered_remove (n);
                      k -= oe->count;

                      if (k == 0)
                        break;
                    }
                  else
                    {
                      oe->count -= k;
                      break;
                    }
                }
            }
        }
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}
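
/* Net effect, as an illustrative sketch (assuming a float LHS so a
   powi variant exists for the type): for the chain l = x * x * x * y * y
   the factors sort to y:{count 2}, x:{count 3}, power 2 is chosen,
   a linear product reassocpow_1 = y * x is formed, then
   reassocpow_2 = __builtin_powif (reassocpow_1, 2) is emitted, and
   the leftover single occurrence of x stays in OPS for normal
   reassociation, yielding l = reassocpow_2 * x.  */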
/* Attempt to optimize
   CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
   CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */

static void
attempt_builtin_copysign (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  unsigned int i;
  unsigned int length = ops->length ();
  tree cst = ops->last ()->op;

  if (length == 1 || TREE_CODE (cst) != REAL_CST)
    return;

  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME
          && has_single_use (oe->op))
        {
          gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
          if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
            {
              tree arg0, arg1;
              switch (gimple_call_combined_fn (old_call))
                {
                CASE_CFN_COPYSIGN:
                  arg0 = gimple_call_arg (old_call, 0);
                  arg1 = gimple_call_arg (old_call, 1);
                  /* The first argument of copysign must be a constant,
                     otherwise there's nothing to do.  */
                  if (TREE_CODE (arg0) == REAL_CST)
                    {
                      tree type = TREE_TYPE (arg0);
                      tree mul = const_binop (MULT_EXPR, type, cst, arg0);
                      /* If we couldn't fold to a single constant, skip it.
                         That happens e.g. for inexact multiplication when
                         -frounding-math.  */
                      if (mul == NULL_TREE)
                        break;
                      /* Instead of adjusting OLD_CALL, let's build a new
                         call to not leak the LHS and prevent keeping bogus
                         debug statements.  DCE will clean up the old call.  */
                      gcall *new_call;
                      if (gimple_call_internal_p (old_call))
                        new_call = gimple_build_call_internal
                          (IFN_COPYSIGN, 2, mul, arg1);
                      else
                        new_call = gimple_build_call
                          (gimple_call_fndecl (old_call), 2, mul, arg1);
                      tree lhs = make_ssa_name (type);
                      gimple_call_set_lhs (new_call, lhs);
                      gimple_set_location (new_call,
                                           gimple_location (old_call));
                      insert_stmt_after (new_call, old_call);
                      /* We've used the constant, get rid of it.  */
                      ops->pop ();
                      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
                      /* Handle the CST1 < 0 case by negating the result.  */
                      if (cst1_neg)
                        {
                          tree negrhs = make_ssa_name (TREE_TYPE (lhs));
                          gimple *negate_stmt
                            = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
                          insert_stmt_after (negate_stmt, new_call);
                          oe->op = negrhs;
                        }
                      else
                        oe->op = lhs;
                      if (dump_file && (dump_flags & TDF_DETAILS))
                        {
                          fprintf (dump_file, "Optimizing copysign: ");
                          print_generic_expr (dump_file, cst, 0);
                          fprintf (dump_file, " * COPYSIGN (");
                          print_generic_expr (dump_file, arg0, 0);
                          fprintf (dump_file, ", ");
                          print_generic_expr (dump_file, arg1, 0);
                          fprintf (dump_file, ") into %sCOPYSIGN (",
                                   cst1_neg ? "-" : "");
                          print_generic_expr (dump_file, mul, 0);
                          fprintf (dump_file, ", ");
                          print_generic_expr (dump_file, arg1, 0);
                          fprintf (dump_file, ")\n");
                        }
                      return;
                    }
                  break;
                default:
                  break;
                }
            }
        }
    }
}
/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */

static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
{
  tree rhs1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}
/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
                            tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}
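
/* For instance (hypothetical names): when reassociate_bb below is
   left with a single operand t_1 and a powi result p_2 for a
   MULT_EXPR chain, the original statement is rewritten in place to
   lhs = t_1 * p_2.  */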
/* Reassociate expressions in basic block BB and its post-dominator
   children.

   Bubble up return status from maybe_optimize_range_tests.  */

static bool
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple *stmt = last_stmt (bb);
  bool cfg_cleanup_needed = false;

  if (stmt && !gimple_visited_p (stmt))
    cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);

  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
          && !stmt_could_throw_p (stmt))
        {
          tree lhs, rhs1, rhs2;
          enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

          /* If this is not a gimple binary expression, there is
             nothing for us to do with it.  */
          if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
            continue;

          /* If this was part of an already processed statement,
             we don't need to touch it again.  */
          if (gimple_visited_p (stmt))
            {
              /* This statement might have become dead because of previous
                 reassociations.  */
              if (has_zero_uses (gimple_get_lhs (stmt)))
                {
                  reassoc_remove_stmt (&gsi);
                  release_defs (stmt);
                  /* We might end up removing the last stmt above which
                     places the iterator to the end of the sequence.
                     Reset it to the last stmt in this case which might
                     be the end of the sequence as well if we removed
                     the last statement of the sequence.  In which case
                     we need to bail out.  */
                  if (gsi_end_p (gsi))
                    {
                      gsi = gsi_last_bb (bb);
                      if (gsi_end_p (gsi))
                        break;
                    }
                }
              continue;
            }

          lhs = gimple_assign_lhs (stmt);
          rhs1 = gimple_assign_rhs1 (stmt);
          rhs2 = gimple_assign_rhs2 (stmt);

          /* For non-bit or min/max operations we can't associate
             all types.  Verify that here.  */
          if (rhs_code != BIT_IOR_EXPR
              && rhs_code != BIT_AND_EXPR
              && rhs_code != BIT_XOR_EXPR
              && rhs_code != MIN_EXPR
              && rhs_code != MAX_EXPR
              && (!can_reassociate_p (lhs)
                  || !can_reassociate_p (rhs1)
                  || !can_reassociate_p (rhs2)))
            continue;

          if (associative_tree_code (rhs_code))
            {
              auto_vec<operand_entry *> ops;
              tree powi_result = NULL_TREE;

              /* There may be no immediate uses left by the time we
                 get here because we may have eliminated them all.  */
              if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
                continue;

              gimple_set_visited (stmt, true);
              linearize_expr_tree (&ops, stmt, true, true);
              ops.qsort (sort_by_operand_rank);
              optimize_ops_list (rhs_code, &ops);
              if (undistribute_ops_list (rhs_code, &ops,
                                         loop_containing_stmt (stmt)))
                {
                  ops.qsort (sort_by_operand_rank);
                  optimize_ops_list (rhs_code, &ops);
                }

              if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
                optimize_range_tests (rhs_code, &ops);

              if (rhs_code == MULT_EXPR)
                attempt_builtin_copysign (&ops);

              if (reassoc_insert_powi_p
                  && rhs_code == MULT_EXPR
                  && flag_unsafe_math_optimizations)
                powi_result = attempt_builtin_powi (stmt, &ops);

              /* If the operand vector is now empty, all operands were
                 consumed by the __builtin_powi optimization.  */
              if (ops.length () == 0)
                transform_stmt_to_copy (&gsi, stmt, powi_result);
              else if (ops.length () == 1)
                {
                  tree last_op = ops.last ()->op;

                  if (powi_result)
                    transform_stmt_to_multiply (&gsi, stmt, last_op,
                                                powi_result);
                  else
                    transform_stmt_to_copy (&gsi, stmt, last_op);
                }
              else
                {
                  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
                  int ops_num = ops.length ();
                  int width = get_reassociation_width (ops_num, rhs_code, mode);
                  tree new_lhs = lhs;

                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file,
                             "Width = %d was chosen for reassociation\n", width);

                  if (width > 1
                      && ops.length () > 3)
                    rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
                                                width, ops);
                  else
                    {
                      /* When there are three operands left, we want
                         to make sure the ones that get the double
                         binary op are chosen wisely.  */
                      int len = ops.length ();
                      if (len >= 3)
                        swap_ops_for_binary_stmt (ops, len - 3, stmt);

                      new_lhs = rewrite_expr_tree (stmt, 0, ops,
                                                   powi_result != NULL);
                    }

                  /* If we combined some repeated factors into a
                     __builtin_powi call, multiply that result by the
                     reassociated operands.  */
                  if (powi_result)
                    {
                      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
                      tree type = TREE_TYPE (lhs);
                      tree target_ssa = make_temp_ssa_name (type, NULL,
                                                            "reassocpow");
                      gimple_set_lhs (lhs_stmt, target_ssa);
                      update_stmt (lhs_stmt);
                      if (lhs != new_lhs)
                        target_ssa = new_lhs;
                      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
                                                      powi_result, target_ssa);
                      gimple_set_location (mul_stmt, gimple_location (stmt));
                      gimple_set_uid (mul_stmt, gimple_uid (stmt));
                      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
                    }
                }
            }
        }
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    cfg_cleanup_needed |= reassociate_bb (son);

  return cfg_cleanup_needed;
}
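
/* Summary of the pipeline above, on an illustrative statement
   l = a + b + c + d: linearize_expr_tree collects {a, b, c, d}, the
   vector is sorted by rank and optimized (eliminating e.g. x + -x),
   range tests and, for MULT_EXPR chains, copysign/powi opportunities
   are tried, and finally the tree is rewritten serially by
   rewrite_expr_tree, or by rewrite_expr_tree_parallel when the
   target's reassociation width exceeds 1 and more than three
   operands remain.  */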
/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...; // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...; // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI<1(l1), OTHERVAR(l2)>;  */

static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (var);
      gimple *use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      gcc_assert (ok
                  && is_gimple_assign (use_stmt)
                  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
                  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple *g = gimple_build_cond (NE_EXPR, var,
                                     build_zero_cst (TREE_TYPE (var)),
                                     NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = REG_BR_PROB_BASE / 2;
      etrue->count = cond_bb->count / 2;
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      efalse->count -= etrue->count;
      then_bb->count -= etrue->count;

      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
        othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
        othervar = gimple_assign_rhs1 (use_stmt);
      else
        gcc_unreachable ();
      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}
void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
void debug_ops_vector (vec<operand_entry *> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry *> ops)
{
  operand_entry *oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op, 0);
      fprintf (file, "\n");
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry *> ops)
{
  dump_ops_vector (stderr, ops);
}
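
/* debug_ops_vector is meant for interactive use; e.g. from gdb one
   might do "call debug_ops_vector (ops)" while stopped inside
   reassociate_bb (usage illustrative).  */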
/* Bubble up return status from reassociate_bb.  */

static bool
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}
/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, long>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
        insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB.  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}
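
/* Note on the encoding above (explanatory sketch): shifting each
   block's rank left by 16 bits leaves the low bits free, so ranks
   computed for names defined within a block (operand rank plus small
   increments) stay below the base rank of later blocks, assuming a
   block contributes fewer than 2^16 rank increments.  */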
/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
                            reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
                            reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
                            reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
                            reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
                            reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
                            reassociate_stats.pows_created);

  delete operand_rank;
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}
/* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
   insertion of __builtin_powi calls.

   Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
   optimization of a gimple conditional.  Otherwise returns zero.  */

static unsigned int
execute_reassoc (bool insert_powi_p)
{
  reassoc_insert_powi_p = insert_powi_p;

  init_reassoc ();

  bool cfg_cleanup_needed;
  cfg_cleanup_needed = do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
}
namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      insert_powi_p = param;
    }
  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
  virtual unsigned int execute (function *)
    { return execute_reassoc (insert_powi_p); }

 private:
  /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
     point 3a in the pass header comment.  */
  bool insert_powi_p;
}; // class pass_reassoc

} // anon namespace

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}