/* Reassociation for trees.
   Copyright (C) 2005-2023 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "gimplify-me.h"
#include "tree-ssa-loop.h"
#include "langhooks.h"
#include "case-cfn-macros.h"
#include "tree-ssa-reassoc.h"
#include "tree-ssa-math-opts.h"
#include "gimple-range.h"
#include "internal-fn.h"
/* This is a simple global reassociation pass.  It is, in part, based
   on the LLVM pass of the same name (they do some things more/less
   than we do, in different orders, etc).

   It consists of five steps:

   1. Breaking up subtract operations into addition + negate, where
   it would promote the reassociation of adds.

   2. Left linearization of the expression trees, so that (A+B)+(C+D)
   becomes (((A+B)+C)+D), which is easier for us to rewrite later.
   During linearization, we place the operands of the binary
   expressions into a vector of operand_entry_*

   3. Optimization of the operand lists, eliminating things like a +
   -a, a & a, etc.

   3a. Combine repeated factors with the same occurrence counts
   into a __builtin_powi call that will later be optimized into
   an optimal number of multiplies.

   4. Rewrite the expression trees we linearized and optimized so
   they are in proper rank order.

   5. Repropagate negates, as nothing else will clean it up ATM.

   A bit of theory on #4, since nobody seems to write anything down
   about why it makes sense to do it the way they do it:

   We could do this much nicer theoretically, but don't (for reasons
   explained after how to do it theoretically nice :P).

   In order to promote the most redundancy elimination, you want
   binary expressions whose operands are the same rank (or
   preferably, the same value) exposed to the redundancy eliminator,
   for possible elimination.

   So the way to do this if we really cared, is to build the new op
   tree from the leaves to the roots, merging as you go, and putting the
   new op on the end of the worklist, until you are left with one
   thing on the worklist.

   IE if you have to rewrite the following set of operands (listed with
   rank in parentheses), with opcode PLUS_EXPR:

   a (1), b (1), c (1), d (2), e (2)

   We start with our merge worklist empty, and the ops list with all of
   those on it.

   You want to first merge all leaves of the same rank, as much as
   possible.

   So first build a binary op of

   mergetmp = a + b, and put "mergetmp" on the merge worklist.

   Because there is no three operand form of PLUS_EXPR, c is not going to
   be exposed to redundancy elimination as a rank 1 operand.

   So you might as well throw it on the merge worklist (you could also
   consider it to now be a rank two operand, and merge it with d and e,
   but in this case, you then have evicted e from a binary op.  So at
   least in this situation, you can't win.)

   Then build a binary op of d + e

   mergetmp2 = d + e

   and put mergetmp2 on the merge worklist.

   so merge worklist = {mergetmp, c, mergetmp2}

   Continue building binary ops of these operations until you have only
   one operation left on the worklist.

   So we have

   build binary op
   mergetmp3 = mergetmp + c

   worklist = {mergetmp2, mergetmp3}

   mergetmp4 = mergetmp2 + mergetmp3

   worklist = {mergetmp4}

   because we have one operation left, we can now just set the original
   statement equal to the result of that operation.

   This will at least expose a + b and d + e to redundancy elimination
   as binary operations.

   For extra points, you can reuse the old statements to build the
   mergetmps, since you shouldn't run out.

   So why don't we do this?

   Because it's expensive, and rarely will help.  Most trees we are
   reassociating have 3 or fewer ops.  If they have 2 ops, they already
   will be written into a nice single binary op.  If you have 3 ops, a
   single simple check suffices to tell you whether the first two are of the
   same rank.  If so, you know to order it

   mergetmp = op1 + op2
   newstmt = mergetmp + op3

   instead of
   mergetmp = op2 + op3
   newstmt = mergetmp + op1

   If all three are of the same rank, you can't expose them all in a
   single binary operator anyway, so the above is *still* the best you
   can do.

   Thus, this is what we do.  When we have three ops left, we check to see
   what order to put them in, and call it a day.  As a nod to vector sum
   reduction, we check if any of the ops are really a phi node that is a
   destructive update for the associating op, and keep the destructive
   update together for vector sum reduction recognition.  */
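/* As a concrete end-to-end illustration (a sketch, not actual pass
   output): given

     t1 = a - b;  t2 = t1 + c;  x = t2 + b;

   step 1 rewrites the subtract as t1 = a + (-b), step 2 linearizes the
   chain into the operand list {a, -b, c, b}, step 3 cancels -b against
   b, and step 4 rewrites what is left in rank order, leaving in effect
   x = a + c.  */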
/* Enable insertion of __builtin_powi calls during execute_reassoc.  See
   point 3a in the pass header comment.  */
static bool reassoc_insert_powi_p;

/* Enable biasing ranks of loop accumulators.  We don't want this before
   vectorization, since it interferes with reduction chains.  */
static bool reassoc_bias_loop_carried_phi_ranks_p;

/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;

static object_allocator<operand_entry> operand_entry_pool
  ("operand entry pool");

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static unsigned int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static int64_t *bb_rank;

/* Operand->rank hashtable.  */
static hash_map<tree, int64_t> *operand_rank;

/* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
   biased.  */
static auto_bitmap biased_names;

/* Vector of SSA_NAMEs on which after reassociate_bb is done with
   all basic blocks the CFG should be adjusted - basic blocks
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;
/* Forward decls.  */
static int64_t get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

static bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple *end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}
/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)
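/* Note (illustrative): basic block ranks are spaced 1 << 16 apart (see
   init_reassoc, which assigns each block's rank shifted left by 16, and
   sort_by_operand_rank below, which compares bb_rank values shifted
   right by 16), so a bias of 1 << 15 can lift a phi above every
   expression in its own block without ever pushing it past the next
   block's rank.  */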
/* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
   operands to the STMT's left-hand side.  The goal is to preserve bias in code
   like this:

     x_1 = phi(x_0, x_2)
     a = x_1 | 1
     b = a ^ 2
     .MEM = b
     c = b + d
     x_2 = c + e

   That is, we need to preserve bias along single-use chains originating from
   loop-carried phis.  Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
   uses, because only they participate in rank propagation.  */
static bool
propagate_bias_p (gimple *stmt)
{
  use_operand_p use;
  imm_use_iterator use_iter;
  gimple *single_use_stmt = NULL;

  if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference)
    return false;

  FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
    {
      gimple *current_use_stmt = USE_STMT (use);

      if (is_gimple_assign (current_use_stmt)
	  && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
	{
	  if (single_use_stmt != NULL && single_use_stmt != current_use_stmt)
	    return false;
	  single_use_stmt = current_use_stmt;
	}
    }

  if (single_use_stmt == NULL)
    return false;

  if (gimple_bb (stmt)->loop_father
      != gimple_bb (single_use_stmt)->loop_father)
    return false;

  return true;
}
/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
static int64_t
phi_rank (gimple *stmt)
{
  basic_block bb = gimple_bb (stmt);
  class loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple *use_stmt;

  if (!reassoc_bias_loop_carried_phi_ranks_p)
    return bb_rank[bb->index];

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
	  if (gimple_bb (def_stmt)->loop_father == father)
	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
	}
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}
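/* For illustration (a sketch, not pass output): given an accumulator
   phi in an innermost loop header,

     sum_1 = PHI <sum_0 (preheader), sum_2 (latch)>

   with a single use inside the loop, the function above returns
   bb_rank[latch] + PHI_LOOP_BIAS rather than the header's block rank,
   so the accumulator is ordered after the other summands when the
   in-loop addition chain is rewritten.  */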
/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */
static int64_t
propagate_rank (int64_t rank, tree op, bool *maybe_biased_p)
{
  int64_t op_rank;

  op_rank = get_rank (op);

  /* Check whether op is biased after the get_rank () call, since it might have
     updated biased_names.  */
  if (TREE_CODE (op) == SSA_NAME
      && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
    {
      if (maybe_biased_p == NULL)
	return rank;
      *maybe_biased_p = true;
    }

  return MAX (rank, op_rank);
}
/* Look up the operand rank structure for expression E.  */

static inline int64_t
find_operand_rank (tree e)
{
  int64_t *slot = operand_rank->get (e);
  return slot ? *slot : -1;
}
/* Insert {E,RANK} into the operand rank hashtable.  */

static inline void
insert_operand_rank (tree e, int64_t rank)
{
  gcc_assert (rank > 0);
  gcc_assert (!operand_rank->put (e, rank));
}
/* Given an expression E, return the rank of the expression.  */

static int64_t
get_rank (tree e)
{
  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      ssa_op_iter iter;
      gimple *stmt;
      int64_t rank;
      tree op;

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
	return rank;

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  rank = phi_rank (stmt);
	  if (rank != bb_rank[gimple_bb (stmt)->index])
	    bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
	}
      else if (!is_gimple_assign (stmt))
	rank = bb_rank[gimple_bb (stmt)->index];
      else
	{
	  bool biased_p = false;
	  bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;

	  /* Otherwise, find the maximum rank for the operands.  As an
	     exception, remove the bias from loop-carried phis when propagating
	     the rank so that dependent operations are not also biased.  */
	  /* Simply walk over all SSA uses - this takes advantage of the
	     fact that non-SSA operands are is_gimple_min_invariant and
	     thus have rank 0.  */
	  rank = 0;
	  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
	    rank = propagate_rank (rank, op, maybe_biased_p);

	  rank += 1;
	  if (biased_p)
	    bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Rank for ");
	  print_generic_expr (dump_file, e);
	  fprintf (dump_file, " is %" PRId64 "\n", rank);
	}

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, rank);
      return rank;
    }

  /* Constants, globals, etc., are rank 0 */
  return 0;
}
/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 4
#define FLOAT_ONE_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */
static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    {
      /* Sort -1.0 and 1.0 constants last, while in some cases
	 const_binop can't optimize some inexact operations, multiplication
	 by -1.0 or 1.0 can be always merged with others.  */
      if (real_onep (t) || real_minus_onep (t))
	return FLOAT_ONE_CONST_TYPE;
      return FLOAT_CONST_TYPE;
    }
  else
    return OTHER_CONST_TYPE;
}
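/* An illustrative consequence: because INTEGER_CONST_TYPE is the
   largest of these codes and sort_by_operand_rank (below) subtracts
   constant_type codes for rank-0 entries, a mixed operand list sorts
   its constants as {other, floats, +/-1.0, integers}, putting the
   integer constants last where they fold most readily.  */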
/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry *oea = *(const operand_entry *const *)pa;
  const operand_entry *oeb = *(const operand_entry *const *)pb;

  if (oeb->rank != oea->rank)
    return oeb->rank > oea->rank ? 1 : -1;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
	return constant_type (oea->op) - constant_type (oeb->op);
      else
	/* To make sorting result stable, we use unique IDs to determine
	   order.  */
	return oeb->id > oea->id ? 1 : -1;
    }

  if (TREE_CODE (oea->op) != SSA_NAME)
    {
      if (TREE_CODE (oeb->op) != SSA_NAME)
	return oeb->id > oea->id ? 1 : -1;
      else
	return 1;
    }
  else if (TREE_CODE (oeb->op) != SSA_NAME)
    return -1;

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
    {
      /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
	 versions of removed SSA_NAMEs, so if possible, prefer to sort
	 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.  */
      gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
      gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
      basic_block bba = gimple_bb (stmta);
      basic_block bbb = gimple_bb (stmtb);
      if (bbb != bba)
	{
	  /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
	     but the other might not.  */
	  if (!bba)
	    return 1;
	  if (!bbb)
	    return -1;
	  /* If neither is, compare bb_rank.  */
	  if (bb_rank[bbb->index] != bb_rank[bba->index])
	    return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
	}

      bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
      bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
      if (da != db)
	return da ? -1 : 1;

      return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
    }

  return oeb->id > oea->id ? 1 : -1;
}
/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  oe->stmt_to_insert = stmt_to_insert;
  ops->safe_push (oe);
}
/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
		       HOST_WIDE_INT repeat)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  oe->stmt_to_insert = NULL;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}
/* Returns true if we can associate the SSA def OP.  */

static bool
can_reassociate_op_p (tree op)
{
  if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
    return false;
  /* Make sure asm goto outputs do not participate in reassociation since
     we have no way to find an insertion place after asm goto.  */
  if (TREE_CODE (op) == SSA_NAME
      && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
      && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
    return false;
  return true;
}
/* Returns true if we can reassociate operations of TYPE.
   That is for integral or non-saturating fixed-point types, and for
   floating point type when associative-math is enabled.  */

static bool
can_reassociate_type_p (tree type)
{
  if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
      || NON_SAT_FIXED_POINT_TYPE_P (type)
      || (flag_associative_math && FLOAT_TYPE_P (type)))
    return true;
  return false;
}
/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    {
      tree rhs1 = gimple_assign_rhs1 (stmt);
      tree rhs2 = gimple_assign_rhs2 (stmt);
      if (!can_reassociate_op_p (rhs1)
	  || (rhs2 && !can_reassociate_op_p (rhs2)))
	return false;
      return true;
    }

  return false;
}
/* Return true if STMT is a nop-conversion.  */

static bool
gimple_nop_conversion_p (gimple *stmt)
{
  if (gassign *ass = dyn_cast <gassign *> (stmt))
    {
      if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
	  && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
				    TREE_TYPE (gimple_assign_rhs1 (ass))))
	return true;
    }
  return false;
}
/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of that operation.  Otherwise, return NULL.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple *stmt = SSA_NAME_DEF_STMT (name);

  /* Look through nop conversions (sign changes).  */
  if (gimple_nop_conversion_p (stmt)
      && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
    stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}
/* Return true if OP1 and OP2 have the same value if cast to either type.  */

static bool
ops_equal_values_p (tree op1, tree op2)
{
  if (op1 == op2)
    return true;

  tree orig_op1 = op1;
  if (TREE_CODE (op1) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op1);
      if (gimple_nop_conversion_p (stmt))
	{
	  op1 = gimple_assign_rhs1 (stmt);
	  if (op1 == op2)
	    return true;
	}
    }

  if (TREE_CODE (op2) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op2);
      if (gimple_nop_conversion_p (stmt))
	{
	  op2 = gimple_assign_rhs1 (stmt);
	  if (op1 == op2
	      || orig_op1 == op2)
	    return true;
	}
    }

  return false;
}
/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
			  vec<operand_entry *> *ops,
			  bool *all_done,
			  unsigned int i,
			  operand_entry *curr,
			  operand_entry *last)
{

  /* If we have two of the same op, and the opcode is & |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
	{
	case MAX_EXPR:
	case MIN_EXPR:
	case BIT_IOR_EXPR:
	case BIT_AND_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op);
	      fprintf (dump_file, " [&|minmax] ");
	      print_generic_expr (dump_file, last->op);
	      fprintf (dump_file, " -> ");
	      print_generic_stmt (dump_file, last->op);
	    }

	  ops->ordered_remove (i);
	  reassociate_stats.ops_eliminated ++;

	  return true;

	case BIT_XOR_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op);
	      fprintf (dump_file, " ^ ");
	      print_generic_expr (dump_file, last->op);
	      fprintf (dump_file, " -> nothing\n");
	    }

	  reassociate_stats.ops_eliminated += 2;

	  if (ops->length () == 2)
	    {
	      ops->truncate (0);
	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
	      *all_done = true;
	    }
	  else
	    {
	      ops->ordered_remove (i-1);
	      ops->ordered_remove (i-1);
	    }

	  return true;

	default:
	  break;
	}
    }
  return false;
}
static vec<tree> plus_negates;
/* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
   expression, look in OPS for a corresponding positive operation to cancel
   it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
			   vec<operand_entry *> *ops,
			   unsigned int currindex,
			   operand_entry *curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (negateop
	  && ops_equal_values_p (oe->op, negateop))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, negateop);
	      fprintf (dump_file, " + -");
	      print_generic_expr (dump_file, oe->op);
	      fprintf (dump_file, " -> 0\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
      else if (notop
	       && ops_equal_values_p (oe->op, notop))
	{
	  tree op_type = TREE_TYPE (oe->op);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop);
	      fprintf (dump_file, " + ~");
	      print_generic_expr (dump_file, oe->op);
	      fprintf (dump_file, " -> -1\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_all_ones_cst (op_type));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
    }

  /* If CURR->OP is a negate expr without nop conversion in a plus expr:
     save it for later inspection in repropagate_negates().  */
  if (negateop != NULL_TREE
      && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
    plus_negates.safe_push (curr->op);

  return false;
}
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove all other ops from OPS,
   replace the found operand with 0 (for &) or -1 (for |), and return
   true.  Otherwise, return false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
		     vec<operand_entry *> *ops,
		     unsigned int currindex,
		     operand_entry *curr)
{
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " & ~");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " | ~");
	      print_generic_expr (dump_file, oe->op);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " -> 0\n");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " -> -1\n");
	    }

	  if (opcode == BIT_AND_EXPR)
	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
	  else if (opcode == BIT_IOR_EXPR)
	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));

	  reassociate_stats.ops_eliminated += ops->length () - 1;
	  ops->truncate (0);
	  ops->quick_push (oe);
	  return true;
	}
    }

  return false;
}
/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */
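/* Concretely, the identities handled below are: x & 0 -> 0, x & -1 -> x,
   x | -1 -> -1, x | 0 -> x, x * 0 -> 0 (for floats only without NaNs and
   signed zeros), x * 1 -> x, and x [+|-|^] 0 -> x.  */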
static void
eliminate_using_constants (enum tree_code opcode,
			   vec<operand_entry *> *ops)
{
  operand_entry *oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
	{
	case BIT_AND_EXPR:
	  if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & -1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case BIT_IOR_EXPR:
	  if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | -1, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case MULT_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && !HONOR_NANS (type)
		  && !HONOR_SIGNED_ZEROS (type)
		  && real_zerop (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;
		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_onep (oelast->op)
		   || (FLOAT_TYPE_P (type)
		       && !HONOR_SNANS (type)
		       && real_onep (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	case BIT_XOR_EXPR:
	case PLUS_EXPR:
	case MINUS_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
		  && fold_real_zero_addition_p (type, 0, oelast->op,
						opcode == MINUS_EXPR)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found [|^+] 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	default:
	  break;
	}
    }
}
static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
				 bool, bool);

/* Structure for tracking and counting operands.  */
struct oecount {
  /* Occurrence count.  */
  unsigned cnt;
  /* Unique ID, used to stabilize sorting.  */
  unsigned id;
  /* Operation code.  */
  enum tree_code oecode;
  /* The operand.  */
  tree op;
};


/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;
/* Oecount hashtable helpers.  */

struct oecount_hasher : int_hash <int, 0, 1>
{
  static inline hashval_t hash (int);
  static inline bool equal (int, int);
};

/* Hash function for oecount.  */

inline hashval_t
oecount_hasher::hash (int p)
{
  const oecount *c = &cvec[p - 42];
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}

/* Comparison function for oecount.  */

inline bool
oecount_hasher::equal (int p1, int p2)
{
  const oecount *c1 = &cvec[p1 - 42];
  const oecount *c2 = &cvec[p2 - 42];
  return c1->oecode == c2->oecode && c1->op == c2->op;
}
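/* A note on the "- 42" above: the table stores plain ints, and int_hash
   <int, 0, 1> reserves 0 for empty and 1 for deleted slots, so the
   stored values are cvec indices biased upward (undistribute_ops_list
   inserts cvec.length () + 41, i.e. the pushed element's index plus 42),
   which keeps every real key clear of the reserved values.  */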
/* Comparison function for qsort sorting oecount elements by count.  */

static int
oecount_cmp (const void *p1, const void *p2)
{
  const oecount *c1 = (const oecount *)p1;
  const oecount *c2 = (const oecount *)p2;
  if (c1->cnt != c2->cnt)
    return c1->cnt > c2->cnt ? 1 : -1;
  else
    /* If counts are identical, use unique IDs to stabilize qsort.  */
    return c1->id > c2->id ? 1 : -1;
}
/* Return TRUE iff STMT represents a builtin call that raises OP
   to some exponent.  */

static bool
stmt_is_power_of_op (gimple *stmt, tree op)
{
  if (!is_gimple_call (stmt))
    return false;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));

    default:
      return false;
    }
}
/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */

static HOST_WIDE_INT
decrement_power (gimple *stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, SIGNED);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_CFN_POWI:
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      gcc_unreachable ();
    }
}
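/* For example (illustrative): for __builtin_powi (x, 3) this rewrites
   the call in place to __builtin_powi (x, 2) and returns 2; a caller
   such as zero_one_operation treats a return value of 1 as "the call
   has degenerated to plain x" and forwards x to the single use.  */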
/* Replace SSA defined by STMT and replace all its uses with new
   SSA.  Also return the new SSA.  */

static tree
make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
{
  gimple *use_stmt;
  use_operand_p use;
  imm_use_iterator iter;
  tree new_lhs, new_debug_lhs = NULL_TREE;
  tree lhs = gimple_get_lhs (stmt);

  new_lhs = make_ssa_name (TREE_TYPE (lhs));
  gimple_set_lhs (stmt, new_lhs);

  /* Also need to update GIMPLE_DEBUGs.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      tree repl = new_lhs;
      if (is_gimple_debug (use_stmt))
	{
	  if (new_debug_lhs == NULL_TREE)
	    {
	      new_debug_lhs = build_debug_expr_decl (TREE_TYPE (lhs));
	      gdebug *def_temp
		= gimple_build_debug_bind (new_debug_lhs,
					   build2 (opcode, TREE_TYPE (lhs),
						   new_lhs, op),
					   stmt);
	      gimple_set_uid (def_temp, gimple_uid (stmt));
	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	      gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
	    }
	  repl = new_debug_lhs;
	}
      FOR_EACH_IMM_USE_ON_STMT (use, iter)
	SET_USE (use, repl);
      update_stmt (use_stmt);
    }
  return new_lhs;
}
/* Replace all SSAs defined in STMTS_TO_FIX and replace their
   uses with new SSAs.  Also do this for the stmt that defines DEF
   if *DEF is not OP.  */

static void
make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
			   vec<gimple *> &stmts_to_fix)
{
  unsigned i;
  gimple *stmt;

  if (*def != op
      && TREE_CODE (*def) == SSA_NAME
      && (stmt = SSA_NAME_DEF_STMT (*def))
      && gimple_code (stmt) != GIMPLE_NOP)
    *def = make_new_ssa_for_def (stmt, opcode, op);

  FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
    make_new_ssa_for_def (stmt, opcode, op);
}
/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
{
  tree lhs;
  gimple *use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  reassoc_remove_stmt (&gsi);
  release_defs (stmt);
}
/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  tree orig_def = *def;
  gimple *stmt = SSA_NAME_DEF_STMT (*def);
  /* PR72835 - Record the stmt chain that has to be updated such that
     we don't use the same LHS when the values computed are different.  */
  auto_vec<gimple *, 64> stmts_to_fix;

  do
    {
      tree name;

      if (opcode == MULT_EXPR)
	{
	  if (stmt_is_power_of_op (stmt, op))
	    {
	      if (decrement_power (stmt) == 1)
		{
		  if (stmts_to_fix.length () > 0)
		    stmts_to_fix.pop ();
		  propagate_op_to_single_use (op, stmt, def);
		}
	      break;
	    }
	  else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
	    {
	      if (gimple_assign_rhs1 (stmt) == op)
		{
		  tree cst = build_minus_one_cst (TREE_TYPE (op));
		  if (stmts_to_fix.length () > 0)
		    stmts_to_fix.pop ();
		  propagate_op_to_single_use (cst, stmt, def);
		  break;
		}
	      else if (integer_minus_onep (op)
		       || real_minus_onep (op))
		{
		  gimple_assign_set_rhs_code
		    (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
		  break;
		}
	    }
	}

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
	 is ours simply propagate the other operand into the stmts
	 single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
	  && (name == op
	      || gimple_assign_rhs2 (stmt) == op))
	{
	  if (name == op)
	    name = gimple_assign_rhs2 (stmt);
	  if (stmts_to_fix.length () > 0)
	    stmts_to_fix.pop ();
	  propagate_op_to_single_use (name, stmt, def);
	  break;
	}

      /* We might have a multiply of two __builtin_pow* calls, and
	 the operand might be hiding in the rightmost one.  Likewise
	 this can happen for a negate.  */
      if (opcode == MULT_EXPR
	  && gimple_assign_rhs_code (stmt) == opcode
	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
	  && has_single_use (gimple_assign_rhs2 (stmt)))
	{
	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
	  if (stmt_is_power_of_op (stmt2, op))
	    {
	      if (decrement_power (stmt2) == 1)
		propagate_op_to_single_use (op, stmt2, def);
	      else
		stmts_to_fix.safe_push (stmt2);
	      break;
	    }
	  else if (is_gimple_assign (stmt2)
		   && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
	    {
	      if (gimple_assign_rhs1 (stmt2) == op)
		{
		  tree cst = build_minus_one_cst (TREE_TYPE (op));
		  propagate_op_to_single_use (cst, stmt2, def);
		  break;
		}
	      else if (integer_minus_onep (op)
		       || real_minus_onep (op))
		{
		  stmts_to_fix.safe_push (stmt2);
		  gimple_assign_set_rhs_code
		    (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
		  break;
		}
	    }
	}

      /* Continue walking the chain.  */
      gcc_assert (name != op
		  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
      stmts_to_fix.safe_push (stmt);
    }
  while (1);

  if (stmts_to_fix.length () > 0 || *def == orig_def)
    make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
}
/* Returns true if statement S1 dominates statement S2.  Like
   stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */

static bool
reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     SSA_NAME.  Assume it lives at the beginning of function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 == bb2)
    {
      /* PHIs in the same basic block are assumed to be
	 executed all in parallel, if only one stmt is a PHI,
	 it dominates the other stmt in the same basic block.  */
      if (gimple_code (s1) == GIMPLE_PHI)
	return true;

      if (gimple_code (s2) == GIMPLE_PHI)
	return false;

      gcc_assert (gimple_uid (s1) && gimple_uid (s2));

      if (gimple_uid (s1) < gimple_uid (s2))
	return true;

      if (gimple_uid (s1) > gimple_uid (s2))
	return false;

      gimple_stmt_iterator gsi = gsi_for_stmt (s1);
      unsigned int uid = gimple_uid (s1);
      for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *s = gsi_stmt (gsi);
	  if (gimple_uid (s) != uid)
	    break;
	  if (s == s2)
	    return true;
	}

      return false;
    }

  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}
/* Insert STMT after INSERT_POINT.  */

static void
insert_stmt_after (gimple *stmt, gimple *insert_point)
{
  gimple_stmt_iterator gsi;
  basic_block bb;

  if (gimple_code (insert_point) == GIMPLE_PHI)
    bb = gimple_bb (insert_point);
  else if (!stmt_ends_bb_p (insert_point))
    {
      gsi = gsi_for_stmt (insert_point);
      gimple_set_uid (stmt, gimple_uid (insert_point));
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
      return;
    }
  else if (gimple_code (insert_point) == GIMPLE_ASM
	   && gimple_asm_nlabels (as_a <gasm *> (insert_point)) != 0)
    /* We have no idea where to insert - it depends on where the
       uses will be placed.  */
    gcc_unreachable ();
  else
    /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
       thus if it must end a basic block, it should be a call that can
       throw, or some assignment that can throw.  If it throws, the LHS
       of it will not be initialized though, so only valid places using
       the SSA_NAME should be dominated by the fallthru edge.  */
    bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
  gsi = gsi_after_labels (bb);
  if (gsi_end_p (gsi))
    {
      gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
      gimple_set_uid (stmt,
		      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
    }
  else
    gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}
/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
   the result.  Places the statement after the definition of either
   OP1 or OP2.  Returns the new statement.  */

static gimple *
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
{
  gimple *op1def = NULL, *op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gassign *sum;

  /* Create the addition statement.  */
  op = make_ssa_name (type);
  sum = gimple_build_assign (op, opcode, op1, op2);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
      if (!gsi_end_p (gsi)
	  && is_gimple_call (gsi_stmt (gsi))
	  && (gimple_call_flags (gsi_stmt (gsi)) & ECF_RETURNS_TWICE))
	{
	  /* Don't add statements before a returns_twice call at the start
	     of a function.  */
	  split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	  gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	}
      if (gsi_end_p (gsi))
	{
	  gimple_stmt_iterator gsi2
	    = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	  gimple_set_uid (sum,
			  gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
	}
      else
	gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else
    {
      gimple *insert_point;
      if ((!op1def || gimple_nop_p (op1def))
	  || (op2def && !gimple_nop_p (op2def)
	      && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
	insert_point = op2def;
      else
	insert_point = op1def;
      insert_stmt_after (sum, insert_point);
    }
  update_stmt (sum);

  return sum;
}
/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
	second candidate bitmap noting all multiplication/division chains
	that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
	candidates with one (operand, code) operation removed from their
	multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
	multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
	processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b) * x * y in one invocation of the reassociation pass.  */
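/* A small worked example (illustrative only): for the chain
   a * x + b * x + c * y the counting table comes out as (x, *) -> 2
   and (a, *), (b, *), (c, *), (y, *) -> 1 each.  The (x, *) pair is
   processed first, the two chains containing x each get one multiply
   by x removed, and the chain is rebuilt as (a + b) * x + c * y.  */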
static bool
undistribute_ops_list (enum tree_code opcode,
		       vec<operand_entry *> *ops, class loop *loop)
{
  unsigned int length = ops->length ();
  operand_entry *oe1;
  unsigned i, j;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  vec<operand_entry *> *subops;
  bool changed = false;
  unsigned int next_oecount_id = 0;

  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process.  */
  auto_sbitmap candidates (length);
  bitmap_clear (candidates);
  nr_candidates = 0;
  FOR_EACH_VEC_ELT (*ops, i, oe1)
    {
      enum tree_code dcode;
      gimple *oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
	continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
	continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
	   && dcode != RDIV_EXPR)
	  || !is_reassociable_op (oe1def, dcode, loop))
	continue;

      bitmap_set_bit (candidates, i);
      nr_candidates++;
    }

  if (nr_candidates < 2)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
	(*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.  */
  cvec.create (0);

  hash_table<oecount_hasher> ctable (15);

  /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to work around that limitation.  */
  typedef vec<operand_entry *> vec_operand_entry_t_heap;
  subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
    {
      gimple *oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
      oecode = gimple_assign_rhs_code (oedef);
      linearize_expr_tree (&subops[i], oedef,
			   associative_tree_code (oecode), false);

      FOR_EACH_VEC_ELT (subops[i], j, oe1)
	{
	  oecount c;
	  int *slot;
	  int idx;
	  c.oecode = oecode;
	  c.cnt = 1;
	  c.id = next_oecount_id++;
	  c.op = oe1->op;
	  cvec.safe_push (c);
	  idx = cvec.length () + 41;
	  slot = ctable.find_slot (idx, INSERT);
	  if (!*slot)
	    *slot = idx;
	  else
	    {
	      cvec.pop ();
	      cvec[*slot - 42].cnt++;
	    }
	}
    }

  /* Sort the counting table.  */
  cvec.qsort (oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      FOR_EACH_VEC_ELT (cvec, j, c)
	{
	  fprintf (dump_file, "  %u %s: ", c->cnt,
		   c->oecode == MULT_EXPR
		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
	  print_generic_expr (dump_file, c->op);
	  fprintf (dump_file, "\n");
	}
    }

  /* Process the (operand, code) pairs in order of most occurrence.  */
  auto_sbitmap candidates2 (length);
  while (!cvec.is_empty ())
    {
      oecount *c = &cvec.last ();
      if (c->cnt < 2)
	break;

      /* Now collect the operands in the outer chain that contain
	 the common operand in their inner chain.  */
      bitmap_clear (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
	{
	  gimple *oedef;
	  enum tree_code oecode;
	  unsigned j;
	  tree op = (*ops)[i]->op;

	  /* If we undistributed in this chain already this may be
	     a constant.  */
	  if (TREE_CODE (op) != SSA_NAME)
	    continue;

	  oedef = SSA_NAME_DEF_STMT (op);
	  oecode = gimple_assign_rhs_code (oedef);
	  if (oecode != c->oecode)
	    continue;

	  FOR_EACH_VEC_ELT (subops[i], j, oe1)
	    {
	      if (oe1->op == c->op)
		{
		  bitmap_set_bit (candidates2, i);
		  ++nr_candidates2;
		  break;
		}
	    }
	}

      if (nr_candidates2 >= 2)
	{
	  operand_entry *oe1, *oe2;
	  gimple *prod;
	  int first = bitmap_first_set_bit (candidates2);

	  /* Build the new addition chain.  */
	  oe1 = (*ops)[first];
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Building (");
	      print_generic_expr (dump_file, oe1->op);
	    }
	  zero_one_operation (&oe1->op, c->oecode, c->op);
	  EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
	    {
	      gimple *sum;
	      oe2 = (*ops)[i];
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " + ");
		  print_generic_expr (dump_file, oe2->op);
		}
	      zero_one_operation (&oe2->op, c->oecode, c->op);
	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
				       oe1->op, oe2->op, opcode);
	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
	      oe2->rank = 0;
	      oe1->op = gimple_get_lhs (sum);
	    }

	  /* Apply the multiplication/division.  */
	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
				    oe1->op, c->op, c->oecode);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
	      print_generic_expr (dump_file, c->op);
	      fprintf (dump_file, "\n");
	    }

	  /* Record it in the addition chain and disable further
	     undistribution with this op.  */
	  oe1->op = gimple_assign_lhs (prod);
	  oe1->rank = get_rank (oe1->op);
	  subops[first].release ();

	  changed = true;
	}

      cvec.pop ();
    }

  for (i = 0; i < ops->length (); ++i)
    subops[i].release ();
  free (subops);
  cvec.release ();

  return changed;
}
/* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
   first: element index for each relevant BIT_FIELD_REF.
   second: the index of vec ops* for each relevant BIT_FIELD_REF.  */
typedef std::pair<unsigned, unsigned> v_info_elem;
struct v_info {
  tree vec_type;
  auto_vec<v_info_elem, 32> vec;
};

typedef v_info *v_info_ptr;
/* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode.  */
static int
sort_by_mach_mode (const void *p_i, const void *p_j)
{
  const tree tr1 = *((const tree *) p_i);
  const tree tr2 = *((const tree *) p_j);
  unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
  unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
  if (mode1 > mode2)
    return 1;
  else if (mode1 < mode2)
    return -1;
  if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
    return -1;
  else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
    return 1;
  else
    return 0;
}
/* Cleanup hash map for VECTOR information.  */
static void
cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
{
  for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
       it != info_map.end (); ++it)
    {
      v_info_ptr info = (*it).second;
      delete info;
      (*it).second = NULL;
    }
}
/* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
     V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
   is transformed to
     Vs = (V1 + V2 + ... + Vn)
     Vs[0] + Vs[1] + ... + Vs[k]

   The basic steps are listed below:

    1) Check the addition chain *OPS by looking at those summands coming from
       VECTOR bit_field_ref on VECTOR type.  Put the information into
       v_info_map for each satisfied summand, using VECTOR SSA_NAME as key.

    2) For each key (VECTOR SSA_NAME), validate that all its BIT_FIELD_REFs
       are continuous and cover the whole VECTOR perfectly without any holes.
       Obtain one VECTOR list which contains candidates to be transformed.

    3) Sort the VECTOR list by machine mode of VECTOR type, and for each
       group of candidates with the same mode, build the addition statements
       for them and generate BIT_FIELD_REFs accordingly.

   TODO:
       The current implementation requires the whole VECTORs to be fully
       covered, but it could be extended to support partial coverage
       (adjacent elements that do not fill the whole vector); that would
       need some cost model to decide where the boundary lies.  */
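/* Illustrative example (a sketch of the intended effect): with
   4-element vectors v1 and v2,

     v1[0] + v1[1] + v1[2] + v1[3] + v2[0] + v2[1] + v2[2] + v2[3]

   becomes

     vs = v1 + v2;
     vs[0] + vs[1] + vs[2] + vs[3]

   turning seven scalar additions into one vector addition plus three
   scalar additions.  */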
1922 undistribute_bitref_for_vector (enum tree_code opcode
,
1923 vec
<operand_entry
*> *ops
, struct loop
*loop
)
1925 if (ops
->length () <= 1)
1928 if (opcode
!= PLUS_EXPR
1929 && opcode
!= MULT_EXPR
1930 && opcode
!= BIT_XOR_EXPR
1931 && opcode
!= BIT_IOR_EXPR
1932 && opcode
!= BIT_AND_EXPR
)
1935 hash_map
<tree
, v_info_ptr
> v_info_map
;
1939 /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
1940 information into map. */
1941 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1943 enum tree_code dcode
;
1946 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1948 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1949 if (!is_gimple_assign (oe1def
))
1951 dcode
= gimple_assign_rhs_code (oe1def
);
1952 if (dcode
!= BIT_FIELD_REF
|| !is_reassociable_op (oe1def
, dcode
, loop
))
1955 tree rhs
= gimple_assign_rhs1 (oe1def
);
1956 tree vec
= TREE_OPERAND (rhs
, 0);
1957 tree vec_type
= TREE_TYPE (vec
);
1959 if (TREE_CODE (vec
) != SSA_NAME
|| !VECTOR_TYPE_P (vec_type
))
1962 /* Ignore it if target machine can't support this VECTOR type. */
1963 if (!VECTOR_MODE_P (TYPE_MODE (vec_type
)))
1966 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1967 if (!TYPE_VECTOR_SUBPARTS (vec_type
).is_constant ())
1970 if (VECTOR_TYPE_P (TREE_TYPE (rhs
))
1971 || !is_a
<scalar_mode
> (TYPE_MODE (TREE_TYPE (rhs
))))
1974 /* The type of BIT_FIELD_REF might not be equal to the element type of
1975 the vector. We want to use a vector type with element type the
1976 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1977 if (!useless_type_conversion_p (TREE_TYPE (rhs
), TREE_TYPE (vec_type
)))
1979 machine_mode simd_mode
;
1980 unsigned HOST_WIDE_INT size
, nunits
;
1981 unsigned HOST_WIDE_INT elem_size
1982 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs
)));
1983 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type
)).is_constant (&size
))
1985 if (size
<= elem_size
|| (size
% elem_size
) != 0)
1987 nunits
= size
/ elem_size
;
1988 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs
)),
1989 nunits
).exists (&simd_mode
))
1991 vec_type
= build_vector_type_for_mode (TREE_TYPE (rhs
), simd_mode
);
1993 /* Ignore it if target machine can't support this VECTOR type. */
1994 if (!VECTOR_MODE_P (TYPE_MODE (vec_type
)))
1997 /* Check const vector type, constrain BIT_FIELD_REF offset and
1999 if (!TYPE_VECTOR_SUBPARTS (vec_type
).is_constant ())
2002 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type
)),
2003 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec
)))))
2007 tree elem_type
= TREE_TYPE (vec_type
);
2008 unsigned HOST_WIDE_INT elem_size
= tree_to_uhwi (TYPE_SIZE (elem_type
));
2009 if (maybe_ne (bit_field_size (rhs
), elem_size
))
2013 if (!constant_multiple_p (bit_field_offset (rhs
), elem_size
, &idx
))
2016 /* Ignore it if target machine can't support this type of VECTOR
2018 optab op_tab
= optab_for_tree_code (opcode
, vec_type
, optab_vector
);
2019 if (optab_handler (op_tab
, TYPE_MODE (vec_type
)) == CODE_FOR_nothing
)
2023 v_info_ptr
&info
= v_info_map
.get_or_insert (vec
, &existed
);
2027 info
->vec_type
= vec_type
;
2029 else if (!types_compatible_p (vec_type
, info
->vec_type
))
2031 info
->vec
.safe_push (std::make_pair (idx
, i
));
2034 /* At least two VECTOR to combine. */
2035 if (v_info_map
.elements () <= 1)
2037 cleanup_vinfo_map (v_info_map
);
2041 /* Verify all VECTOR candidates by checking two conditions:
2042 1) sorted offsets are adjacent, no holes.
2043 2) can fill the whole VECTOR perfectly.
2044 And add the valid candidates to a vector for further handling. */
2045 auto_vec
<tree
> valid_vecs (v_info_map
.elements ());
2046 for (hash_map
<tree
, v_info_ptr
>::iterator it
= v_info_map
.begin ();
2047 it
!= v_info_map
.end (); ++it
)
2049 tree cand_vec
= (*it
).first
;
2050 v_info_ptr cand_info
= (*it
).second
;
2051 unsigned int num_elems
2052 = TYPE_VECTOR_SUBPARTS (cand_info
->vec_type
).to_constant ();
2053 if (cand_info
->vec
.length () != num_elems
)
2055 sbitmap holes
= sbitmap_alloc (num_elems
);
2056 bitmap_ones (holes
);
2059 FOR_EACH_VEC_ELT (cand_info
->vec
, i
, curr
)
2061 if (!bitmap_bit_p (holes
, curr
->first
))
2067 bitmap_clear_bit (holes
, curr
->first
);
2069 if (valid
&& bitmap_empty_p (holes
))
2070 valid_vecs
.quick_push (cand_vec
);
2071 sbitmap_free (holes
);
2074 /* At least two VECTOR to combine. */
2075 if (valid_vecs
.length () <= 1)
2077 cleanup_vinfo_map (v_info_map
);
  valid_vecs.qsort (sort_by_mach_mode);
  /* Go through all candidates by machine mode order, query the mode_to_total
     to get the total number for each mode and skip the single one.  */
  for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
    {
      tree tvec = valid_vecs[i];
      enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));

      /* Skip modes with only a single candidate.  */
      if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
        continue;

      unsigned int idx, j;
      gimple *sum = NULL;
      v_info_elem *elem;
      tree sum_vec = tvec;
      v_info_ptr info_ptr = *(v_info_map.get (tvec));
      tree vec_type = info_ptr->vec_type;

      /* Build the sum for all candidates with same mode.  */
      do
        {
          sum = build_and_add_sum (vec_type, sum_vec,
                                   valid_vecs[i + 1], opcode);
          if (!useless_type_conversion_p (vec_type,
                                          TREE_TYPE (valid_vecs[i + 1])))
            {
              /* Update the operands only after build_and_add_sum,
                 so that we don't have to repeat the placement algorithm
                 of build_and_add_sum.  */
              gimple_stmt_iterator gsi = gsi_for_stmt (sum);
              tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
                                 valid_vecs[i + 1]);
              tree lhs = make_ssa_name (vec_type);
              gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
              gimple_set_uid (g, gimple_uid (sum));
              gsi_insert_before (&gsi, g, GSI_NEW_STMT);
              gimple_assign_set_rhs2 (sum, lhs);
              if (sum_vec == tvec)
                {
                  vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
                  lhs = make_ssa_name (vec_type);
                  g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
                  gimple_set_uid (g, gimple_uid (sum));
                  gsi_insert_before (&gsi, g, GSI_NEW_STMT);
                  gimple_assign_set_rhs1 (sum, lhs);
                }
              update_stmt (sum);
            }
          sum_vec = gimple_get_lhs (sum);
          info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
          gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
          /* Update those related ops of current candidate VECTOR.  */
          FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
            {
              idx = elem->second;
              gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
              /* Set this so that the op definition will get DCEd later.  */
              gimple_set_visited (def, true);
              if (opcode == PLUS_EXPR
                  || opcode == BIT_XOR_EXPR
                  || opcode == BIT_IOR_EXPR)
                (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
              else if (opcode == MULT_EXPR)
                (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
              else
                {
                  gcc_assert (opcode == BIT_AND_EXPR);
                  (*ops)[idx]->op
                    = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
                }
              (*ops)[idx]->rank = 0;
            }
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Generating addition -> ");
              print_gimple_stmt (dump_file, sum, 0);
            }
          i++;
        }
      while ((i < valid_vecs.length () - 1)
             && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);

      /* Referring to first valid VECTOR with this mode, generate the
         BIT_FIELD_REF statements accordingly.  */
      info_ptr = *(v_info_map.get (tvec));
      gcc_assert (sum);
      tree elem_type = TREE_TYPE (vec_type);
      FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
        {
          idx = elem->second;
          tree dst = make_ssa_name (elem_type);
          tree pos = bitsize_int (elem->first
                                  * tree_to_uhwi (TYPE_SIZE (elem_type)));
          tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
                             TYPE_SIZE (elem_type), pos);
          gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
          insert_stmt_after (gs, sum);
          gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
          /* Set this so that the op definition will get DCEd later.  */
          gimple_set_visited (def, true);
          (*ops)[idx]->op = gimple_assign_lhs (gs);
          (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Generating bit_field_ref -> ");
              print_gimple_stmt (dump_file, gs, 0);
            }
        }
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
  cleanup_vinfo_map (v_info_map);

  return true;
}
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
   expression, examine the other OPS to see if any of them are comparisons
   of the same values, which we may be able to combine or eliminate.
   For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
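
/* For illustration only (not part of the pass itself): at the source level
   the rewrite quoted above corresponds to folding

     bool f (int a, int b) { return (a < b) | (a == b); }

   into

     bool f (int a, int b) { return a <= b; }

   The routine below looks for such comparison pairs among the already
   linearized operands.  */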
static bool
eliminate_redundant_comparison (enum tree_code opcode,
                                vec<operand_entry *> *ops,
                                unsigned int currindex,
                                operand_entry *curr)
{
  tree op1, op2;
  enum tree_code lcode, rcode;
  gimple *def1, *def2;
  int i;
  operand_entry *oe;

  if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
    return false;

  /* Check that CURR is a comparison.  */
  if (TREE_CODE (curr->op) != SSA_NAME)
    return false;
  def1 = SSA_NAME_DEF_STMT (curr->op);
  if (!is_gimple_assign (def1))
    return false;
  lcode = gimple_assign_rhs_code (def1);
  if (TREE_CODE_CLASS (lcode) != tcc_comparison)
    return false;
  op1 = gimple_assign_rhs1 (def1);
  op2 = gimple_assign_rhs2 (def1);

  /* Now look for a similar comparison in the remaining OPS.  */
  for (i = currindex + 1; ops->iterate (i, &oe); i++)
    {
      tree t;

      if (TREE_CODE (oe->op) != SSA_NAME)
        continue;
      def2 = SSA_NAME_DEF_STMT (oe->op);
      if (!is_gimple_assign (def2))
        continue;
      rcode = gimple_assign_rhs_code (def2);
      if (TREE_CODE_CLASS (rcode) != tcc_comparison)
        continue;

      /* If we got here, we have a match.  See if we can combine the
         two comparisons.  */
      tree type = TREE_TYPE (gimple_assign_lhs (def1));
      if (opcode == BIT_IOR_EXPR)
        t = maybe_fold_or_comparisons (type,
                                       lcode, op1, op2,
                                       rcode, gimple_assign_rhs1 (def2),
                                       gimple_assign_rhs2 (def2));
      else
        t = maybe_fold_and_comparisons (type,
                                        lcode, op1, op2,
                                        rcode, gimple_assign_rhs1 (def2),
                                        gimple_assign_rhs2 (def2));
      if (!t)
        continue;

      /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
         always give us a boolean_type_node value back.  If the original
         BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
         we need to convert.  */
      if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
        {
          if (!fold_convertible_p (TREE_TYPE (curr->op), t))
            continue;
          t = fold_convert (TREE_TYPE (curr->op), t);
        }

      if (TREE_CODE (t) != INTEGER_CST
          && !operand_equal_p (t, curr->op, 0))
        {
          enum tree_code subcode;
          tree newop1, newop2;
          if (!COMPARISON_CLASS_P (t))
            continue;
          extract_ops_from_tree (t, &subcode, &newop1, &newop2);
          STRIP_USELESS_TYPE_CONVERSION (newop1);
          STRIP_USELESS_TYPE_CONVERSION (newop2);
          if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
            continue;
          if (lcode == TREE_CODE (t)
              && operand_equal_p (op1, newop1, 0)
              && operand_equal_p (op2, newop2, 0))
            t = curr->op;
          else if ((TREE_CODE (newop1) == SSA_NAME
                    && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop1))
                   || (TREE_CODE (newop2) == SSA_NAME
                       && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop2)))
            continue;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Equivalence: ");
          print_generic_expr (dump_file, curr->op);
          fprintf (dump_file, " %s ", op_symbol_code (opcode));
          print_generic_expr (dump_file, oe->op);
          fprintf (dump_file, " -> ");
          print_generic_expr (dump_file, t);
          fprintf (dump_file, "\n");
        }

      /* Now we can delete oe, as it has been subsumed by the new combined
         expression t.  */
      ops->ordered_remove (i);
      reassociate_stats.ops_eliminated++;

      /* If t is the same as curr->op, we're done.  Otherwise we must
         replace curr->op with t.  Special case is if we got a constant
         back, in which case we add it to the end instead of in place of
         the current entry.  */
      if (TREE_CODE (t) == INTEGER_CST)
        {
          ops->ordered_remove (currindex);
          add_to_ops_vec (ops, t);
        }
      else if (!operand_equal_p (t, curr->op, 0))
        {
          gimple *sum;
          enum tree_code subcode;
          tree newop1;
          tree newop2;
          gcc_assert (COMPARISON_CLASS_P (t));
          extract_ops_from_tree (t, &subcode, &newop1, &newop2);
          STRIP_USELESS_TYPE_CONVERSION (newop1);
          STRIP_USELESS_TYPE_CONVERSION (newop2);
          gcc_checking_assert (is_gimple_val (newop1)
                               && is_gimple_val (newop2));
          sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
          curr->op = gimple_get_lhs (sum);
        }
      return true;
    }

  return false;
}
/* Transform repeated addition of same values into multiply with
   constant.  */

static bool
transform_add_to_multiply (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  tree op = NULL_TREE;
  int j;
  int i, start = -1, end = 0, count = 0;
  auto_vec<std::pair <int, int> > indxs;
  bool changed = false;

  if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
      && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
          || !flag_unsafe_math_optimizations))
    return false;

  /* Look for repeated operands.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (start == -1)
        {
          count = 1;
          op = oe->op;
          start = i;
        }
      else if (operand_equal_p (oe->op, op, 0))
        {
          count++;
          end = i;
        }
      else
        {
          if (count > 1)
            {
              indxs.safe_push (std::make_pair (start, end));
              changed = true;
            }
          count = 1;
          op = oe->op;
          start = i;
        }
    }

  if (count > 1)
    {
      indxs.safe_push (std::make_pair (start, end));
      changed = true;
    }

  for (j = indxs.length () - 1; j >= 0; --j)
    {
      /* Convert repeated operand addition to multiplication.  */
      start = indxs[j].first;
      end = indxs[j].second;
      op = (*ops)[start]->op;
      count = end - start + 1;
      for (i = end; i >= start; --i)
        ops->unordered_remove (i);
      tree tmp = make_ssa_name (TREE_TYPE (op));
      tree cst = build_int_cst (integer_type_node, count);
      gassign *mul_stmt
        = gimple_build_assign (tmp, MULT_EXPR,
                               op, fold_convert (TREE_TYPE (op), cst));
      gimple_set_visited (mul_stmt, true);
      add_to_ops_vec (ops, tmp, mul_stmt);
    }

  return changed;
}
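
/* For illustration only (a sketch, not part of the pass): at the source
   level the transformation above turns

     int f (int x) { return x + x + x + x; }

   into the equivalent

     int f (int x) { return x * 4; }

   Integer operands are always eligible; floating-point ones only under
   -funsafe-math-optimizations, as the check at the top requires.  */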
/* Perform various identities and other optimizations on the list of
   operand entries, stored in OPS.  The tree code for the binary
   operation between all the operands is OPCODE.  */

static void
optimize_ops_list (enum tree_code opcode,
                   vec<operand_entry *> *ops)
{
  unsigned int length = ops->length ();
  unsigned int i;
  operand_entry *oe;
  operand_entry *oelast = NULL;
  bool iterate = false;

  if (length == 1)
    return;

  oelast = ops->last ();

  /* If the last two are constants, pop the constants off, merge them
     and try the next two.  */
  if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
    {
      operand_entry *oelm1 = (*ops)[length - 2];

      if (oelm1->rank == 0
          && is_gimple_min_invariant (oelm1->op)
          && useless_type_conversion_p (TREE_TYPE (oelm1->op),
                                        TREE_TYPE (oelast->op)))
        {
          tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
                                     oelm1->op, oelast->op);

          if (folded && is_gimple_min_invariant (folded))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "Merging constants\n");

              ops->pop ();
              ops->pop ();

              add_to_ops_vec (ops, folded);
              reassociate_stats.constants_eliminated++;

              optimize_ops_list (opcode, ops);
              return;
            }
        }
    }

  eliminate_using_constants (opcode, ops);
  oelast = NULL;

  for (i = 0; ops->iterate (i, &oe);)
    {
      bool done = false;

      if (eliminate_not_pairs (opcode, ops, i, oe))
        return;
      if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
          || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
          || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
        {
          if (done)
            return;
          iterate = true;
          oelast = NULL;
          continue;
        }
      oelast = oe;
      i++;
    }

  if (iterate)
    optimize_ops_list (opcode, ops);
}
/* The following functions are subroutines to optimize_range_tests and allow
   it to try to change a logical combination of comparisons into a range
   test.

   For example, both
        X == 2 || X == 5 || X == 3 || X == 4
   and
        X >= 2 && X <= 5
   are converted to
        (unsigned) (X - 2) <= 3

   For more information see comments above fold_test_range in fold-const.cc,
   this implementation is for GIMPLE.  */
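
/* For illustration only (not used by the code): the conversion relies on
   unsigned wrap-around.  For the range [2, 5],

     bool f (int x) { return x >= 2 && x <= 5; }

   becomes a single subtract-and-compare

     bool f (int x) { return (unsigned) (x - 2) <= 3; }

   since values below 2 wrap around to huge unsigned numbers and therefore
   fail the comparison.  */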
/* Dump the range entry R to FILE, skipping its expression if SKIP_EXP.  */

void
dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
{
  if (!skip_exp)
    print_generic_expr (file, r->exp);
  fprintf (file, " %c[", r->in_p ? '+' : '-');
  print_generic_expr (file, r->low);
  fprintf (file, ", ");
  print_generic_expr (file, r->high);
  fputc (']', file);
}
/* Dump the range entry R to STDERR.  */

DEBUG_FUNCTION void
debug_range_entry (struct range_entry *r)
{
  dump_range_entry (stderr, r, false);
  fputc ('\n', stderr);
}
/* This is similar to make_range in fold-const.cc, but on top of
   GIMPLE instead of trees.  If EXP is non-NULL, it should be
   an SSA_NAME and STMT argument is ignored, otherwise STMT
   argument should be a GIMPLE_COND.  */
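
/* For illustration only: a test like x >= 2 && x <= 5 ends up represented
   as a range_entry with exp = x, in_p = true, low = 2, high = 5, while
   x != 0 becomes exp = x, in_p = false, low = high = 0.  */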
void
init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
{
  int in_p;
  tree low, high;
  bool is_bool, strict_overflow_p;

  r->exp = NULL_TREE;
  r->in_p = false;
  r->strict_overflow_p = false;
  r->low = NULL_TREE;
  r->high = NULL_TREE;
  if (exp != NULL_TREE
      && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
    return;

  /* Start with simply saying "EXP != 0" and then look at the code of EXP
     and see if we can refine the range.  Some of the cases below may not
     happen, but it doesn't seem worth worrying about this.  We "continue"
     the outer loop when we've changed something; otherwise we "break"
     the switch, which will "break" the while.  */
  low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
  high = low;
  in_p = 0;
  strict_overflow_p = false;
  is_bool = false;
  if (exp == NULL_TREE)
    is_bool = true;
  else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
    {
      if (TYPE_UNSIGNED (TREE_TYPE (exp)))
        is_bool = true;
      else
        return;
    }
  else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
    is_bool = true;

  while (1)
    {
      enum tree_code code;
      tree arg0, arg1, exp_type;
      tree nexp;
      location_t loc;

      if (exp != NULL_TREE)
        {
          if (TREE_CODE (exp) != SSA_NAME
              || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
            break;

          stmt = SSA_NAME_DEF_STMT (exp);
          if (!is_gimple_assign (stmt))
            break;

          code = gimple_assign_rhs_code (stmt);
          arg0 = gimple_assign_rhs1 (stmt);
          arg1 = gimple_assign_rhs2 (stmt);
          exp_type = TREE_TYPE (exp);
        }
      else
        {
          code = gimple_cond_code (stmt);
          arg0 = gimple_cond_lhs (stmt);
          arg1 = gimple_cond_rhs (stmt);
          exp_type = boolean_type_node;
        }

      if (TREE_CODE (arg0) != SSA_NAME
          || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
        break;
      loc = gimple_location (stmt);
      switch (code)
        {
        case BIT_NOT_EXPR:
          if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
              /* Ensure the range is either +[-,0], +[0,0],
                 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
                 -[1,1].  If it is e.g. +[-,-] or -[-,-]
                 or similar expression of unconditional true or
                 false, it should not be negated.  */
              && ((high && integer_zerop (high))
                  || (low && integer_onep (low))))
            {
              in_p = !in_p;
              exp = arg0;
              continue;
            }
          break;
        case SSA_NAME:
          exp = arg0;
          continue;
        CASE_CONVERT:
          if (is_bool)
            {
              if ((TYPE_PRECISION (exp_type) == 1
                   || TREE_CODE (exp_type) == BOOLEAN_TYPE)
                  && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
                return;
            }
          else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
            {
              if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
                is_bool = true;
              else
                return;
            }
          else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
            is_bool = true;
          goto do_default;
        case EQ_EXPR:
        case NE_EXPR:
        case LT_EXPR:
        case LE_EXPR:
        case GE_EXPR:
        case GT_EXPR:
          is_bool = true;
          /* FALLTHRU */
        default:
          if (!is_bool)
            return;
        do_default:
          nexp = make_range_step (loc, code, arg0, arg1, exp_type,
                                  &low, &high, &in_p,
                                  &strict_overflow_p);
          if (nexp != NULL_TREE)
            {
              exp = nexp;
              gcc_assert (TREE_CODE (exp) == SSA_NAME);
              continue;
            }
          break;
        }
      break;
    }

  r->exp = exp;
  r->in_p = in_p;
  r->low = low;
  r->high = high;
  r->strict_overflow_p = strict_overflow_p;
}
/* Comparison function for qsort.  Sort entries
   without SSA_NAME exp first, then with SSA_NAMEs sorted
   by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
   by increasing ->low and if ->low is the same, by increasing
   ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
   maximum.  */
static int
range_entry_cmp (const void *a, const void *b)
{
  const struct range_entry *p = (const struct range_entry *) a;
  const struct range_entry *q = (const struct range_entry *) b;

  if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
    {
      if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
        {
          /* Group range_entries for the same SSA_NAME together.  */
          if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
            return -1;
          else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
            return 1;
          /* If ->low is different, NULL low goes first, then by
             ascending low.  */
          if (p->low != NULL_TREE)
            {
              if (q->low != NULL_TREE)
                {
                  tree tem = fold_binary (LT_EXPR, boolean_type_node,
                                          p->low, q->low);
                  if (tem && integer_onep (tem))
                    return -1;
                  tem = fold_binary (GT_EXPR, boolean_type_node,
                                     p->low, q->low);
                  if (tem && integer_onep (tem))
                    return 1;
                }
              else
                return 1;
            }
          else if (q->low != NULL_TREE)
            return -1;
          /* If ->high is different, NULL high goes last, before that by
             ascending high.  */
          if (p->high != NULL_TREE)
            {
              if (q->high != NULL_TREE)
                {
                  tree tem = fold_binary (LT_EXPR, boolean_type_node,
                                          p->high, q->high);
                  if (tem && integer_onep (tem))
                    return -1;
                  tem = fold_binary (GT_EXPR, boolean_type_node,
                                     p->high, q->high);
                  if (tem && integer_onep (tem))
                    return 1;
                }
              else
                return -1;
            }
          else if (q->high != NULL_TREE)
            return 1;
          /* If both ranges are the same, sort below by ascending idx.  */
        }
      else
        return 1;
    }
  else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
    return -1;

  if (p->idx < q->idx)
    return -1;
  else
    {
      gcc_checking_assert (p->idx > q->idx);
      return 1;
    }
}
/* Helper function for update_range_test.  Force EXPR into an SSA_NAME,
   insert needed statements BEFORE or after GSI.  */

static tree
force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
{
  enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
  tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
  if (TREE_CODE (ret) != SSA_NAME)
    {
      gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
      if (before)
        gsi_insert_before (gsi, g, GSI_SAME_STMT);
      else
        gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
      ret = gimple_assign_lhs (g);
    }
  return ret;
}
/* Helper routine of optimize_range_tests.
   [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
   RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
   OPCODE and OPS are arguments of optimize_range_tests.  If OTHERRANGE
   is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
   an array of COUNT pointers to other ranges.  Return
   true if the range merge has been successful.
   If OPCODE is ERROR_MARK, this is called from within
   maybe_optimize_range_tests and is performing inter-bb range optimization.
   In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
   oe->rank.  */
static bool
update_range_test (struct range_entry *range, struct range_entry *otherrange,
                   struct range_entry **otherrangep,
                   unsigned int count, enum tree_code opcode,
                   vec<operand_entry *> *ops, tree exp, gimple_seq seq,
                   bool in_p, tree low, tree high, bool strict_overflow_p)
{
  unsigned int idx = range->idx;
  struct range_entry *swap_with = NULL;
  basic_block rewrite_bb_first = NULL, rewrite_bb_last = NULL;
  if (opcode == ERROR_MARK)
    {
      /* For inter-bb range test optimization, pick from the range tests
         the one which is tested in the earliest condition (one dominating
         the others), because otherwise there could be some UB (e.g. signed
         overflow) in following bbs that we'd expose which wasn't there in
         the original program.  See PR104196.  */
      basic_block orig_range_bb = BASIC_BLOCK_FOR_FN (cfun, (*ops)[idx]->id);
      basic_block range_bb = orig_range_bb;
      for (unsigned int i = 0; i < count; i++)
        {
          struct range_entry *this_range;
          if (otherrange)
            this_range = otherrange + i;
          else
            this_range = otherrangep[i];
          operand_entry *oe = (*ops)[this_range->idx];
          basic_block this_bb = BASIC_BLOCK_FOR_FN (cfun, oe->id);
          if (range_bb != this_bb
              && dominated_by_p (CDI_DOMINATORS, range_bb, this_bb))
            {
              swap_with = this_range;
              range_bb = this_bb;
              idx = this_range->idx;
            }
        }
      /* If seq is non-NULL, it can contain statements that use SSA_NAMEs
         only defined in later blocks.  In this case we can't move the
         merged comparison earlier, so instead check if there are any stmts
         that might trigger signed integer overflow in between and rewrite
         them.  But only after we check if the optimization is possible.  */
      if (seq && swap_with)
        {
          rewrite_bb_first = range_bb;
          rewrite_bb_last = orig_range_bb;
          idx = range->idx;
          swap_with = NULL;
        }
    }
  operand_entry *oe = (*ops)[idx];
  tree op = oe->op;
  gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
                    : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
  location_t loc = gimple_location (stmt);
  tree optype = op ? TREE_TYPE (op) : boolean_type_node;
  tree tem = build_range_check (loc, optype, unshare_expr (exp),
                                in_p, low, high);
  enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
  gimple_stmt_iterator gsi;
  unsigned int i, uid;

  if (tem == NULL_TREE)
    return false;

  /* If op is default def SSA_NAME, there is no place to insert the
     new comparison.  Give up, unless we can use OP itself as the
     range test.  */
  if (op && SSA_NAME_IS_DEFAULT_DEF (op))
    {
      if (op == range->exp
          && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
              || TREE_CODE (optype) == BOOLEAN_TYPE)
          && (op == tem
              || (TREE_CODE (tem) == EQ_EXPR
                  && TREE_OPERAND (tem, 0) == op
                  && integer_onep (TREE_OPERAND (tem, 1))))
          && opcode != BIT_IOR_EXPR
          && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
        {
          stmt = NULL;
          tem = op;
        }
      else
        return false;
    }

  if (swap_with)
    std::swap (range->idx, swap_with->idx);

  if (strict_overflow_p && issue_strict_overflow_warning (wc))
    warning_at (loc, OPT_Wstrict_overflow,
                "assuming signed overflow does not occur "
                "when simplifying range test");

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct range_entry *r;
      fprintf (dump_file, "Optimizing range tests ");
      dump_range_entry (dump_file, range, false);
      for (i = 0; i < count; i++)
        {
          if (otherrange)
            r = otherrange + i;
          else
            r = otherrangep[i];
          if (r->exp
              && r->exp != range->exp
              && TREE_CODE (r->exp) == SSA_NAME)
            {
              fprintf (dump_file, " and ");
              dump_range_entry (dump_file, r, false);
            }
          else
            {
              fprintf (dump_file, " and");
              dump_range_entry (dump_file, r, true);
            }
        }
      fprintf (dump_file, "\n into ");
      print_generic_expr (dump_file, tem);
      fprintf (dump_file, "\n");
    }

  /* In inter-bb range optimization mode, if we have a seq, we can't
     move the merged comparison to the earliest bb from the comparisons
     being replaced, so instead rewrite stmts that could trigger signed
     integer overflow.  */
  for (basic_block bb = rewrite_bb_last;
       bb != rewrite_bb_first; bb = single_pred (bb))
    for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
         !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple *stmt = gsi_stmt (gsi);
        if (is_gimple_assign (stmt))
          if (tree lhs = gimple_assign_lhs (stmt))
            if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
                 || POINTER_TYPE_P (TREE_TYPE (lhs)))
                && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)))
              {
                enum tree_code code = gimple_assign_rhs_code (stmt);
                if (arith_code_with_undefined_signed_overflow (code))
                  {
                    gimple_stmt_iterator gsip = gsi;
                    gimple_stmt_iterator gsin = gsi;
                    gsi_prev (&gsip);
                    gsi_next (&gsin);
                    rewrite_to_defined_overflow (stmt, true);
                    unsigned uid = gimple_uid (stmt);
                    if (gsi_end_p (gsip))
                      gsip = gsi_after_labels (bb);
                    else
                      gsi_next (&gsip);
                    for (; gsi_stmt (gsip) != gsi_stmt (gsin);
                         gsi_next (&gsip))
                      gimple_set_uid (gsi_stmt (gsip), uid);
                  }
              }
      }

  if (opcode == BIT_IOR_EXPR
      || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
    tem = invert_truthvalue_loc (loc, tem);

  tem = fold_convert_loc (loc, optype, tem);
  if (stmt)
    {
      gsi = gsi_for_stmt (stmt);
      uid = gimple_uid (stmt);
    }
  else
    {
      gsi = gsi_none ();
      uid = 0;
    }
  if (stmt == NULL)
    gcc_checking_assert (tem == op);
  /* In rare cases range->exp can be equal to lhs of stmt.
     In that case we have to insert after the stmt rather then before
     it.  If stmt is a PHI, insert it at the start of the basic block.  */
  else if (op != range->exp)
    {
      gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
      tem = force_into_ssa_name (&gsi, tem, true);
      gsi_prev (&gsi);
    }
  else if (gimple_code (stmt) != GIMPLE_PHI)
    {
      gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
      tem = force_into_ssa_name (&gsi, tem, false);
    }
  else
    {
      gsi = gsi_after_labels (gimple_bb (stmt));
      if (!gsi_end_p (gsi))
        uid = gimple_uid (gsi_stmt (gsi));
      else
        {
          gsi = gsi_start_bb (gimple_bb (stmt));
          uid = 1;
          while (!gsi_end_p (gsi))
            {
              uid = gimple_uid (gsi_stmt (gsi));
              gsi_next (&gsi);
            }
        }
      gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
      tem = force_into_ssa_name (&gsi, tem, true);
      if (gsi_end_p (gsi))
        gsi = gsi_last_bb (gimple_bb (stmt));
      else
        gsi_prev (&gsi);
    }
  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
    if (gimple_uid (gsi_stmt (gsi)))
      break;
    else
      gimple_set_uid (gsi_stmt (gsi), uid);

  oe->op = tem;
  range->exp = exp;
  range->low = low;
  range->high = high;
  range->in_p = in_p;
  range->strict_overflow_p = false;

  for (i = 0; i < count; i++)
    {
      if (otherrange)
        range = otherrange + i;
      else
        range = otherrangep[i];
      oe = (*ops)[range->idx];
      /* Now change all the other range test immediate uses, so that
         those tests will be optimized away.  */
      if (opcode == ERROR_MARK)
        {
          if (oe->op)
            oe->op = build_int_cst (TREE_TYPE (oe->op),
                                    oe->rank == BIT_IOR_EXPR ? 0 : 1);
          else
            oe->op = (oe->rank == BIT_IOR_EXPR
                      ? boolean_false_node : boolean_true_node);
        }
      else
        oe->op = error_mark_node;
      range->exp = NULL_TREE;
      range->low = NULL_TREE;
      range->high = NULL_TREE;
    }
  return true;
}
/* Optimize X == CST1 || X == CST2
   if popcount (CST1 ^ CST2) == 1 into
   (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
   Similarly for ranges.  E.g.
   X != 2 && X != 3 && X != 10 && X != 11
   will be transformed by the previous optimization into
   !((X - 2U) <= 1U || (X - 10U) <= 1U)
   and this loop can transform that into
   !(((X & ~8) - 2U) <= 1U).  */
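
/* Worked example (for illustration; it follows from the comment above):
   with ranges [2,3] and [10,11] we have lowi ^ lowj = 2 ^ 10 = 8 and
   highi ^ highj = 3 ^ 11 = 8, a single bit, so the two ranges differ only
   in bit 3.  Clearing that bit with X & ~8 maps 10 and 11 onto 2 and 3,
   which lets both tests collapse into the single check
   ((X & ~8) - 2U) <= 1U.  */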
static bool
optimize_range_tests_xor (enum tree_code opcode, tree type,
                          tree lowi, tree lowj, tree highi, tree highj,
                          vec<operand_entry *> *ops,
                          struct range_entry *rangei,
                          struct range_entry *rangej)
{
  tree lowxor, highxor, tem, exp;
  /* Check lowi ^ lowj == highi ^ highj and
     popcount (lowi ^ lowj) == 1.  */
  lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
  if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
    return false;
  if (!integer_pow2p (lowxor))
    return false;
  highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
  if (!tree_int_cst_equal (lowxor, highxor))
    return false;

  exp = rangei->exp;
  scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
  int prec = GET_MODE_PRECISION (mode);
  if (TYPE_PRECISION (type) < prec
      || (wi::to_wide (TYPE_MIN_VALUE (type))
          != wi::min_value (prec, TYPE_SIGN (type)))
      || (wi::to_wide (TYPE_MAX_VALUE (type))
          != wi::max_value (prec, TYPE_SIGN (type))))
    {
      type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
      exp = fold_convert (type, exp);
      lowxor = fold_convert (type, lowxor);
      lowi = fold_convert (type, lowi);
      highi = fold_convert (type, highi);
    }
  tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
  exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
  lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
  highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
  if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
                         NULL, rangei->in_p, lowj, highj,
                         rangei->strict_overflow_p
                         || rangej->strict_overflow_p))
    return true;
  return false;
}
/* Optimize X == CST1 || X == CST2
   if popcount (CST2 - CST1) == 1 into
   ((X - CST1) & ~(CST2 - CST1)) == 0.
   Similarly for ranges.  E.g.
   X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
   || X == 75 || X == 45
   will be transformed by the previous optimization into
   (X - 43U) <= 3U || (X - 75U) <= 3U
   and this loop can transform that into
   ((X - 43U) & ~(75U - 43U)) <= 3U.  */
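
/* Worked example (for illustration; it follows from the comment above):
   the two subranges produced earlier are [43,46] and [75,78], so
   highi - lowi = highj - lowj = 3 and lowj - lowi = 75 - 43 = 32, a power
   of two.  Masking that bit away with (X - 43U) & ~32U folds the second
   range onto the first, leaving the single test
   ((X - 43U) & ~32U) <= 3U.  */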
static bool
optimize_range_tests_diff (enum tree_code opcode, tree type,
                           tree lowi, tree lowj, tree highi, tree highj,
                           vec<operand_entry *> *ops,
                           struct range_entry *rangei,
                           struct range_entry *rangej)
{
  tree tem1, tem2, mask;
  /* Check highi - lowi == highj - lowj.  */
  tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
    return false;
  tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
  if (!tree_int_cst_equal (tem1, tem2))
    return false;
  /* Check popcount (lowj - lowi) == 1.  */
  tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
    return false;
  if (!integer_pow2p (tem1))
    return false;

  scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
  int prec = GET_MODE_PRECISION (mode);
  if (TYPE_PRECISION (type) < prec
      || (wi::to_wide (TYPE_MIN_VALUE (type))
          != wi::min_value (prec, TYPE_SIGN (type)))
      || (wi::to_wide (TYPE_MAX_VALUE (type))
          != wi::max_value (prec, TYPE_SIGN (type))))
    type = build_nonstandard_integer_type (prec, 1);
  else
    type = unsigned_type_for (type);
  tem1 = fold_convert (type, tem1);
  tem2 = fold_convert (type, tem2);
  lowi = fold_convert (type, lowi);
  mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
  tem1 = fold_build2 (MINUS_EXPR, type,
                      fold_convert (type, rangei->exp), lowi);
  tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
  lowj = build_int_cst (type, 0);
  if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
                         NULL, rangei->in_p, lowj, tem2,
                         rangei->strict_overflow_p
                         || rangej->strict_overflow_p))
    return true;
  return false;
}
/* Perform the checks common to optimize_range_tests_xor and
   optimize_range_tests_diff on each candidate pair of ranges, then
   dispatch: if OPTIMIZE_XOR is true, call optimize_range_tests_xor,
   otherwise call optimize_range_tests_diff.  */
static bool
optimize_range_tests_1 (enum tree_code opcode, int first, int length,
                        bool optimize_xor, vec<operand_entry *> *ops,
                        struct range_entry *ranges)
{
  int i, j;
  bool any_changes = false;
  for (i = first; i < length; i++)
    {
      tree lowi, highi, lowj, highj, type, tem;

      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
        continue;
      type = TREE_TYPE (ranges[i].exp);
      if (!INTEGRAL_TYPE_P (type))
        continue;
      lowi = ranges[i].low;
      if (lowi == NULL_TREE)
        lowi = TYPE_MIN_VALUE (type);
      highi = ranges[i].high;
      if (highi == NULL_TREE)
        continue;
      for (j = i + 1; j < length && j < i + 64; j++)
        {
          bool changes;
          if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
            continue;
          lowj = ranges[j].low;
          if (lowj == NULL_TREE)
            continue;
          highj = ranges[j].high;
          if (highj == NULL_TREE)
            highj = TYPE_MAX_VALUE (type);
          /* Check lowj > highi.  */
          tem = fold_binary (GT_EXPR, boolean_type_node,
                             lowj, highi);
          if (tem == NULL_TREE || !integer_onep (tem))
            continue;
          if (optimize_xor)
            changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
                                                highi, highj, ops,
                                                ranges + i, ranges + j);
          else
            changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
                                                 highi, highj, ops,
                                                 ranges + i, ranges + j);
          if (changes)
            {
              any_changes = true;
              break;
            }
        }
    }
  return any_changes;
}
/* Helper function of optimize_range_tests_to_bit_test.  Handle a single
   range, EXP, LOW, HIGH, compute bit mask of bits to test and return
   EXP on success, NULL otherwise.  */
static tree
extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
                       wide_int *mask, tree *totallowp)
{
  tree tem = int_const_binop (MINUS_EXPR, high, low);
  if (tem == NULL_TREE
      || TREE_CODE (tem) != INTEGER_CST
      || TREE_OVERFLOW (tem)
      || tree_int_cst_sgn (tem) == -1
      || compare_tree_int (tem, prec) != -1)
    return NULL_TREE;

  unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
  *mask = wi::shifted_mask (0, max, false, prec);
  if (TREE_CODE (exp) == BIT_AND_EXPR
      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
    {
      widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
      msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
      if (wi::popcount (msk) == 1
          && wi::ltu_p (msk, prec - max))
        {
          *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
          max += msk.to_uhwi ();
          exp = TREE_OPERAND (exp, 0);
        }
    }
  if (integer_zerop (low)
      && TREE_CODE (exp) == PLUS_EXPR
      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
    {
      tree ret = TREE_OPERAND (exp, 0);
      STRIP_NOPS (ret);
      widest_int bias
        = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
                             TYPE_PRECISION (TREE_TYPE (low))));
      tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
      if (totallowp)
        {
          *totallowp = tbias;
          return ret;
        }
      else if (!tree_int_cst_lt (totallow, tbias))
        return NULL_TREE;
      bias = wi::to_widest (tbias);
      bias -= wi::to_widest (totallow);
      if (bias >= 0 && bias < prec - max)
        {
          *mask = wi::lshift (*mask, bias);
          return ret;
        }
      return NULL_TREE;
    }
  if (totallowp)
    return exp;
  if (!tree_int_cst_lt (totallow, low))
    return exp;
  tem = int_const_binop (MINUS_EXPR, low, totallow);
  if (tem == NULL_TREE
      || TREE_CODE (tem) != INTEGER_CST
      || TREE_OVERFLOW (tem)
      || compare_tree_int (tem, prec - max) == 1)
    return NULL_TREE;

  *mask = wi::lshift (*mask, wi::to_widest (tem));
  return exp;
}
/* Attempt to optimize small range tests using bit test.
   E.g.
   X != 43 && X != 76 && X != 44 && X != 78 && X != 49
   && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
   has been optimized by the earlier subroutines into:
   ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
   As the whole range 43 through 82 spans fewer than 64 values,
   for 64-bit word targets optimize that into:
   (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */
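
/* For illustration (not used by the code): the mask constant above is
   built by setting bit (C - 43) for every excluded constant C, i.e.
   bits 0-3 (43..46), bit 6 (49), bits 32-35 (75..78) and bit 39 (82),
   which gives 0x8F0000004FULL; a value X is then rejected when the bit
   indexed by X - 43U is set in that word.  */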
static bool
optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
                                  vec<operand_entry *> *ops,
                                  struct range_entry *ranges)
{
  int i, j;
  bool any_changes = false;
  int prec = GET_MODE_BITSIZE (word_mode);
  auto_vec<struct range_entry *, 64> candidates;

  for (i = first; i < length - 1; i++)
    {
      tree lowi, highi, lowj, highj, type;

      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
        continue;
      type = TREE_TYPE (ranges[i].exp);
      if (!INTEGRAL_TYPE_P (type))
        continue;
      lowi = ranges[i].low;
      if (lowi == NULL_TREE)
        lowi = TYPE_MIN_VALUE (type);
      highi = ranges[i].high;
      if (highi == NULL_TREE)
        continue;
      wide_int mask;
      tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
                                        highi, &mask, &lowi);
      if (exp == NULL_TREE)
        continue;
      bool strict_overflow_p = ranges[i].strict_overflow_p;
      candidates.truncate (0);
      int end = MIN (i + 64, length);
      for (j = i + 1; j < end; j++)
        {
          tree exp2;
          if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
            continue;
          if (ranges[j].exp == exp)
            ;
          else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
            {
              exp2 = TREE_OPERAND (ranges[j].exp, 0);
              if (exp2 == exp)
                ;
              else if (TREE_CODE (exp2) == PLUS_EXPR)
                {
                  exp2 = TREE_OPERAND (exp2, 0);
                  if (exp2 != exp)
                    continue;
                }
              else
                continue;
            }
          else
            continue;
          lowj = ranges[j].low;
          if (lowj == NULL_TREE)
            continue;
          highj = ranges[j].high;
          if (highj == NULL_TREE)
            highj = TYPE_MAX_VALUE (type);
          wide_int mask2;
          exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
                                        highj, &mask2, NULL);
          if (exp2 != exp)
            continue;
          mask |= mask2;
          strict_overflow_p |= ranges[j].strict_overflow_p;
          candidates.safe_push (&ranges[j]);
        }

      /* If every possible relative value of the expression is a valid shift
         amount, then we can merge the entry test in the bit test.  In this
         case, if we would need otherwise 2 or more comparisons, then use
         the bit test; in the other cases, the threshold is 3 comparisons.  */
      bool entry_test_needed;
      value_range r;
      if (TREE_CODE (exp) == SSA_NAME
          && get_range_query (cfun)->range_of_expr (r, exp)
          && !r.undefined_p ()
          && !r.varying_p ()
          && wi::leu_p (r.upper_bound () - r.lower_bound (), prec - 1))
        {
          wide_int min = r.lower_bound ();
          wide_int ilowi = wi::to_wide (lowi);
          if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
            {
              lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
              mask = wi::lshift (mask, ilowi - min);
            }
          else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
            {
              lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
              mask = wi::lrshift (mask, min - ilowi);
            }
          entry_test_needed = false;
        }
      else
        entry_test_needed = true;
      if (candidates.length () >= (entry_test_needed ? 2 : 1))
        {
          tree high = wide_int_to_tree (TREE_TYPE (lowi),
                                        wi::to_widest (lowi)
                                        + prec - 1 - wi::clz (mask));
          operand_entry *oe = (*ops)[ranges[i].idx];
          tree op = oe->op;
          gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
                            : last_nondebug_stmt (BASIC_BLOCK_FOR_FN
                                                    (cfun, oe->id));
          location_t loc = gimple_location (stmt);
          tree optype = op ? TREE_TYPE (op) : boolean_type_node;

          /* See if it isn't cheaper to pretend the minimum value of the
             range is 0, if maximum value is small enough.
             We can avoid then subtraction of the minimum value, but the
             mask constant could be perhaps more expensive.  */
          if (compare_tree_int (lowi, 0) > 0
              && compare_tree_int (high, prec) < 0)
            {
              int cost_diff;
              HOST_WIDE_INT m = tree_to_uhwi (lowi);
              rtx reg = gen_raw_REG (word_mode, 10000);
              bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
              cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
                                                      GEN_INT (-m)),
                                        word_mode, speed_p);
              rtx r = immed_wide_int_const (mask, word_mode);
              cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
                                         word_mode, speed_p);
              r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
              cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
                                         word_mode, speed_p);
              if (cost_diff > 0)
                {
                  mask = wi::lshift (mask, m);
                  lowi = build_zero_cst (TREE_TYPE (lowi));
                }
            }

          tree tem;
          if (entry_test_needed)
            {
              tem = build_range_check (loc, optype, unshare_expr (exp),
                                       false, lowi, high);
              if (tem == NULL_TREE || is_gimple_val (tem))
                continue;
            }
          else
            tem = NULL_TREE;
          tree etype = unsigned_type_for (TREE_TYPE (exp));
          exp = fold_build2_loc (loc, MINUS_EXPR, etype,
                                 fold_convert_loc (loc, etype, exp),
                                 fold_convert_loc (loc, etype, lowi));
          exp = fold_convert_loc (loc, integer_type_node, exp);
          tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
          exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
                                 build_int_cst (word_type, 1), exp);
          exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
                                 wide_int_to_tree (word_type, mask));
          exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
                                 build_zero_cst (word_type));
          if (is_gimple_val (exp))
            continue;

          /* The shift might have undefined behavior if TEM is true,
             but reassociate_bb isn't prepared to have basic blocks
             split when it is running.  So, temporarily emit a code
             with BIT_IOR_EXPR instead of &&, and fix it up in
             branch_fixup.  */
          gimple_seq seq = NULL;
          if (tem)
            {
              tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
              gcc_assert (TREE_CODE (tem) == SSA_NAME);
              gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
            }
          gimple_seq seq2;
          exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
          gimple_seq_add_seq_without_update (&seq, seq2);
          gcc_assert (TREE_CODE (exp) == SSA_NAME);
          gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
          if (tem)
            {
              gimple *g = gimple_build_assign (make_ssa_name (optype),
                                               BIT_IOR_EXPR, tem, exp);
              gimple_set_location (g, loc);
              gimple_seq_add_stmt_without_update (&seq, g);
              exp = gimple_assign_lhs (g);
            }
          tree val = build_zero_cst (optype);
          if (update_range_test (&ranges[i], NULL, candidates.address (),
                                 candidates.length (), opcode, ops, exp,
                                 seq, false, val, val, strict_overflow_p))
            {
              any_changes = true;
              if (tem)
                reassoc_branch_fixups.safe_push (tem);
            }
          else
            gimple_seq_discard (seq);
        }
    }
  return any_changes;
}
/* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
   and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
   Also, handle x < C && y < C && z < C where C is power of two as
   (x | y | z) < C.  And also handle signed x < 0 && y < 0 && z < 0
   as (x | y | z) < 0.  */
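
/* For illustration (not used by the code): one instance of the underlying
   identity is

     bool f (int x, int y, int z) { return x == 0 && y == 0 && z == 0; }

   which becomes

     bool f (int x, int y, int z) { return (x | y | z) == 0; }

   since a bitwise OR is zero exactly when every operand is zero; the
   x == -1 / bitwise AND variant follows by the same argument on
   complemented bits.  */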
static bool
optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
                                  vec<operand_entry *> *ops,
                                  struct range_entry *ranges)
{
  int i;
  unsigned int b;
  bool any_changes = false;
  auto_vec<int, 128> buckets;
  auto_vec<int, 32> chains;
  auto_vec<struct range_entry *, 32> candidates;

  for (i = first; i < length; i++)
    {
      int idx;

      if (ranges[i].exp == NULL_TREE
          || TREE_CODE (ranges[i].exp) != SSA_NAME
          || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
          || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
        continue;

      if (ranges[i].low != NULL_TREE
          && ranges[i].high != NULL_TREE
          && ranges[i].in_p
          && tree_int_cst_equal (ranges[i].low, ranges[i].high))
        {
          idx = !integer_zerop (ranges[i].low);
          if (idx && !integer_all_onesp (ranges[i].low))
            continue;
        }
      else if (ranges[i].high != NULL_TREE
               && TREE_CODE (ranges[i].high) == INTEGER_CST
               && ranges[i].in_p)
        {
          wide_int w = wi::to_wide (ranges[i].high);
          int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
          int l = wi::clz (w);
          idx = 2;
          if (l <= 0
              || l >= prec
              || w != wi::mask (prec - l, false, prec))
            continue;
          if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
                 && ranges[i].low == NULL_TREE)
                || (ranges[i].low
                    && integer_zerop (ranges[i].low))))
            continue;
        }
      else if (ranges[i].high == NULL_TREE
               && ranges[i].low != NULL_TREE
               /* Perform this optimization only in the last
                  reassoc pass, as it interferes with the reassociation
                  itself or could also with VRP etc. which might not
                  be able to virtually undo the optimization.  */
               && !reassoc_insert_powi_p
               && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
               && integer_zerop (ranges[i].low))
        idx = 3;
      else
        continue;

      b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
      if (buckets.length () <= b)
        buckets.safe_grow_cleared (b + 1, true);
      if (chains.length () <= (unsigned) i)
        chains.safe_grow (i + 1, true);
      chains[i] = buckets[b];
      buckets[b] = i + 1;
    }

  FOR_EACH_VEC_ELT (buckets, b, i)
    if (i && chains[i - 1])
      {
        int j, k = i;
        if ((b % 4) == 2)
          {
            /* When ranges[X - 1].high + 1 is a power of two,
               we need to process the same bucket up to
               precision - 1 times, each time split the entries
               with the same high bound into one chain and the
               rest into another one to be processed later.  */
            int this_prev = i;
            int other_prev = 0;
            for (j = chains[i - 1]; j; j = chains[j - 1])
              {
                if (tree_int_cst_equal (ranges[i - 1].high,
                                        ranges[j - 1].high))
                  {
                    chains[this_prev - 1] = j;
                    this_prev = j;
                  }
                else if (other_prev == 0)
                  {
                    buckets[b] = j;
                    other_prev = j;
                  }
                else
                  {
                    chains[other_prev - 1] = j;
                    other_prev = j;
                  }
              }
            chains[this_prev - 1] = 0;
            if (other_prev)
              chains[other_prev - 1] = 0;
            if (chains[i - 1] == 0)
              {
                if (other_prev)
                  b--;
                continue;
              }
          }
        for (j = chains[i - 1]; j; j = chains[j - 1])
          {
            gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
            gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
            if (reassoc_stmt_dominates_stmt_p (gk, gj))
              k = j;
          }
        tree type1 = TREE_TYPE (ranges[k - 1].exp);
        tree type2 = NULL_TREE;
        bool strict_overflow_p = false;
        candidates.truncate (0);
        if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
          type1 = pointer_sized_int_node;
        for (j = i; j; j = chains[j - 1])
          {
            tree type = TREE_TYPE (ranges[j - 1].exp);
            strict_overflow_p |= ranges[j - 1].strict_overflow_p;
            if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
              type = pointer_sized_int_node;
            if ((b % 4) == 3)
              {
                /* For the signed < 0 cases, the types should be
                   really compatible (all signed with the same precision,
                   instead put ranges that have different in_p from
                   k first.  */
                if (!useless_type_conversion_p (type1, type))
                  continue;
                if (ranges[j - 1].in_p != ranges[k - 1].in_p)
                  candidates.safe_push (&ranges[j - 1]);
                continue;
              }
            if (j == k
                || useless_type_conversion_p (type1, type))
              ;
            else if (type2 == NULL_TREE
                     || useless_type_conversion_p (type2, type))
              {
                if (type2 == NULL_TREE)
                  type2 = type;
                candidates.safe_push (&ranges[j - 1]);
              }
          }
        unsigned l = candidates.length ();
        for (j = i; j; j = chains[j - 1])
          {
            tree type = TREE_TYPE (ranges[j - 1].exp);
            if (j == k)
              continue;
            if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
              type = pointer_sized_int_node;
            if ((b % 4) == 3)
              {
                if (!useless_type_conversion_p (type1, type))
                  continue;
                if (ranges[j - 1].in_p == ranges[k - 1].in_p)
                  candidates.safe_push (&ranges[j - 1]);
                continue;
              }
            if (useless_type_conversion_p (type1, type))
              ;
            else if (type2 == NULL_TREE
                     || useless_type_conversion_p (type2, type))
              continue;
            candidates.safe_push (&ranges[j - 1]);
          }
        gimple_seq seq = NULL;
        tree op = NULL_TREE;
        unsigned int id;
        struct range_entry *r;
        candidates.safe_push (&ranges[k - 1]);
        FOR_EACH_VEC_ELT (candidates, id, r)
          {
            gimple *g;
            enum tree_code code;
            if (id == 0)
              {
                op = r->exp;
                continue;
              }
            if (id == l
                || POINTER_TYPE_P (TREE_TYPE (op))
                || TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
              {
                code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
                tree type3 = id >= l ? type1 : pointer_sized_int_node;
                if (code == BIT_NOT_EXPR
                    && TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
                  {
                    g = gimple_build_assign (make_ssa_name (type3),
                                             NOP_EXPR, op);
                    gimple_seq_add_stmt_without_update (&seq, g);
                    op = gimple_assign_lhs (g);
                  }
                g = gimple_build_assign (make_ssa_name (type3), code, op);
                gimple_seq_add_stmt_without_update (&seq, g);
                op = gimple_assign_lhs (g);
              }
            tree type = TREE_TYPE (r->exp);
            tree exp = r->exp;
            if (POINTER_TYPE_P (type)
                || TREE_CODE (type) == OFFSET_TYPE
                || (id >= l && !useless_type_conversion_p (type1, type)))
              {
                tree type3 = id >= l ? type1 : pointer_sized_int_node;
                g = gimple_build_assign (make_ssa_name (type3), NOP_EXPR, exp);
                gimple_seq_add_stmt_without_update (&seq, g);
                exp = gimple_assign_lhs (g);
              }
            if ((b % 4) == 3)
              code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
            else
              code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
            g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
                                     code, op, exp);
            gimple_seq_add_stmt_without_update (&seq, g);
            op = gimple_assign_lhs (g);
          }
        type1 = TREE_TYPE (ranges[k - 1].exp);
        if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
          {
            gimple *g
              = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
            gimple_seq_add_stmt_without_update (&seq, g);
            op = gimple_assign_lhs (g);
          }
        candidates.pop ();
        if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
                               candidates.length (), opcode, ops, op,
                               seq, ranges[k - 1].in_p, ranges[k - 1].low,
                               ranges[k - 1].high, strict_overflow_p))
          any_changes = true;
        else
          gimple_seq_discard (seq);
        if ((b % 4) == 2 && buckets[b] != i)
          /* There is more work to do for this bucket.  */
          b--;
      }

  return any_changes;
}
/* Attempt to optimize for signed a and b where b is known to be >= 0:
   a >= 0 && a < b into (unsigned) a < (unsigned) b
   a >= 0 && a <= b into (unsigned) a <= (unsigned) b  */
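
/* For illustration (not used by the code): the two signed tests

     bool f (int a, int b) { return a >= 0 && a < b; }   // b known >= 0

   collapse into one unsigned comparison

     bool f (int a, int b) { return (unsigned) a < (unsigned) b; }

   because casting a negative A to unsigned yields a value greater than
   INT_MAX, which can never be below a B known to be non-negative.  */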
static bool
optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
                                vec<operand_entry *> *ops,
                                struct range_entry *ranges,
                                basic_block first_bb)
{
  int i;
  bool any_changes = false;
  hash_map<tree, int> *map = NULL;

  for (i = first; i < length; i++)
    {
      if (ranges[i].exp == NULL_TREE
          || TREE_CODE (ranges[i].exp) != SSA_NAME
          || !ranges[i].in_p)
        continue;

      tree type = TREE_TYPE (ranges[i].exp);
      if (!INTEGRAL_TYPE_P (type)
          || TYPE_UNSIGNED (type)
          || ranges[i].low == NULL_TREE
          || !integer_zerop (ranges[i].low)
          || ranges[i].high != NULL_TREE)
        continue;
      /* EXP >= 0 here.  */
      if (map == NULL)
        map = new hash_map <tree, int>;
      map->put (ranges[i].exp, i);
    }

  if (map == NULL)
    return false;

  for (i = 0; i < length; i++)
    {
      bool in_p = ranges[i].in_p;
      if (ranges[i].low == NULL_TREE
          || ranges[i].high == NULL_TREE)
        continue;
      if (!integer_zerop (ranges[i].low)
          || !integer_zerop (ranges[i].high))
        {
          if (ranges[i].exp
              && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
              && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
              && integer_onep (ranges[i].low)
              && integer_onep (ranges[i].high))
            in_p = !in_p;
          else
            continue;
        }

      gimple *stmt;
      tree_code ccode;
      tree rhs1, rhs2;
      if (ranges[i].exp)
        {
          if (TREE_CODE (ranges[i].exp) != SSA_NAME)
            continue;
          stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
          if (!is_gimple_assign (stmt))
            continue;
          ccode = gimple_assign_rhs_code (stmt);
          rhs1 = gimple_assign_rhs1 (stmt);
          rhs2 = gimple_assign_rhs2 (stmt);
        }
      else
        {
          operand_entry *oe = (*ops)[ranges[i].idx];
          stmt = last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
          if (gimple_code (stmt) != GIMPLE_COND)
            continue;
          ccode = gimple_cond_code (stmt);
          rhs1 = gimple_cond_lhs (stmt);
          rhs2 = gimple_cond_rhs (stmt);
        }

      if (TREE_CODE (rhs1) != SSA_NAME
          || rhs2 == NULL_TREE
          || TREE_CODE (rhs2) != SSA_NAME)
        continue;

      switch (ccode)
        {
        case GT_EXPR:
        case GE_EXPR:
        case LT_EXPR:
        case LE_EXPR:
          break;
        default:
          continue;
        }
      if (in_p)
        ccode = invert_tree_comparison (ccode, false);
      switch (ccode)
        {
        case GT_EXPR:
        case GE_EXPR:
          std::swap (rhs1, rhs2);
          ccode = swap_tree_comparison (ccode);
          break;
        case LT_EXPR:
        case LE_EXPR:
          break;
        default:
          gcc_unreachable ();
        }

      int *idx = map->get (rhs1);
      if (idx == NULL)
        continue;

      /* maybe_optimize_range_tests allows statements without side-effects
         in the basic blocks as long as they are consumed in the same bb.
         Make sure rhs2's def stmt is not among them, otherwise we can't
         use safely get_nonzero_bits on it.  E.g. in:
          # RANGE [-83, 1] NONZERO 173
          # k_32 = PHI <k_47(13), k_12(9)>
         ...
          if (k_32 >= 0)
            goto <bb 5>; [26.46%]
          else
            goto <bb 9>; [73.54%]

          <bb 5> [local count: 140323371]:
          # RANGE [0, 1] NONZERO 1
          _5 = (int) k_32;
          # RANGE [0, 4] NONZERO 4
          _21 = _5 << 2;
          # RANGE [0, 4] NONZERO 4
          iftmp.0_44 = (char) _21;
          if (k_32 < iftmp.0_44)
            goto <bb 6>; [84.48%]
          else
            goto <bb 9>; [15.52%]
         the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
         k_32 >= 0.  If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
         to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
         those stmts even for negative k_32 and the value ranges would be no
         longer guaranteed and so the optimization would be invalid.  */
      while (opcode == ERROR_MARK)
        {
          gimple *g = SSA_NAME_DEF_STMT (rhs2);
          basic_block bb2 = gimple_bb (g);
          if (bb2
              && bb2 != first_bb
              && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
            {
              /* As an exception, handle a few common cases.  */
              if (gimple_assign_cast_p (g)
                  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
                {
                  tree op0 = gimple_assign_rhs1 (g);
                  if (TYPE_UNSIGNED (TREE_TYPE (op0))
                      && (TYPE_PRECISION (TREE_TYPE (rhs2))
                          > TYPE_PRECISION (TREE_TYPE (op0))))
                    /* Zero-extension is always ok.  */
                    break;
                  else if (TYPE_PRECISION (TREE_TYPE (rhs2))
                           == TYPE_PRECISION (TREE_TYPE (op0))
                           && TREE_CODE (op0) == SSA_NAME)
                    {
                      /* Cast from signed to unsigned or vice versa.  Retry
                         with the op0 as new rhs2.  */
                      rhs2 = op0;
                      continue;
                    }
                }
              else if (is_gimple_assign (g)
                       && gimple_assign_rhs_code (g) == BIT_AND_EXPR
                       && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
                       && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
                /* Masking with INTEGER_CST with MSB clear is always ok
                   too.  */
                break;
              rhs2 = NULL_TREE;
            }
          break;
        }
      if (rhs2 == NULL_TREE)
        continue;

      wide_int nz = get_nonzero_bits (rhs2);
      if (wi::neg_p (nz))
        continue;

      /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
         and RHS2 is known to be RHS2 >= 0.  */
      tree utype = unsigned_type_for (TREE_TYPE (rhs1));

      enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
      if ((ranges[*idx].strict_overflow_p
           || ranges[i].strict_overflow_p)
          && issue_strict_overflow_warning (wc))
        warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
                    "assuming signed overflow does not occur "
                    "when simplifying range test");

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          struct range_entry *r = &ranges[*idx];
          fprintf (dump_file, "Optimizing range test ");
          print_generic_expr (dump_file, r->exp);
          fprintf (dump_file, " +[");
          print_generic_expr (dump_file, r->low);
          fprintf (dump_file, ", ");
          print_generic_expr (dump_file, r->high);
          fprintf (dump_file, "] and comparison ");
          print_generic_expr (dump_file, rhs1);
          fprintf (dump_file, " %s ", op_symbol_code (ccode));
          print_generic_expr (dump_file, rhs2);
          fprintf (dump_file, "\n into (");
          print_generic_expr (dump_file, utype);
          fprintf (dump_file, ") ");
          print_generic_expr (dump_file, rhs1);
          fprintf (dump_file, " %s (", op_symbol_code (ccode));
          print_generic_expr (dump_file, utype);
          fprintf (dump_file, ") ");
          print_generic_expr (dump_file, rhs2);
          fprintf (dump_file, "\n");
        }

      operand_entry *oe = (*ops)[ranges[i].idx];
      ranges[i].in_p = 0;
      if (opcode == BIT_IOR_EXPR
          || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
        {
          ranges[i].in_p = 1;
          ccode = invert_tree_comparison (ccode, false);
        }

      unsigned int uid = gimple_uid (stmt);
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
      gimple_set_uid (g, uid);
      rhs1 = gimple_assign_lhs (g);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
        {
          g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
          gimple_set_uid (g, uid);
          rhs2 = gimple_assign_lhs (g);
          gsi_insert_before (&gsi, g, GSI_SAME_STMT);
        }
      if (tree_swap_operands_p (rhs1, rhs2))
        {
          std::swap (rhs1, rhs2);
          ccode = swap_tree_comparison (ccode);
        }
      if (gimple_code (stmt) == GIMPLE_COND)
        {
          gcond *c = as_a <gcond *> (stmt);
          gimple_cond_set_code (c, ccode);
          gimple_cond_set_lhs (c, rhs1);
          gimple_cond_set_rhs (c, rhs2);
          update_stmt (stmt);
        }
      else
        {
          tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
          if (!INTEGRAL_TYPE_P (ctype)
              || (TREE_CODE (ctype) != BOOLEAN_TYPE
                  && TYPE_PRECISION (ctype) != 1))
            ctype = boolean_type_node;
          g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
          gimple_set_uid (g, uid);
          gsi_insert_before (&gsi, g, GSI_SAME_STMT);
          if (oe->op && ctype != TREE_TYPE (oe->op))
            {
              g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
                                       NOP_EXPR, gimple_assign_lhs (g));
              gimple_set_uid (g, uid);
              gsi_insert_before (&gsi, g, GSI_SAME_STMT);
            }
          ranges[i].exp = gimple_assign_lhs (g);
          oe->op = ranges[i].exp;
          ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
          ranges[i].high = ranges[i].low;
        }
      ranges[i].strict_overflow_p = false;
      oe = (*ops)[ranges[*idx].idx];
      /* Now change all the other range test immediate uses, so that
         those tests will be optimized away.  */
      if (opcode == ERROR_MARK)
        {
          if (oe->op)
            oe->op = build_int_cst (TREE_TYPE (oe->op),
                                    oe->rank == BIT_IOR_EXPR ? 0 : 1);
          else
            oe->op = (oe->rank == BIT_IOR_EXPR
                      ? boolean_false_node : boolean_true_node);
        }
      else
        oe->op = error_mark_node;
      ranges[*idx].exp = NULL_TREE;
      ranges[*idx].low = NULL_TREE;
      ranges[*idx].high = NULL_TREE;
      any_changes = true;
    }

  delete map;
  return any_changes;
}
/* Optimize range tests, similarly to how fold_range_test optimizes
   them on trees.  The tree code for the binary
   operation between all the operands is OPCODE.
   If OPCODE is ERROR_MARK, optimize_range_tests is called from within
   maybe_optimize_range_tests for inter-bb range optimization.
   In that case if oe->op is NULL, oe->id is bb->index whose
   GIMPLE_COND is && or ||ed into the test, and oe->rank says
   the actual opcode.
   FIRST_BB is the first basic block if OPCODE is ERROR_MARK.  */
static bool
optimize_range_tests (enum tree_code opcode,
                      vec<operand_entry *> *ops, basic_block first_bb)
{
  unsigned int length = ops->length (), i, j, first;
  operand_entry *oe;
  struct range_entry *ranges;
  bool any_changes = false;

  if (length == 1)
    return false;

  ranges = XNEWVEC (struct range_entry, length);
  for (i = 0; i < length; i++)
    {
      oe = (*ops)[i];
      ranges[i].idx = i;
      init_range_entry (ranges + i, oe->op,
                        oe->op
                        ? NULL
                        : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun,
                                                                  oe->id)));
      /* For | invert it now, we will invert it again before emitting
         the optimized expression.  */
      if (opcode == BIT_IOR_EXPR
          || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
        ranges[i].in_p = !ranges[i].in_p;
    }

  qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
  for (i = 0; i < length; i++)
    if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
      break;

  /* Try to merge ranges.  */
  for (first = i; i < length; i++)
    {
      tree low = ranges[i].low;
      tree high = ranges[i].high;
      int in_p = ranges[i].in_p;
      bool strict_overflow_p = ranges[i].strict_overflow_p;
      int update_fail_count = 0;

      for (j = i + 1; j < length; j++)
        {
          if (ranges[i].exp != ranges[j].exp)
            break;
          if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
                             ranges[j].in_p, ranges[j].low, ranges[j].high))
            break;
          strict_overflow_p |= ranges[j].strict_overflow_p;
        }

      if (j == i + 1)
        continue;

      if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
                             opcode, ops, ranges[i].exp, NULL, in_p,
                             low, high, strict_overflow_p))
        {
          i = j - 1;
          any_changes = true;
        }
      /* Avoid quadratic complexity if all merge_ranges calls would succeed,
         while update_range_test would fail.  */
      else if (update_fail_count == 64)
        i = j - 1;
      else
        ++update_fail_count;
    }

  any_changes |= optimize_range_tests_1 (opcode, first, length, true,
                                         ops, ranges);

  if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
    any_changes |= optimize_range_tests_1 (opcode, first, length, false,
                                           ops, ranges);
  if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
    any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
                                                     ops, ranges);
  any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
                                                 ranges, first_bb);
  any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
                                                   ops, ranges);

  if (any_changes && opcode != ERROR_MARK)
    {
      j = 0;
      FOR_EACH_VEC_ELT (*ops, i, oe)
        {
          if (oe->op == error_mark_node)
            continue;
          else if (i != j)
            (*ops)[j] = oe;
          j++;
        }
      ops->truncate (j);
    }

  XDELETEVEC (ranges);
  return any_changes;
}
/* A subroutine of optimize_vec_cond_expr to extract and canonicalize
   the operands of the VEC_COND_EXPR.  Returns ERROR_MARK on failure,
   otherwise the comparison code.  TYPE is a return value that is set
   to the type of the comparison.  */
static tree_code
ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
                  tree *lhs, tree *rhs, gassign **vcond)
{
  if (TREE_CODE (var) != SSA_NAME)
    return ERROR_MARK;

  gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
  if (stmt == NULL)
    return ERROR_MARK;
  if (vcond)
    *vcond = stmt;

  /* ??? If we start creating more COND_EXPR, we could perform
     this same optimization with them.  For now, simplify.  */
  if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
    return ERROR_MARK;

  tree cond = gimple_assign_rhs1 (stmt);
  tree_code cmp = TREE_CODE (cond);
  if (cmp != SSA_NAME)
    return ERROR_MARK;

  gassign *assign = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (cond));
  if (assign == NULL
      || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
    return ERROR_MARK;

  cmp = gimple_assign_rhs_code (assign);
  if (lhs)
    *lhs = gimple_assign_rhs1 (assign);
  if (rhs)
    *rhs = gimple_assign_rhs2 (assign);

  /* ??? For now, allow only canonical true and false result vectors.
     We could expand this to other constants should the need arise,
     but at the moment we don't create them.  */
  tree t = gimple_assign_rhs2 (stmt);
  tree f = gimple_assign_rhs3 (stmt);
  bool inv;
  if (integer_all_onesp (t))
    inv = false;
  else if (integer_all_onesp (f))
    {
      cmp = invert_tree_comparison (cmp, false);
      inv = true;
      std::swap (t, f);
    }
  else
    return ERROR_MARK;
  if (!integer_zerop (f))
    return ERROR_MARK;

  /* Success!  */
  if (rets)
    *rets = assign;
  if (reti)
    *reti = inv;
  if (type)
    *type = TREE_TYPE (cond);
  return cmp;
}
/* Optimize the condition of VEC_COND_EXPRs which have been combined
   with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR).  */
static bool
optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
{
  unsigned int length = ops->length (), i, j;
  bool any_changes = false;

  if (length == 1)
    return false;

  for (i = 0; i < length; ++i)
    {
      tree elt0 = (*ops)[i]->op;

      gassign *stmt0, *vcond0;
      bool invert;
      tree type, lhs0, rhs0;
      tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
                                         &rhs0, &vcond0);
      if (cmp0 == ERROR_MARK)
        continue;

      for (j = i + 1; j < length; ++j)
        {
          tree &elt1 = (*ops)[j]->op;

          gassign *stmt1, *vcond1;
          tree lhs1, rhs1;
          tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
                                             &rhs1, &vcond1);
          if (cmp1 == ERROR_MARK)
            continue;

          tree comb;
          if (opcode == BIT_AND_EXPR)
            comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
                                               cmp1, lhs1, rhs1);
          else if (opcode == BIT_IOR_EXPR)
            comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
                                              cmp1, lhs1, rhs1);
          else
            gcc_unreachable ();
          if (comb == NULL)
            continue;

          /* Success!  */
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Transforming ");
              print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
              fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
              print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
              fprintf (dump_file, " into ");
              print_generic_expr (dump_file, comb);
              fputc ('\n', dump_file);
            }

          gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
          tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
                                               true, GSI_SAME_STMT);
          if (invert)
            swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
                               gimple_assign_rhs3_ptr (vcond0));
          gimple_assign_set_rhs1 (vcond0, exp);
          update_stmt (vcond0);

          elt1 = error_mark_node;
          any_changes = true;
        }
    }

  if (any_changes)
    {
      operand_entry *oe;
      j = 0;
      FOR_EACH_VEC_ELT (*ops, i, oe)
        {
          if (oe->op == error_mark_node)
            continue;
          else if (i != j)
            (*ops)[j] = oe;
          j++;
        }
      ops->truncate (j);
    }

  return any_changes;
}
/* Return true if STMT is a cast like:
   <bb N>:
   ...
   _123 = (int) _234;

   <bb M>:
   # _345 = PHI <_123(N), 1(...), 1(...)>
   where _234 has bool type, _123 has single use and
   bb N has a single successor M.  This is commonly used in
   the last block of a range test.

   Also return true if STMT is tcc_compare like:
   <bb N>:
   ...
   _234 = a_2(D) == 2;

   <bb M>:
   # _345 = PHI <_234(N), 1(...), 1(...)>
   _346 = (int) _345;
   where _234 has bool type, single use and
   bb N has a single successor M.  This is commonly used in
   the last block of a range test.  */
4365 final_range_test_p (gimple
*stmt
)
4367 basic_block bb
, rhs_bb
, lhs_bb
;
4370 use_operand_p use_p
;
4373 if (!gimple_assign_cast_p (stmt
)
4374 && (!is_gimple_assign (stmt
)
4375 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4376 != tcc_comparison
)))
4378 bb
= gimple_bb (stmt
);
4379 if (!single_succ_p (bb
))
4381 e
= single_succ_edge (bb
);
4382 if (e
->flags
& EDGE_COMPLEX
)
4385 lhs
= gimple_assign_lhs (stmt
);
4386 rhs
= gimple_assign_rhs1 (stmt
);
4387 if (gimple_assign_cast_p (stmt
)
4388 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
4389 || TREE_CODE (rhs
) != SSA_NAME
4390 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
))
4393 if (!gimple_assign_cast_p (stmt
)
4394 && (TREE_CODE (TREE_TYPE (lhs
)) != BOOLEAN_TYPE
))
4397 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4398 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
4401 if (gimple_code (use_stmt
) != GIMPLE_PHI
4402 || gimple_bb (use_stmt
) != e
->dest
)
4405 /* And that the rhs is defined in the same loop. */
4406 if (gimple_assign_cast_p (stmt
))
4408 if (TREE_CODE (rhs
) != SSA_NAME
4409 || !(rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
)))
4410 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
4415 if (TREE_CODE (lhs
) != SSA_NAME
4416 || !(lhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (lhs
)))
4417 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), lhs_bb
))
/* Return true if BB is suitable basic block for inter-bb range test
   optimization.  If BACKWARD is true, BB should be the only predecessor
   of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
   or compared with to find a common basic block to which all conditions
   branch to if true resp. false.  If BACKWARD is false, TEST_BB should
   be the only predecessor of BB.  *TEST_SWAPPED_P is set to true if
   TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
   block points to an empty block that falls through into *OTHER_BB and
   the phi args match that path.  */

static bool
suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
                  bool *test_swapped_p, bool backward)
{
  edge_iterator ei, ei2;
  edge e, e2;
  gimple *stmt;
  gphi_iterator gsi;
  bool other_edge_seen = false;
  bool is_cond;

  /* Check last stmt first.  */
  stmt = last_nondebug_stmt (bb);
  if (stmt == NULL
      || (gimple_code (stmt) != GIMPLE_COND
          && (backward || !final_range_test_p (stmt)))
      || gimple_visited_p (stmt)
      || stmt_could_throw_p (cfun, stmt)
      || *other_bb == bb)
    return false;
  is_cond = gimple_code (stmt) == GIMPLE_COND;
  if (is_cond)
    {
      /* If last stmt is GIMPLE_COND, verify that one of the succ edges
         goes to the next bb (if BACKWARD, it is TEST_BB), and the other
         to *OTHER_BB (if not set yet, try to find it out).  */
      if (EDGE_COUNT (bb->succs) != 2)
        return false;
      FOR_EACH_EDGE (e, ei, bb->succs)
        {
          if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
            return false;
          if (e->dest == test_bb)
            {
              if (backward)
                continue;
              else
                return false;
            }
          if (e->dest == bb)
            return false;
          if (*other_bb == NULL)
            {
              FOR_EACH_EDGE (e2, ei2, test_bb->succs)
                if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
                  return false;
                else if (e->dest == e2->dest)
                  *other_bb = e->dest;
              if (*other_bb == NULL)
                return false;
            }
          if (e->dest == *other_bb)
            other_edge_seen = true;
          else if (backward)
            return false;
        }
      if (*other_bb == NULL || !other_edge_seen)
        return false;
    }
  else if (single_succ (bb) != *other_bb)
    return false;

  /* Now check all PHIs of *OTHER_BB.  */
  e = find_edge (bb, *other_bb);
  e2 = find_edge (test_bb, *other_bb);
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
         corresponding to BB and TEST_BB predecessor must be the same.  */
      if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
                            gimple_phi_arg_def (phi, e2->dest_idx), 0))
        {
          /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
             one of the PHIs should have the lhs of the last stmt in
             that block as PHI arg and that PHI should have 0 or 1
             corresponding to it in all other range test basic blocks
             considered.  */
          if (!is_cond)
            {
              if (gimple_phi_arg_def (phi, e->dest_idx)
                  == gimple_assign_lhs (stmt)
                  && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
                      || integer_onep (gimple_phi_arg_def (phi,
                                                           e2->dest_idx))))
                continue;
            }
          else
            {
              gimple *test_last = last_nondebug_stmt (test_bb);
              if (gimple_code (test_last) == GIMPLE_COND)
                {
                  if (backward ? e2->src != test_bb : e->src != bb)
                    return false;

                  /* For last_bb, handle also:
                     if (x_3(D) == 3)
                       goto <bb 6>; [34.00%]
                     else
                       goto <bb 7>; [66.00%]

                     <bb 6> [local count: 79512730]:

                     <bb 7> [local count: 1073741824]:
                     # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
                     where bb 7 is *OTHER_BB, but the PHI values from the
                     earlier bbs match the path through the empty bb
                     in between.  */
                  edge e3;
                  if (backward)
                    e3 = EDGE_SUCC (test_bb,
                                    e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
                  else
                    e3 = EDGE_SUCC (bb,
                                    e == EDGE_SUCC (bb, 0) ? 1 : 0);
                  if (empty_block_p (e3->dest)
                      && single_succ_p (e3->dest)
                      && single_succ (e3->dest) == *other_bb
                      && single_pred_p (e3->dest)
                      && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
                    {
                      if (backward)
                        e2 = single_succ_edge (e3->dest);
                      else
                        e = single_succ_edge (e3->dest);
                      if (test_swapped_p)
                        *test_swapped_p = true;
                      continue;
                    }
                }
              else if (gimple_phi_arg_def (phi, e2->dest_idx)
                       == gimple_assign_lhs (test_last)
                       && (integer_zerop (gimple_phi_arg_def (phi,
                                                              e->dest_idx))
                           || integer_onep (gimple_phi_arg_def (phi,
                                                                e->dest_idx))))
                continue;
            }

          return false;
        }
    }
  return true;
}
/* Return true if BB doesn't have side-effects that would disallow
   range test optimization, all SSA_NAMEs set in the bb are consumed
   in the bb and there are no PHIs.  */

static bool
no_side_effect_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  gimple *last;

  if (!gimple_seq_empty_p (phi_nodes (bb)))
    return false;
  last = last_nondebug_stmt (bb);
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      tree lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;

      if (is_gimple_debug (stmt))
        continue;
      if (gimple_has_side_effects (stmt))
        return false;
      if (stmt == last)
        return true;
      if (!is_gimple_assign (stmt))
        return false;
      lhs = gimple_assign_lhs (stmt);
      if (TREE_CODE (lhs) != SSA_NAME)
        return false;
      if (gimple_assign_rhs_could_trap_p (stmt))
        return false;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          gimple *use_stmt = USE_STMT (use_p);
          if (is_gimple_debug (use_stmt))
            continue;
          if (gimple_bb (use_stmt) != bb)
            return false;
        }
    }
  return false;
}
/* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
   return true and fill in *OPS recursively.  */

static bool
get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
         class loop *loop)
{
  gimple *stmt = SSA_NAME_DEF_STMT (var);
  tree rhs[2];
  int i;

  if (!is_reassociable_op (stmt, code, loop))
    return false;

  rhs[0] = gimple_assign_rhs1 (stmt);
  rhs[1] = gimple_assign_rhs2 (stmt);
  gimple_set_visited (stmt, true);
  for (i = 0; i < 2; i++)
    if (TREE_CODE (rhs[i]) == SSA_NAME
        && !get_ops (rhs[i], code, ops, loop)
        && has_single_use (rhs[i]))
      {
        operand_entry *oe = operand_entry_pool.allocate ();

        oe->op = rhs[i];
        oe->rank = code;
        oe->id = 0;
        oe->count = 1;
        oe->stmt_to_insert = NULL;
        ops->safe_push (oe);
      }
  return true;
}
/* Find the ops that were added by get_ops starting from VAR, see if
   they were changed during update_range_test and if yes, create new
   stmts.  */

static tree
update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops,
            unsigned int *pidx, class loop *loop)
{
  gimple *stmt = SSA_NAME_DEF_STMT (var);
  tree rhs[4];
  int i;

  if (!is_reassociable_op (stmt, code, loop))
    return NULL;

  rhs[0] = gimple_assign_rhs1 (stmt);
  rhs[1] = gimple_assign_rhs2 (stmt);
  rhs[2] = rhs[0];
  rhs[3] = rhs[1];
  for (i = 0; i < 2; i++)
    if (TREE_CODE (rhs[i]) == SSA_NAME)
      {
        rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
        if (rhs[2 + i] == NULL_TREE)
          {
            if (has_single_use (rhs[i]))
              rhs[2 + i] = ops[(*pidx)++]->op;
            else
              rhs[2 + i] = rhs[i];
          }
      }
  if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
      && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      var = make_ssa_name (TREE_TYPE (var));
      gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
                                        rhs[2], rhs[3]);
      gimple_set_uid (g, gimple_uid (stmt));
      gimple_set_visited (g, true);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
      if (fold_stmt_inplace (&gsi2))
        update_stmt (g);
    }
  return var;
}
/* Structure to track the initial value passed to get_ops and
   the range in the ops vector for each basic block.  */

struct inter_bb_range_test_entry
{
  tree op;
  unsigned int first_idx, last_idx;
};
/* Inter-bb range test optimization.

   Returns TRUE if a gimple conditional is optimized to a true/false,
   otherwise return FALSE.

   This indicates to the caller that it should run a CFG cleanup pass
   once reassociation is completed.  */

static bool
maybe_optimize_range_tests (gimple *stmt)
{
  basic_block first_bb = gimple_bb (stmt);
  basic_block last_bb = first_bb;
  basic_block other_bb = NULL;
  basic_block bb;
  edge_iterator ei;
  edge e;
  auto_vec<operand_entry *> ops;
  auto_vec<inter_bb_range_test_entry> bbinfo;
  bool any_changes = false;
  bool cfg_cleanup_needed = false;

  /* Consider only basic blocks that end with GIMPLE_COND or
     a cast statement satisfying final_range_test_p.  All
     but the last bb in the first_bb .. last_bb range
     should end with GIMPLE_COND.  */
  if (gimple_code (stmt) == GIMPLE_COND)
    {
      if (EDGE_COUNT (first_bb->succs) != 2)
        return cfg_cleanup_needed;
    }
  else if (final_range_test_p (stmt))
    other_bb = single_succ (first_bb);
  else
    return cfg_cleanup_needed;

  if (stmt_could_throw_p (cfun, stmt))
    return cfg_cleanup_needed;

  /* As relative ordering of post-dominator sons isn't fixed,
     maybe_optimize_range_tests can be called first on any
     bb in the range we want to optimize.  So, start searching
     backwards, if first_bb can be set to a predecessor.  */
  while (single_pred_p (first_bb))
    {
      basic_block pred_bb = single_pred (first_bb);
      if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
        break;
      if (!no_side_effect_bb (first_bb))
        break;
      first_bb = pred_bb;
    }
  /* If first_bb is last_bb, other_bb hasn't been computed yet.
     Before starting forward search in last_bb successors, find
     out the other_bb.  */
  if (first_bb == last_bb)
    {
      other_bb = NULL;
      /* As non-GIMPLE_COND last stmt always terminates the range,
         if forward search didn't discover anything, just give up.  */
      if (gimple_code (stmt) != GIMPLE_COND)
        return cfg_cleanup_needed;
      /* Look at both successors.  Either it ends with a GIMPLE_COND
         and satisfies suitable_cond_bb, or ends with a cast and
         other_bb is that cast's successor.  */
      FOR_EACH_EDGE (e, ei, first_bb->succs)
        if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
            || e->dest == first_bb)
          return cfg_cleanup_needed;
        else if (single_pred_p (e->dest))
          {
            stmt = last_nondebug_stmt (e->dest);
            if (stmt
                && gimple_code (stmt) == GIMPLE_COND
                && EDGE_COUNT (e->dest->succs) == 2)
              {
                if (suitable_cond_bb (first_bb, e->dest, &other_bb,
                                      NULL, true))
                  break;
                else
                  other_bb = NULL;
              }
            else if (stmt
                     && final_range_test_p (stmt)
                     && find_edge (first_bb, single_succ (e->dest)))
              {
                other_bb = single_succ (e->dest);
                if (other_bb == first_bb)
                  other_bb = NULL;
              }
          }
      if (other_bb == NULL)
        return cfg_cleanup_needed;
    }

  /* Now do the forward search, moving last_bb to successor bbs
     that aren't other_bb.  */
  while (EDGE_COUNT (last_bb->succs) == 2)
    {
      FOR_EACH_EDGE (e, ei, last_bb->succs)
        if (e->dest != other_bb)
          break;
      if (e == NULL)
        break;
      if (!single_pred_p (e->dest))
        break;
      if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
        break;
      if (!no_side_effect_bb (e->dest))
        break;
      last_bb = e->dest;
    }
  if (first_bb == last_bb)
    return cfg_cleanup_needed;

  /* Here basic blocks first_bb through last_bb's predecessor
     end with GIMPLE_COND, all of them have one of the edges to
     other_bb and another to another block in the range,
     all blocks except first_bb don't have side-effects and
     last_bb ends with either GIMPLE_COND, or cast satisfying
     final_range_test_p.  */
  for (bb = last_bb; ; bb = single_pred (bb))
    {
      enum tree_code code;
      tree lhs, rhs;
      inter_bb_range_test_entry bb_ent;

      bb_ent.op = NULL_TREE;
      bb_ent.first_idx = ops.length ();
      bb_ent.last_idx = bb_ent.first_idx;
      e = find_edge (bb, other_bb);
      stmt = last_nondebug_stmt (bb);
      gimple_set_visited (stmt, true);
      if (gimple_code (stmt) != GIMPLE_COND)
        {
          use_operand_p use_p;
          gimple *phi;
          edge e2;
          unsigned int d;

          lhs = gimple_assign_lhs (stmt);
          rhs = gimple_assign_rhs1 (stmt);
          gcc_assert (bb == last_bb);

          /* stmt is
             _123 = (int) _234;
             OR
             _234 = a_2(D) == 2;

             followed by:
             <bb M>:
             # _345 = PHI <_123(N), 1(...), 1(...)>

             or 0 instead of 1.  If it is 0, the _234
             range test is anded together with all the
             other range tests, if it is 1, it is ored with
             them.  */
          single_imm_use (lhs, &use_p, &phi);
          gcc_assert (gimple_code (phi) == GIMPLE_PHI);
          e2 = find_edge (first_bb, other_bb);
          d = e2->dest_idx;
          gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
          if (integer_zerop (gimple_phi_arg_def (phi, d)))
            code = BIT_AND_EXPR;
          else
            {
              gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
              code = BIT_IOR_EXPR;
            }

          /* If _234 SSA_NAME_DEF_STMT is
             _234 = _567 | _789;
             (or &, corresponding to 1/0 in the phi arguments,
             push into ops the individual range test arguments
             of the bitwise or resp. and, recursively.  */
          if (TREE_CODE (rhs) == SSA_NAME
              && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
                  != tcc_comparison)
              && !get_ops (rhs, code, &ops,
                           loop_containing_stmt (stmt))
              && has_single_use (rhs))
            {
              /* Otherwise, push the _234 range test itself.  */
              operand_entry *oe = operand_entry_pool.allocate ();

              oe->op = rhs;
              oe->rank = code;
              oe->id = 0;
              oe->count = 1;
              oe->stmt_to_insert = NULL;
              ops.safe_push (oe);
              bb_ent.last_idx++;
              bb_ent.op = rhs;
            }
          else if (is_gimple_assign (stmt)
                   && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
                       == tcc_comparison)
                   && !get_ops (lhs, code, &ops,
                                loop_containing_stmt (stmt))
                   && has_single_use (lhs))
            {
              operand_entry *oe = operand_entry_pool.allocate ();
              oe->op = lhs;
              oe->rank = code;
              oe->id = 0;
              oe->count = 1;
              ops.safe_push (oe);
              bb_ent.last_idx++;
              bb_ent.op = lhs;
            }
          else
            {
              bb_ent.last_idx = ops.length ();
              bb_ent.op = rhs;
            }
          bbinfo.safe_push (bb_ent);
          for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
            ops[i]->id = bb->index;
          continue;
        }
      else if (bb == last_bb)
        {
          /* For last_bb, handle also:
             if (x_3(D) == 3)
               goto <bb 6>; [34.00%]
             else
               goto <bb 7>; [66.00%]

             <bb 6> [local count: 79512730]:

             <bb 7> [local count: 1073741824]:
             # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
             where bb 7 is OTHER_BB, but the PHI values from the
             earlier bbs match the path through the empty bb
             in between.  */
          bool test_swapped_p = false;
          bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
                                      &other_bb, &test_swapped_p, true);
          gcc_assert (ok);
          if (test_swapped_p)
            e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
        }
      /* Otherwise stmt is GIMPLE_COND.  */
      code = gimple_cond_code (stmt);
      lhs = gimple_cond_lhs (stmt);
      rhs = gimple_cond_rhs (stmt);
      if (TREE_CODE (lhs) == SSA_NAME
          && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
          && ((code != EQ_EXPR && code != NE_EXPR)
              || rhs != boolean_false_node
              /* Either push into ops the individual bitwise
                 or resp. and operands, depending on which
                 edge is other_bb.  */
              || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
                                 ^ (code == EQ_EXPR))
                                ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
                           loop_containing_stmt (stmt))))
        {
          /* Or push the GIMPLE_COND stmt itself.  */
          operand_entry *oe = operand_entry_pool.allocate ();

          oe->op = NULL;
          oe->rank = (e->flags & EDGE_TRUE_VALUE)
                     ? BIT_IOR_EXPR : BIT_AND_EXPR;
          /* oe->op = NULL signs that there is no SSA_NAME
             for the range test, and oe->id instead is the
             basic block number, at whose end the GIMPLE_COND
             is.  */
          oe->id = bb->index;
          oe->count = 1;
          oe->stmt_to_insert = NULL;
          ops.safe_push (oe);
          bb_ent.op = NULL;
          bb_ent.last_idx++;
        }
      else if (ops.length () > bb_ent.first_idx)
        {
          bb_ent.op = lhs;
          bb_ent.last_idx = ops.length ();
        }
      bbinfo.safe_push (bb_ent);
      for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
        ops[i]->id = bb->index;
      if (bb == first_bb)
        break;
    }
  if (ops.length () > 1)
    any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
  if (any_changes)
    {
      unsigned int idx, max_idx = 0;
      /* update_ops relies on has_single_use predicates returning the
         same values as it did during get_ops earlier.  Additionally it
         never removes statements, only adds new ones and it should walk
         from the single imm use and check the predicate already before
         making those changes.
         On the other side, the handling of GIMPLE_COND directly can turn
         previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
         it needs to be done in a separate loop afterwards.  */
      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
        {
          if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
              && bbinfo[idx].op != NULL_TREE)
            {
              tree new_op;

              max_idx = idx;
              stmt = last_nondebug_stmt (bb);
              new_op = update_ops (bbinfo[idx].op,
                                   (enum tree_code)
                                   ops[bbinfo[idx].first_idx]->rank,
                                   ops, &bbinfo[idx].first_idx,
                                   loop_containing_stmt (stmt));
              if (new_op == NULL_TREE)
                {
                  gcc_assert (bb == last_bb);
                  new_op = ops[bbinfo[idx].first_idx++]->op;
                }
              if (bbinfo[idx].op != new_op)
                {
                  imm_use_iterator iter;
                  use_operand_p use_p;
                  gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;

                  FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
                    if (is_gimple_debug (use_stmt))
                      continue;
                    else if (gimple_code (use_stmt) == GIMPLE_COND
                             || gimple_code (use_stmt) == GIMPLE_PHI)
                      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
                        SET_USE (use_p, new_op);
                    else if ((is_gimple_assign (use_stmt)
                              && (TREE_CODE_CLASS
                                  (gimple_assign_rhs_code (use_stmt))
                                  == tcc_comparison)))
                      cast_or_tcc_cmp_stmt = use_stmt;
                    else if (gimple_assign_cast_p (use_stmt))
                      cast_or_tcc_cmp_stmt = use_stmt;
                    else
                      gcc_unreachable ();

                  if (cast_or_tcc_cmp_stmt)
                    {
                      gcc_assert (bb == last_bb);
                      tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
                      tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
                      enum tree_code rhs_code
                        = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
                          ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
                          : CONVERT_EXPR;
                      gassign *g;
                      if (is_gimple_min_invariant (new_op))
                        {
                          new_op = fold_convert (TREE_TYPE (lhs), new_op);
                          g = gimple_build_assign (new_lhs, new_op);
                        }
                      else
                        g = gimple_build_assign (new_lhs, rhs_code, new_op);
                      gimple_stmt_iterator gsi
                        = gsi_for_stmt (cast_or_tcc_cmp_stmt);
                      gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
                      gimple_set_visited (g, true);
                      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
                      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
                        if (is_gimple_debug (use_stmt))
                          continue;
                        else if (gimple_code (use_stmt) == GIMPLE_COND
                                 || gimple_code (use_stmt) == GIMPLE_PHI)
                          FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
                            SET_USE (use_p, new_lhs);
                        else
                          gcc_unreachable ();
                    }
                }
            }
          if (bb == first_bb)
            break;
        }
      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
        {
          if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
              && bbinfo[idx].op == NULL_TREE
              && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
            {
              gcond *cond_stmt = as_a <gcond *> (*gsi_last_bb (bb));

              if (idx > max_idx)
                max_idx = idx;

              /* If we collapse the conditional to a true/false
                 condition, then bubble that knowledge up to our caller.  */
              if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
                {
                  gimple_cond_make_false (cond_stmt);
                  cfg_cleanup_needed = true;
                }
              else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
                {
                  gimple_cond_make_true (cond_stmt);
                  cfg_cleanup_needed = true;
                }
              else
                {
                  gimple_cond_set_code (cond_stmt, NE_EXPR);
                  gimple_cond_set_lhs (cond_stmt,
                                       ops[bbinfo[idx].first_idx]->op);
                  gimple_cond_set_rhs (cond_stmt, boolean_false_node);
                }
              update_stmt (cond_stmt);
            }
          if (bb == first_bb)
            break;
        }

      /* The above changes could result in basic blocks after the first
         modified one, up to and including last_bb, to be executed even if
         they would not be in the original program.  If the value ranges of
         assignment lhs' in those bbs were dependent on the conditions
         guarding those basic blocks which now can change, the VRs might
         be incorrect.  As no_side_effect_bb should ensure those SSA_NAMEs
         are only used within the same bb, it should be not a big deal if
         we just reset all the VRs in those bbs.  See PR68671.  */
      for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
        reset_flow_sensitive_info_in_bb (bb);
    }
  return cfg_cleanup_needed;
}
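
/* Illustrative CFG shape handled above (hypothetical names, unsigned x):

     <bb 2>: if (x_1 == 0) goto <bb 4>; else goto <bb 3>;
     <bb 3>: if (x_1 == 1) goto <bb 4>; else goto <bb 5>;

   With bb 4 as other_bb, the two comparisons are collected as
   BIT_IOR_EXPR operands and optimize_range_tests can merge them into a
   single range test such as x_1 <= 1; the other GIMPLE_COND then
   becomes constant true or false, which is why the caller is told to
   schedule a CFG cleanup.  */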
/* Remove def stmt of VAR if VAR has zero uses and recurse
   on rhs1 operand if so.  */

static void
remove_visited_stmt_chain (tree var)
{
  gimple *stmt;
  gimple_stmt_iterator gsi;

  while (1)
    {
      if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
        return;
      stmt = SSA_NAME_DEF_STMT (var);
      if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
        {
          var = gimple_assign_rhs1 (stmt);
          gsi = gsi_for_stmt (stmt);
          reassoc_remove_stmt (&gsi);
          release_defs (stmt);
        }
      else
        return;
    }
}
/* This function checks three consecutive operands in
   passed operands vector OPS starting from OPINDEX and
   swaps two operands if it is profitable for binary operation
   consuming OPINDEX + 1 and OPINDEX + 2 operands.

   We pair ops with the same rank if possible.  */

static void
swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
                          unsigned int opindex)
{
  operand_entry *oe1, *oe2, *oe3;

  oe1 = ops[opindex];
  oe2 = ops[opindex + 1];
  oe3 = ops[opindex + 2];

  if (oe1->rank == oe2->rank && oe2->rank != oe3->rank)
    std::swap (*oe1, *oe3);
  else if (oe1->rank == oe3->rank && oe2->rank != oe3->rank)
    std::swap (*oe1, *oe2);
}
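
/* Worked example (illustrative): with ops[opindex..opindex+2] holding
   ranks { a:3, b:3, c:1 }, the first swap yields { c:1, b:3, a:3 } so
   the equally ranked b and a end up paired in one binary statement;
   with { a:3, b:1, c:3 } the second swap yields { b:1, a:3, c:3 },
   again pairing the two rank-3 operands.  */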
/* If definition of RHS1 or RHS2 dominates STMT, return the later of those
   two definitions, otherwise return STMT.  Sets INSERT_BEFORE to indicate
   whether RHS1 op RHS2 can be inserted before or needs to be inserted
   after the returned stmt.  */

static inline gimple *
find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before)
{
  insert_before = true;
  if (TREE_CODE (rhs1) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
    {
      stmt = SSA_NAME_DEF_STMT (rhs1);
      insert_before = false;
    }
  if (TREE_CODE (rhs2) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
    {
      stmt = SSA_NAME_DEF_STMT (rhs2);
      insert_before = false;
    }
  return stmt;
}
/* If the stmt that defines operand has to be inserted, insert it
   before the use.  */

static void
insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
{
  gcc_assert (is_gimple_assign (stmt_to_insert));
  tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
  tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
  bool insert_before;
  gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before);
  gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
  gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));

  /* If the insert point is not stmt, then insert_point would be
     the point where operand rhs1 or rhs2 is defined.  In this case,
     stmt_to_insert has to be inserted afterwards.  This would
     only happen when the stmt insertion point is flexible.  */
  if (insert_before)
    gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
  else
    insert_stmt_after (stmt_to_insert, insert_point);
}
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order.  Return new lhs.
   CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
   the current stmt and during recursive invocations.
   NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
   recursive invocations.  */

static tree
rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
                   const vec<operand_entry *> &ops, bool changed,
                   bool next_changed)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree lhs = gimple_assign_lhs (stmt);
  operand_entry *oe;

  /* The final recursion case for this function is that you have
     exactly two operations left.
     If we had exactly one op in the entire list to start with, we
     would have never called this function, and the tail recursion
     rewrites them one at a time.  */
  if (opindex + 2 == ops.length ())
    {
      operand_entry *oe1, *oe2;

      oe1 = ops[opindex];
      oe2 = ops[opindex + 1];

      if (rhs1 != oe1->op || rhs2 != oe2->op)
        {
          gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
          unsigned int uid = gimple_uid (stmt);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Transforming ");
              print_gimple_stmt (dump_file, stmt, 0);
            }

          /* If the stmt that defines operand has to be inserted, insert it
             before the use.  */
          if (oe1->stmt_to_insert)
            insert_stmt_before_use (stmt, oe1->stmt_to_insert);
          if (oe2->stmt_to_insert)
            insert_stmt_before_use (stmt, oe2->stmt_to_insert);
          /* Even when changed is false, reassociation could have e.g. removed
             some redundant operations, so unless we are just swapping the
             arguments or unless there is no change at all (then we just
             return lhs), force creation of a new SSA_NAME.  */
          if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
            {
              bool insert_before;
              gimple *insert_point
                = find_insert_point (stmt, oe1->op, oe2->op, insert_before);
              lhs = make_ssa_name (TREE_TYPE (lhs));
              stmt
                = gimple_build_assign (lhs, rhs_code,
                                       oe1->op, oe2->op);
              gimple_set_uid (stmt, uid);
              gimple_set_visited (stmt, true);
              if (insert_before)
                gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
              else
                insert_stmt_after (stmt, insert_point);
            }
          else
            {
              bool insert_before;
              gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op,
                                                      insert_before)
                                   == stmt);
              gimple_assign_set_rhs1 (stmt, oe1->op);
              gimple_assign_set_rhs2 (stmt, oe2->op);
              update_stmt (stmt);
            }

          if (rhs1 != oe1->op && rhs1 != oe2->op)
            remove_visited_stmt_chain (rhs1);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, " into ");
              print_gimple_stmt (dump_file, stmt, 0);
            }
        }
      return lhs;
    }

  /* If we hit here, we should have 3 or more ops left.  */
  gcc_assert (opindex + 2 < ops.length ());

  /* Rewrite the next operator.  */
  oe = ops[opindex];

  /* If the stmt that defines operand has to be inserted, insert it
     before the use.  */
  if (oe->stmt_to_insert)
    insert_stmt_before_use (stmt, oe->stmt_to_insert);

  /* Recurse on the LHS of the binary operator, which is guaranteed to
     be the non-leaf side.  */
  tree new_rhs1
    = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
                         changed || oe->op != rhs2 || next_changed,
                         false);

  if (oe->op != rhs2 || new_rhs1 != rhs1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Transforming ");
          print_gimple_stmt (dump_file, stmt, 0);
        }

      /* If changed is false, this is either opindex == 0
         or all outer rhs2's were equal to corresponding oe->op,
         and powi_result is NULL.
         That means lhs is equivalent before and after reassociation.
         Otherwise ensure the old lhs SSA_NAME is not reused and
         create a new stmt as well, so that any debug stmts will be
         properly adjusted.  */
      if (changed)
        {
          gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
          unsigned int uid = gimple_uid (stmt);
          bool insert_before;
          gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op,
                                                    insert_before);

          lhs = make_ssa_name (TREE_TYPE (lhs));
          stmt = gimple_build_assign (lhs, rhs_code,
                                      new_rhs1, oe->op);
          gimple_set_uid (stmt, uid);
          gimple_set_visited (stmt, true);
          if (insert_before)
            gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
          else
            insert_stmt_after (stmt, insert_point);
        }
      else
        {
          bool insert_before;
          gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op,
                                                  insert_before)
                               == stmt);
          gimple_assign_set_rhs1 (stmt, new_rhs1);
          gimple_assign_set_rhs2 (stmt, oe->op);
          update_stmt (stmt);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " into ");
          print_gimple_stmt (dump_file, stmt, 0);
        }
    }
  return lhs;
}
/* Find out how many cycles we need to compute statements chain.
   OPS_NUM holds number of statements in a chain.  CPU_WIDTH is a
   maximum number of independent statements we may execute per cycle.  */

static int
get_required_cycles (int ops_num, int cpu_width)
{
  int res;
  int elog;
  unsigned int rest;

  /* While we have more than 2 * cpu_width operands
     we may reduce number of operands by cpu_width
     per cycle.  */
  res = ops_num / (2 * cpu_width);

  /* Remained operands count may be reduced twice per cycle
     until we have only one operand.  */
  rest = (unsigned)(ops_num - res * cpu_width);
  elog = exact_log2 (rest);
  if (elog >= 0)
    res += elog;
  else
    res += floor_log2 (rest) + 1;

  return res;
}
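
/* Worked example (illustrative): ops_num = 10, cpu_width = 2.  The
   first phase retires 2 * cpu_width = 4 operands per cycle, so
   res = 10 / 4 = 2 and rest = 10 - 2 * 2 = 6.  6 is not a power of
   two, so the tail needs floor_log2 (6) + 1 = 3 further cycles, 5 in
   total.  With ops_num = 8 the rest is 4 and exact_log2 (4) = 2 gives
   4 cycles in total.  */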
/* Returns an optimal number of registers to use for computation of
   given statements.  */

static int
get_reassociation_width (int ops_num, enum tree_code opc,
                         machine_mode mode)
{
  int param_width = param_tree_reassoc_width;
  int width;
  int width_min;
  int cycles_best;

  if (param_width > 0)
    width = param_width;
  else
    width = targetm.sched.reassociation_width (opc, mode);

  if (width == 1)
    return width;

  /* Get the minimal time required for sequence computation.  */
  cycles_best = get_required_cycles (ops_num, width);

  /* Check if we may use less width and still compute sequence for
     the same time.  It will allow us to reduce registers usage.
     get_required_cycles is monotonically increasing with lower width
     so we can perform a binary search for the minimal width that still
     results in the optimal cycle count.  */
  width_min = 1;
  while (width > width_min)
    {
      int width_mid = (width + width_min) / 2;

      if (get_required_cycles (ops_num, width_mid) == cycles_best)
        width = width_mid;
      else if (width_min < width_mid)
        width_min = width_mid;
      else
        break;
    }

  return width;
}
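
/* Illustrative run of the search above (hypothetical numbers): suppose
   the target reports width 8 and get_required_cycles (ops_num, 8) is 4
   cycles.  The loop probes width_mid = 4; if 4 lanes still finish in 4
   cycles, width drops to 4 and the search continues below it, otherwise
   width_min rises, so the loop converges on the narrowest width that
   preserves the optimal cycle count.  */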
/* Rewrite statements with dependency chain with regard to the chance to
   generate FMA.
   For the chain with FMA: Try to keep fma opportunity as much as possible.
   For the chain without FMA: Putting the computation in rank order and trying
   to allow operations to be executed in parallel.
   E.g.
   e + f + g + a * b + c * d;

   ssa1 = e + f;
   ssa2 = g + a * b;
   ssa3 = ssa1 + c * d;
   ssa4 = ssa2 + ssa3;

   This reassociation approach preserves the chance of fma generation as much
   as possible.  */

static void
rewrite_expr_tree_parallel (gassign *stmt, int width, bool has_fma,
                            const vec<operand_entry *> &ops)
{
  enum tree_code opcode = gimple_assign_rhs_code (stmt);
  int op_num = ops.length ();
  gcc_assert (op_num > 0);
  int stmt_num = op_num - 1;
  gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
  int op_index = op_num - 1;
  int width_count = width;
  int i = 0, j = 0;
  tree tmp_op[2], op1;
  operand_entry *oe;
  gimple *stmt1 = NULL;
  tree last_rhs1 = gimple_assign_rhs1 (stmt);

  /* We start expression rewriting from the top statements.
     So, in this loop we create a full list of statements
     we will work with.  */
  stmts[stmt_num - 1] = stmt;
  for (i = stmt_num - 2; i >= 0; i--)
    stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));

  /* Width should not be larger than op_num / 2, since we can not create
     more parallel dependency chains that exceeds such value.  */
  width = width <= op_num / 2 ? width : op_num / 2;

  /* Build parallel dependency chain according to width.  */
  for (i = 0; i < width; i++)
    {
      /* If the chain has FMA, we do not swap two operands.  */
      if (op_index > 1 && !has_fma)
        swap_ops_for_binary_stmt (ops, op_index - 2);

      for (j = 0; j < 2; j++)
        {
          gcc_assert (op_index >= 0);
          oe = ops[op_index--];
          tmp_op[j] = oe->op;
          /* If the stmt that defines operand has to be inserted, insert it
             before the use.  */
          stmt1 = oe->stmt_to_insert;
          if (stmt1)
            insert_stmt_before_use (stmts[i], stmt1);
          stmt1 = NULL;
        }
      stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
                                    tmp_op[1], tmp_op[0], opcode);
      gimple_set_visited (stmts[i], true);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " into ");
          print_gimple_stmt (dump_file, stmts[i], 0);
        }
    }

  for (i = width; i < stmt_num; i++)
    {
      /* We keep original statement only for the last one.  All others are
         recreated.  */
      if (op_index < 0)
        {
          if (width_count == 2)
            {
              /* We keep original statement only for the last one.  All
                 others are recreated.  */
              gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs (stmts[i-1]));
              gimple_assign_set_rhs2 (stmts[i], gimple_assign_lhs (stmts[i-2]));
              update_stmt (stmts[i]);
            }
          else
            {
              stmts[i] =
                build_and_add_sum (TREE_TYPE (last_rhs1),
                                   gimple_assign_lhs (stmts[i-width_count]),
                                   gimple_assign_lhs (stmts[i-width_count+1]),
                                   opcode);
              gimple_set_visited (stmts[i], true);
              width_count--;
            }
        }
      else
        {
          /* Attach the rest of the ops to the parallel dependency chain.  */
          oe = ops[op_index--];
          op1 = oe->op;
          stmt1 = oe->stmt_to_insert;
          if (stmt1)
            insert_stmt_before_use (stmts[i], stmt1);
          stmt1 = NULL;
          stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
                                        gimple_assign_lhs (stmts[i-width]),
                                        op1, opcode);
          gimple_set_visited (stmts[i], true);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " into ");
          print_gimple_stmt (dump_file, stmts[i], 0);
        }
    }
  remove_visited_stmt_chain (last_rhs1);
}
/* Transform STMT, which is really (A + B) + (C + D) into the left
   linear form, ((A+B)+C)+D.
   Recurse on D if necessary.  */

static void
linearize_expr (gimple *stmt)
{
  gimple_stmt_iterator gsi;
  gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
  gimple *oldbinrhs = binrhs;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  gimple *newbinrhs = NULL;
  class loop *loop = loop_containing_stmt (stmt);
  tree lhs = gimple_assign_lhs (stmt);

  gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
              && is_reassociable_op (binrhs, rhscode, loop));

  gsi = gsi_for_stmt (stmt);

  gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
  binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
                                gimple_assign_rhs_code (binrhs),
                                gimple_assign_lhs (binlhs),
                                gimple_assign_rhs2 (binrhs));
  gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
  gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
  gimple_set_uid (binrhs, gimple_uid (stmt));

  if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
    newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Linearized: ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  reassociate_stats.linearized++;
  update_stmt (stmt);

  gsi = gsi_for_stmt (oldbinrhs);
  reassoc_remove_stmt (&gsi);
  release_defs (oldbinrhs);

  gimple_set_visited (stmt, true);
  gimple_set_visited (binlhs, true);
  gimple_set_visited (binrhs, true);

  /* Tail recurse on the new rhs if it still needs reassociation.  */
  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
    /* ??? This should probably be linearize_expr (newbinrhs) but I don't
       want to change the algorithm while converting to tuples.  */
    linearize_expr (stmt);
}
/* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
   it.  Otherwise, return NULL.  */

static gimple *
get_single_immediate_use (tree lhs)
{
  use_operand_p immuse;
  gimple *immusestmt;

  if (TREE_CODE (lhs) == SSA_NAME
      && single_imm_use (lhs, &immuse, &immusestmt)
      && is_gimple_assign (immusestmt))
    return immusestmt;

  return NULL;
}
/* Recursively negate the value of TONEGATE, and return the SSA_NAME
   representing the negated value.  Insertions of any necessary
   instructions go before GSI.
   This function is recursive in that, if you hand it "a_5" as the
   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */

static tree
negate_value (tree tonegate, gimple_stmt_iterator *gsip)
{
  gimple *negatedefstmt = NULL;
  tree resultofnegate;
  gimple_stmt_iterator gsi;
  unsigned int uid;

  /* If we are trying to negate a name, defined by an add, negate the
     add operands instead.  */
  if (TREE_CODE (tonegate) == SSA_NAME)
    negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
  if (TREE_CODE (tonegate) == SSA_NAME
      && is_gimple_assign (negatedefstmt)
      && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
      && has_single_use (gimple_assign_lhs (negatedefstmt))
      && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
    {
      tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
      tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
      tree lhs = gimple_assign_lhs (negatedefstmt);
      gimple *g;

      gsi = gsi_for_stmt (negatedefstmt);
      rhs1 = negate_value (rhs1, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs2 = negate_value (rhs2, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      lhs = make_ssa_name (TREE_TYPE (lhs));
      gimple_set_visited (negatedefstmt, true);
      g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
      gimple_set_uid (g, gimple_uid (negatedefstmt));
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      return lhs;
    }

  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
  resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
                                             NULL_TREE, true, GSI_SAME_STMT);
  gsi = *gsip;
  uid = gimple_uid (gsi_stmt (gsi));
  for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (gimple_uid (stmt) != 0)
        break;
      gimple_set_uid (stmt, uid);
    }
  return resultofnegate;
}
/* Return true if we should break up the subtract in STMT into an add
   with negate.  This is true when the subtract operands are really
   adds, or the subtract itself is used in an add expression.  In
   either case, breaking up the subtract into an add with negate
   exposes the adds to reassociation.  */

static bool
should_break_up_subtract (gimple *stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple *immusestmt;
  class loop *loop = loop_containing_stmt (stmt);

  if (TREE_CODE (binlhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
    return true;

  if (TREE_CODE (binrhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
    return true;

  if (TREE_CODE (lhs) == SSA_NAME
      && (immusestmt = get_single_immediate_use (lhs))
      && is_gimple_assign (immusestmt)
      && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
          || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
              && gimple_assign_rhs1 (immusestmt) == lhs)
          || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
    return true;
  return false;
}
/* Transform STMT from A - B into A + -B.  */

static void
break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Breaking up subtract ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  rhs2 = negate_value (rhs2, gsip);
  gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
  update_stmt (stmt);
}
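
/* Illustrative effect (hypothetical names):

     a_3 = b_1 - c_2;

   becomes

     t_4 = -c_2;
     a_3 = b_1 + t_4;

   exposing the addition to reassociation.  If c_2 is itself defined by
   a single-use PLUS_EXPR, negate_value instead pushes the negation onto
   that add's operands.  */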
/* Determine whether STMT is a builtin call that raises an SSA name
   to an integer power and has only one use.  If so, and this is early
   reassociation and unsafe math optimizations are permitted, place
   the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
   If any of these conditions does not hold, return FALSE.  */

static bool
acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
{
  tree arg1;
  REAL_VALUE_TYPE c, cint;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
      if (flag_errno_math)
        return false;

      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (TREE_CODE (arg1) != REAL_CST)
        return false;

      c = TREE_REAL_CST (arg1);

      if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
        return false;

      *exponent = real_to_integer (&c);
      real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
      if (!real_identical (&c, &cint))
        return false;

      break;

    CASE_CFN_POWI:
      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (!tree_fits_shwi_p (arg1))
        return false;

      *exponent = tree_to_shwi (arg1);
      break;

    default:
      return false;
    }

  /* Expanding negative exponents is generally unproductive, so we don't
     complicate matters with those.  Exponents of zero and one should
     have been handled by expression folding.  */
  if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
    return false;

  return true;
}
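
/* Illustrative example (hypothetical names): for

     t_3 = __builtin_pow (x_1, 4.0);

   this returns true with *base = x_1 and *exponent = 4 (given
   -funsafe-math-optimizations and no errno handling), whereas
   __builtin_pow (x_1, 0.5) is rejected because 0.5 does not survive
   the real_to_integer / real_from_integer round trip.  */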
/* Try to derive and add operand entry for OP to *OPS.  Return false if
   unsuccessful.  */

static bool
try_special_add_to_ops (vec<operand_entry *> *ops,
                        enum tree_code code,
                        tree op, gimple *def_stmt)
{
  tree base = NULL_TREE;
  HOST_WIDE_INT exponent = 0;

  if (TREE_CODE (op) != SSA_NAME
      || ! has_single_use (op))
    return false;

  if (code == MULT_EXPR
      && reassoc_insert_powi_p
      && flag_unsafe_math_optimizations
      && is_gimple_call (def_stmt)
      && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
    {
      add_repeat_to_ops_vec (ops, base, exponent);
      gimple_set_visited (def_stmt, true);
      return true;
    }
  else if (code == MULT_EXPR
           && is_gimple_assign (def_stmt)
           && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
           && !HONOR_SNANS (TREE_TYPE (op))
           && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
               || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))
           && (!FLOAT_TYPE_P (TREE_TYPE (op))
               || !DECIMAL_FLOAT_MODE_P (element_mode (op))))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      tree cst = build_minus_one_cst (TREE_TYPE (op));
      add_to_ops_vec (ops, rhs1);
      add_to_ops_vec (ops, cst);
      gimple_set_visited (def_stmt, true);
      return true;
    }

  return false;
}
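
/* Illustrative example (hypothetical names): in the multiply chain

     t_4 = -y_2;
     z_5 = x_1 * t_4;

   the NEGATE_EXPR case above pushes y_2 and -1 as separate factors, so
   the chain is processed as x_1 * y_2 * -1 and the negation can later
   be folded together with other constant factors.  */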
/* Recursively linearize a binary expression that is the RHS of STMT.
   Place the operands of the expression tree in the vector named OPS.  */

static void
linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
                     bool is_associative, bool set_visited)
{
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple *binlhsdef = NULL, *binrhsdef = NULL;
  bool binlhsisreassoc = false;
  bool binrhsisreassoc = false;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  class loop *loop = loop_containing_stmt (stmt);

  if (set_visited)
    gimple_set_visited (stmt, true);

  if (TREE_CODE (binlhs) == SSA_NAME)
    {
      binlhsdef = SSA_NAME_DEF_STMT (binlhs);
      binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
                         && !stmt_could_throw_p (cfun, binlhsdef));
    }

  if (TREE_CODE (binrhs) == SSA_NAME)
    {
      binrhsdef = SSA_NAME_DEF_STMT (binrhs);
      binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
                         && !stmt_could_throw_p (cfun, binrhsdef));
    }

  /* If the LHS is not reassociable, but the RHS is, we need to swap
     them.  If neither is reassociable, there is nothing we can do, so
     just put them in the ops vector.  If the LHS is reassociable,
     linearize it.  If both are reassociable, then linearize the RHS
     and the LHS.  */

  if (!binlhsisreassoc)
    {
      /* If this is not an associative operation like division, give up.  */
      if (!is_associative)
        {
          add_to_ops_vec (ops, binrhs);
          return;
        }

      if (!binrhsisreassoc)
        {
          bool swap = false;
          if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
            /* If we add ops for the rhs we expect to be able to recurse
               to it via the lhs during expression rewrite so swap
               operands.  */
            swap = true;
          else
            add_to_ops_vec (ops, binrhs);

          if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
            add_to_ops_vec (ops, binlhs);

          if (!swap)
            return;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "swapping operands of ");
          print_gimple_stmt (dump_file, stmt, 0);
        }

      swap_ssa_operands (stmt,
                         gimple_assign_rhs1_ptr (stmt),
                         gimple_assign_rhs2_ptr (stmt));
      update_stmt (stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " is now ");
          print_gimple_stmt (dump_file, stmt, 0);
        }
      if (!binrhsisreassoc)
        return;

      /* We want to make it so the lhs is always the reassociative op,
         so swap.  */
      std::swap (binlhs, binrhs);
    }
  else if (binrhsisreassoc)
    {
      linearize_expr (stmt);
      binlhs = gimple_assign_rhs1 (stmt);
      binrhs = gimple_assign_rhs2 (stmt);
    }

  gcc_assert (TREE_CODE (binrhs) != SSA_NAME
              || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
                                      rhscode, loop));
  linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
                       is_associative, set_visited);

  if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
    add_to_ops_vec (ops, binrhs);
}
/* Repropagate the negates back into subtracts, since no other pass
   currently does it.  */

static void
repropagate_negates (void)
{
  unsigned int i = 0;
  tree negate;

  FOR_EACH_VEC_ELT (plus_negates, i, negate)
    {
      gimple *user = get_single_immediate_use (negate);
      if (!user || !is_gimple_assign (user))
        continue;

      tree negateop = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
      if (TREE_CODE (negateop) == SSA_NAME
          && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop))
        continue;

      /* The negate operand can be either operand of a PLUS_EXPR
         (it can be the LHS if the RHS is a constant for example).

         Force the negate operand to the RHS of the PLUS_EXPR, then
         transform the PLUS_EXPR into a MINUS_EXPR.  */
      if (gimple_assign_rhs_code (user) == PLUS_EXPR)
        {
          /* If the negated operand appears on the LHS of the
             PLUS_EXPR, exchange the operands of the PLUS_EXPR
             to force the negated operand to the RHS of the PLUS_EXPR.  */
          if (gimple_assign_rhs1 (user) == negate)
            {
              swap_ssa_operands (user,
                                 gimple_assign_rhs1_ptr (user),
                                 gimple_assign_rhs2_ptr (user));
            }

          /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
             the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
          if (gimple_assign_rhs2 (user) == negate)
            {
              tree rhs1 = gimple_assign_rhs1 (user);
              gimple_stmt_iterator gsi = gsi_for_stmt (user);
              gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1,
                                              negateop);
              update_stmt (user);
            }
        }
      else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
        {
          if (gimple_assign_rhs1 (user) == negate)
            {
              /* We have
                   x = -negateop
                   y = x - b
                 which we transform into
                   x = negateop + b
                   y = -x .
                 This pushes down the negate which we possibly can merge
                 into some other operation, hence insert it into the
                 plus_negates vector.  */
              gimple *feed = SSA_NAME_DEF_STMT (negate);
              tree b = gimple_assign_rhs2 (user);
              gimple_stmt_iterator gsi = gsi_for_stmt (feed);
              gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
              tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
              gimple *g = gimple_build_assign (x, PLUS_EXPR, negateop, b);
              gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
              gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
              user = gsi_stmt (gsi2);
              update_stmt (user);
              reassoc_remove_stmt (&gsi);
              release_defs (feed);
              plus_negates.safe_push (gimple_assign_lhs (user));
            }
          else
            {
              /* Transform "x = -negateop; y = b - x" into "y = b + negateop",
                 getting rid of one operation.  */
              tree rhs1 = gimple_assign_rhs1 (user);
              gimple_stmt_iterator gsi = gsi_for_stmt (user);
              gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, negateop);
              update_stmt (gsi_stmt (gsi));
            }
        }
    }
}
/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   IE given
   d = f + g
   c = a + e
   b = c - d
   q = b - r
   k = t - q

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement
   and set UIDs within each basic block.  */

static void
break_up_subtract_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  unsigned int uid = 1;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      gimple_set_visited (stmt, false);
      gimple_set_uid (stmt, uid++);

      if (!is_gimple_assign (stmt)
          || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
          || !can_reassociate_op_p (gimple_assign_lhs (stmt)))
        continue;

      /* Look for simple gimple subtract operations.  */
      if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
        {
          if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
              || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))
            continue;

          /* Check for a subtract used only in an addition.  If this
             is the case, transform it into add of a negate for better
             reassociation.  IE transform C = A-B into C = A + -B if C
             is only used in an addition.  */
          if (should_break_up_subtract (stmt))
            break_up_subtract (stmt, &gsi);
        }
      else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
               && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
        plus_negates.safe_push (gimple_assign_lhs (stmt));
    }
  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    break_up_subtract_bb (son);
}
/* Used for repeated factor analysis.  */
struct repeat_factor
{
  /* An SSA name that occurs in a multiply chain.  */
  tree factor;

  /* Cached rank of the factor.  */
  unsigned rank;

  /* Number of occurrences of the factor in the chain.  */
  HOST_WIDE_INT count;

  /* An SSA name representing the product of this factor and
     all factors appearing later in the repeated factor vector.  */
  tree repr;
};


static vec<repeat_factor> repeat_factor_vec;
/* Used for sorting the repeat factor vector.  Sort primarily by
   ascending occurrence count, secondarily by descending rank.  */

static int
compare_repeat_factors (const void *x1, const void *x2)
{
  const repeat_factor *rf1 = (const repeat_factor *) x1;
  const repeat_factor *rf2 = (const repeat_factor *) x2;

  if (rf1->count != rf2->count)
    return rf1->count - rf2->count;

  return rf2->rank - rf1->rank;
}
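
/* Worked example (illustrative): factors { x: count 4, rank 9 },
   { y: count 2, rank 5 } and { z: count 2, rank 7 } sort as z, y, x:
   the two count-2 entries come first, and among them the higher-ranked
   z precedes y.  */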
6180 /* Look for repeated operands in OPS in the multiply tree rooted at
6181 STMT. Replace them with an optimal sequence of multiplies and powi
6182 builtin calls, and remove the used operands from OPS. Return an
6183 SSA name representing the value of the replacement sequence. */
6186 attempt_builtin_powi (gimple
*stmt
, vec
<operand_entry
*> *ops
)
6188 unsigned i
, j
, vec_len
;
6191 repeat_factor
*rf1
, *rf2
;
6192 repeat_factor rfnew
;
6193 tree result
= NULL_TREE
;
6194 tree target_ssa
, iter_result
;
6195 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
6196 tree powi_fndecl
= mathfn_built_in (type
, BUILT_IN_POWI
);
6197 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
6198 gimple
*mul_stmt
, *pow_stmt
;
6200 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6201 target, unless type is integral. */
6202 if (!powi_fndecl
&& !INTEGRAL_TYPE_P (type
))
6205 /* Allocate the repeated factor vector. */
6206 repeat_factor_vec
.create (10);
6208 /* Scan the OPS vector for all SSA names in the product and build
6209 up a vector of occurrence counts for each factor. */
6210 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
6212 if (TREE_CODE (oe
->op
) == SSA_NAME
)
6214 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
6216 if (rf1
->factor
== oe
->op
)
6218 rf1
->count
+= oe
->count
;
6223 if (j
>= repeat_factor_vec
.length ())
6225 rfnew
.factor
= oe
->op
;
6226 rfnew
.rank
= oe
->rank
;
6227 rfnew
.count
= oe
->count
;
6228 rfnew
.repr
= NULL_TREE
;
6229 repeat_factor_vec
.safe_push (rfnew
);
6234 /* Sort the repeated factor vector by (a) increasing occurrence count,
6235 and (b) decreasing rank. */
6236 repeat_factor_vec
.qsort (compare_repeat_factors
);
6238 /* It is generally best to combine as many base factors as possible
6239 into a product before applying __builtin_powi to the result.
6240 However, the sort order chosen for the repeated factor vector
6241 allows us to cache partial results for the product of the base
6242 factors for subsequent use. When we already have a cached partial
6243 result from a previous iteration, it is best to make use of it
6244 before looking for another __builtin_pow opportunity.
6246 As an example, consider x * x * y * y * y * z * z * z * z.
6247 We want to first compose the product x * y * z, raise it to the
6248 second power, then multiply this by y * z, and finally multiply
6249 by z. This can be done in 5 multiplies provided we cache y * z
6250 for use in both expressions:
6258 If we instead ignored the cached y * z and first multiplied by
6259 the __builtin_pow opportunity z * z, we would get the inferior:
6268 vec_len
= repeat_factor_vec
.length ();
6270 /* Repeatedly look for opportunities to create a builtin_powi call. */
6273 HOST_WIDE_INT power
;
6275 /* First look for the largest cached product of factors from
6276 preceding iterations. If found, create a builtin_powi for
6277 it if the minimum occurrence count for its factors is at
6278 least 2, or just use this cached product as our next
6279 multiplicand if the minimum occurrence count is 1. */
6280 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
6282 if (rf1
->repr
&& rf1
->count
> 0)
6292 iter_result
= rf1
->repr
;
6294 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6298 fputs ("Multiplying by cached product ", dump_file
);
6299 for (elt
= j
; elt
< vec_len
; elt
++)
6301 rf
= &repeat_factor_vec
[elt
];
6302 print_generic_expr (dump_file
, rf
->factor
);
6303 if (elt
< vec_len
- 1)
6304 fputs (" * ", dump_file
);
6306 fputs ("\n", dump_file
);
6311 if (INTEGRAL_TYPE_P (type
))
6313 gcc_assert (power
> 1);
6314 gimple_stmt_iterator gsip
= gsi
;
6316 iter_result
= powi_as_mults (&gsi
, gimple_location (stmt
),
6318 gimple_stmt_iterator gsic
= gsi
;
6319 while (gsi_stmt (gsic
) != gsi_stmt (gsip
))
6321 gimple_set_uid (gsi_stmt (gsic
), gimple_uid (stmt
));
6322 gimple_set_visited (gsi_stmt (gsic
), true);
6328 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
6330 = gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
6331 build_int_cst (integer_type_node
,
	  gimple_call_set_lhs (pow_stmt, iter_result);
	  gimple_set_location (pow_stmt, gimple_location (stmt));
	  gimple_set_uid (pow_stmt, gimple_uid (stmt));
	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      unsigned elt;
	      repeat_factor *rf;
	      fputs ("Building __builtin_pow call for cached product (",
		     dump_file);

	      for (elt = j; elt < vec_len; elt++)
		{
		  rf = &repeat_factor_vec[elt];
		  print_generic_expr (dump_file, rf->factor);
		  if (elt < vec_len - 1)
		    fputs (" * ", dump_file);
		}

	      fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n",
		       power);
	    }
	}
      else
	{
	  /* Otherwise, find the first factor in the repeated factor
	     vector whose occurrence count is at least 2.  If no such
	     factor exists, there are no builtin_powi opportunities
	     remaining.  */
	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	    if (rf1->count >= 2)
	      break;

	  if (j >= vec_len)
	    break;

	  power = rf1->count;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      unsigned elt;
	      repeat_factor *rf;
	      fputs ("Building __builtin_pow call for (", dump_file);

	      for (elt = j; elt < vec_len; elt++)
		{
		  rf = &repeat_factor_vec[elt];
		  print_generic_expr (dump_file, rf->factor);
		  if (elt < vec_len - 1)
		    fputs (" * ", dump_file);
		}

	      fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n", power);
	    }

	  reassociate_stats.pows_created++;

	  /* Visit each element of the vector in reverse order (so that
	     high-occurrence elements are visited first, and within the
	     same occurrence count, lower-ranked elements are visited
	     first).  Form a linear product of all elements in this order
	     whose occurrence count is at least that of element J.
	     Record the SSA name representing the product of each element
	     with all subsequent elements in the vector.  */
	  if (j == vec_len - 1)
	    rf1->repr = rf1->factor;
	  else
	    {
	      for (ii = vec_len - 2; ii >= (int) j; ii--)
		{
		  tree op1, op2;

		  rf1 = &repeat_factor_vec[ii];
		  rf2 = &repeat_factor_vec[ii + 1];

		  /* Init the last factor's representative to be itself.  */
		  if (!rf2->repr)
		    rf2->repr = rf2->factor;

		  op1 = rf1->factor;
		  op2 = rf2->repr;

		  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
		  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
						  op1, op2);
		  gimple_set_location (mul_stmt, gimple_location (stmt));
		  gimple_set_uid (mul_stmt, gimple_uid (stmt));
		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
		  rf1->repr = target_ssa;

		  /* Don't reprocess the multiply we just introduced.  */
		  gimple_set_visited (mul_stmt, true);
		}
	    }

	  /* Form a call to __builtin_powi for the maximum product
	     just formed, raised to the power obtained earlier.  */
	  rf1 = &repeat_factor_vec[j];
	  if (INTEGRAL_TYPE_P (type))
	    {
	      gcc_assert (power > 1);
	      gimple_stmt_iterator gsip = gsi;
	      gsi_prev (&gsip);
	      iter_result = powi_as_mults (&gsi, gimple_location (stmt),
					   rf1->repr, power);
	      gimple_stmt_iterator gsic = gsi;
	      while (gsi_stmt (gsic) != gsi_stmt (gsip))
		{
		  gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
		  gimple_set_visited (gsi_stmt (gsic), true);
		  gsi_prev (&gsic);
		}
	    }
	  else
	    {
	      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
					    build_int_cst (integer_type_node,
							   power));
	      gimple_call_set_lhs (pow_stmt, iter_result);
	      gimple_set_location (pow_stmt, gimple_location (stmt));
	      gimple_set_uid (pow_stmt, gimple_uid (stmt));
	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
	    }
	}

      /* If we previously formed at least one other builtin_powi call,
	 form the product of this one and those others.  */
      if (result)
	{
	  tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
	  mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
					  result, iter_result);
	  gimple_set_location (mul_stmt, gimple_location (stmt));
	  gimple_set_uid (mul_stmt, gimple_uid (stmt));
	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
	  gimple_set_visited (mul_stmt, true);
	  result = new_result;
	}
      else
	result = iter_result;

      /* Decrement the occurrence count of each element in the product
	 by the count found above, and remove this many copies of each
	 factor from OPS.  */
      for (i = j; i < vec_len; i++)
	{
	  unsigned k = power;
	  unsigned n;

	  rf1 = &repeat_factor_vec[i];
	  rf1->count -= power;

	  FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
	    {
	      if (oe->op == rf1->factor)
		{
		  if (oe->count <= k)
		    {
		      ops->ordered_remove (n);
		      k -= oe->count;

		      if (k == 0)
			break;
		    }
		  else
		    {
		      oe->count -= k;
		      break;
		    }
		}
	    }
	}
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}
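/* An illustrative sketch (hypothetical SSA names, not taken from a real
   dump): given OPS for x * x * y * y * y under
   -funsafe-math-optimizations, the code above emits roughly

     reassocpow_1 = x * y;
     reassocpow_2 = __builtin_powi (reassocpow_1, 2);

   and returns reassocpow_2; the single remaining copy of y stays in OPS
   and is multiplied back in by the caller.  */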
/* Attempt to optimize
   CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
   CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
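/* For example (an illustrative instance): -2.5 * copysign (3.0, y)
   folds the constants to -7.5 and becomes -copysign (-7.5, y), which is
   equivalent to -copysign (7.5, y) because copysign only uses the
   magnitude of its first argument.  */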
static void
attempt_builtin_copysign (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  unsigned int i;
  unsigned int length = ops->length ();
  tree cst = ops->last ()->op;

  if (length == 1 || TREE_CODE (cst) != REAL_CST)
    return;

  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME
	  && has_single_use (oe->op))
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
	  if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
	    {
	      tree arg0, arg1;
	      switch (gimple_call_combined_fn (old_call))
		{
		CASE_CFN_COPYSIGN:
		CASE_CFN_COPYSIGN_FN:
		  arg0 = gimple_call_arg (old_call, 0);
		  arg1 = gimple_call_arg (old_call, 1);
		  /* The first argument of copysign must be a constant,
		     otherwise there's nothing to do.  */
		  if (TREE_CODE (arg0) == REAL_CST)
		    {
		      tree type = TREE_TYPE (arg0);
		      tree mul = const_binop (MULT_EXPR, type, cst, arg0);
		      /* If we couldn't fold to a single constant, skip it.
			 That happens e.g. for inexact multiplication when
			 -frounding-math.  */
		      if (mul == NULL_TREE)
			break;
		      /* Instead of adjusting OLD_CALL, let's build a new
			 call to not leak the LHS and prevent keeping bogus
			 debug statements.  DCE will clean up the old call.  */
		      gimple *new_call;
		      if (gimple_call_internal_p (old_call))
			new_call = gimple_build_call_internal
			  (IFN_COPYSIGN, 2, mul, arg1);
		      else
			new_call = gimple_build_call
			  (gimple_call_fndecl (old_call), 2, mul, arg1);
		      tree lhs = make_ssa_name (type);
		      gimple_call_set_lhs (new_call, lhs);
		      gimple_set_location (new_call,
					   gimple_location (old_call));
		      insert_stmt_after (new_call, old_call);
		      /* We've used the constant, get rid of it.  */
		      ops->pop ();

		      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
		      /* Handle the CST1 < 0 case by negating the result.  */
		      if (cst1_neg)
			{
			  tree negrhs = make_ssa_name (TREE_TYPE (lhs));
			  gimple *negate_stmt
			    = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
			  insert_stmt_after (negate_stmt, new_call);
			  oe->op = negrhs;
			}
		      else
			oe->op = lhs;

		      if (dump_file && (dump_flags & TDF_DETAILS))
			{
			  fprintf (dump_file, "Optimizing copysign: ");
			  print_generic_expr (dump_file, cst);
			  fprintf (dump_file, " * COPYSIGN (");
			  print_generic_expr (dump_file, arg0);
			  fprintf (dump_file, ", ");
			  print_generic_expr (dump_file, arg1);
			  fprintf (dump_file, ") into %sCOPYSIGN (",
				   cst1_neg ? "-" : "");
			  print_generic_expr (dump_file, mul);
			  fprintf (dump_file, ", ");
			  print_generic_expr (dump_file, arg1);
			  fprintf (dump_file, ")\n");
			}
		      return;
		    }
		  break;
		default:
		  break;
		}
	    }
	}
    }
}
/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */

static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
{
  tree rhs1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}
/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
			    tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}
/* Rearrange the ops so that more FMAs can be formed when the chain contains
   more than two FMA candidates.  Put no-mult ops and mult ops alternately at
   the end of the queue, which is conducive to generating more FMAs and to
   reducing the loss of FMAs when breaking the chain.
   E.g.
   a * b + c * d + e generates:

   _4  = c_9(D) * d_10(D);
   _12 = .FMA (a_7(D), b_8(D), _4);
   _11 = e_6(D) + _12;

   Rearranging the ops to e + a * b + c * d generates:

   _4  = .FMA (c_7(D), d_8(D), _3);
   _11 = .FMA (a_5(D), b_6(D), _4);  */
static bool
rank_ops_for_fma (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  unsigned int i;
  unsigned int ops_length = ops->length ();
  auto_vec<operand_entry *> ops_mult;
  auto_vec<operand_entry *> ops_others;

  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
	  if (is_gimple_assign (def_stmt)
	      && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
	    ops_mult.safe_push (oe);
	  else
	    ops_others.safe_push (oe);
	}
      else
	ops_others.safe_push (oe);
    }
  /* 1. When ops_mult.length == 2, as in the following case,

	a * b + c * d + e,

     we need to rearrange the ops.

     Putting ops that are not defined by a mult in front can expose
     more FMAs.

     2. If all ops are defined by mults, we don't need to rearrange them.  */
  if (ops_mult.length () >= 2 && ops_mult.length () != ops_length)
    {
      /* Put no-mult ops and mult ops alternately at the end of the
	 queue, which is conducive to generating more FMAs and to reducing
	 the loss of FMAs when breaking the chain.  */
      ops->truncate (0);
      ops->splice (ops_mult);
      int j, opindex = ops->length ();
      int others_length = ops_others.length ();
      for (j = 0; j < others_length; j++)
	{
	  oe = ops_others.pop ();
	  ops->quick_insert (opindex, oe);
	  if (opindex > 0)
	    opindex--;
	}
      return true;
    }
  return false;
}
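/* An illustrative trace (hypothetical operands): for OPS {a*b, c*d, e, f}
   the code above first collects {a*b, c*d} and {e, f}, then interleaves
   from the back to yield {a*b, e, c*d, f}, so every mult op ends up
   adjacent to a no-mult op and can fuse into an FMA when the chain is
   rewritten.  */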
/* Reassociate expressions in basic block BB and its post-dominator as
   children.

   Bubble up return status from maybe_optimize_range_tests.  */
static bool
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple *stmt = last_nondebug_stmt (bb);
  bool cfg_cleanup_needed = false;

  if (stmt && !gimple_visited_p (stmt))
    cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);

  bool do_prev = false;
  for (gsi = gsi_last_bb (bb);
       !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
    {
      do_prev = true;
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
	  && !stmt_could_throw_p (cfun, stmt))
	{
	  tree lhs, rhs1, rhs2;
	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

	  /* If this was part of an already processed statement,
	     we don't need to touch it again.  */
	  if (gimple_visited_p (stmt))
	    {
	      /* This statement might have become dead because of previous
		 reassociations.  */
	      if (has_zero_uses (gimple_get_lhs (stmt)))
		{
		  reassoc_remove_stmt (&gsi);
		  release_defs (stmt);
		  /* We might end up removing the last stmt above which
		     places the iterator to the end of the sequence.
		     Reset it to the last stmt in this case and make sure
		     we don't do gsi_prev in that case.  */
		  if (gsi_end_p (gsi))
		    {
		      gsi = gsi_last_bb (bb);
		      do_prev = false;
		    }
		}
	      continue;
	    }

	  /* If this is not a gimple binary expression, there is
	     nothing for us to do with it.  */
	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
	    continue;

	  lhs = gimple_assign_lhs (stmt);
	  rhs1 = gimple_assign_rhs1 (stmt);
	  rhs2 = gimple_assign_rhs2 (stmt);

	  /* For non-bit or min/max operations we can't associate
	     all types.  Verify that here.  */
	  if ((rhs_code != BIT_IOR_EXPR
	       && rhs_code != BIT_AND_EXPR
	       && rhs_code != BIT_XOR_EXPR
	       && rhs_code != MIN_EXPR
	       && rhs_code != MAX_EXPR
	       && !can_reassociate_type_p (TREE_TYPE (lhs)))
	      || !can_reassociate_op_p (rhs1)
	      || !can_reassociate_op_p (rhs2))
	    continue;

	  if (associative_tree_code (rhs_code))
	    {
	      auto_vec<operand_entry *> ops;
	      tree powi_result = NULL_TREE;
	      bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));

	      /* There may be no immediate uses left by the time we
		 get here because we may have eliminated them all.  */
	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
		continue;

	      gimple_set_visited (stmt, true);
	      linearize_expr_tree (&ops, stmt, true, true);
	      ops.qsort (sort_by_operand_rank);
	      int orig_len = ops.length ();
	      optimize_ops_list (rhs_code, &ops);
	      if (undistribute_ops_list (rhs_code, &ops,
					 loop_containing_stmt (stmt)))
		{
		  ops.qsort (sort_by_operand_rank);
		  optimize_ops_list (rhs_code, &ops);
		}
	      if (undistribute_bitref_for_vector (rhs_code, &ops,
						  loop_containing_stmt (stmt)))
		{
		  ops.qsort (sort_by_operand_rank);
		  optimize_ops_list (rhs_code, &ops);
		}
	      if (rhs_code == PLUS_EXPR
		  && transform_add_to_multiply (&ops))
		ops.qsort (sort_by_operand_rank);

	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
		{
		  if (is_vector)
		    optimize_vec_cond_expr (rhs_code, &ops);
		  else
		    optimize_range_tests (rhs_code, &ops, NULL);
		}

	      if (rhs_code == MULT_EXPR && !is_vector)
		{
		  attempt_builtin_copysign (&ops);

		  if (reassoc_insert_powi_p
		      && (flag_unsafe_math_optimizations
			  || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
		    powi_result = attempt_builtin_powi (stmt, &ops);
		}

	      operand_entry *last;
	      bool negate_result = false;
	      if (ops.length () > 1
		  && rhs_code == MULT_EXPR)
		{
		  last = ops.last ();
		  if ((integer_minus_onep (last->op)
		       || real_minus_onep (last->op))
		      && !HONOR_SNANS (TREE_TYPE (lhs))
		      && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
			  || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
		    {
		      ops.pop ();
		      negate_result = true;
		    }
		}

	      tree new_lhs = lhs;
	      /* If the operand vector is now empty, all operands were
		 consumed by the __builtin_powi optimization.  */
	      if (ops.length () == 0)
		transform_stmt_to_copy (&gsi, stmt, powi_result);
	      else if (ops.length () == 1)
		{
		  tree last_op = ops.last ()->op;

		  /* If the stmt that defines operand has to be inserted,
		     insert it before the use.  */
		  if (ops.last ()->stmt_to_insert)
		    insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
		  if (powi_result)
		    transform_stmt_to_multiply (&gsi, stmt, last_op,
						powi_result);
		  else
		    transform_stmt_to_copy (&gsi, stmt, last_op);
		}
	      else
		{
		  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
		  int ops_num = ops.length ();
		  int width;
		  bool has_fma = false;

		  /* For binary bit operations, if there are at least 3
		     operands and the last operand in OPS is a constant,
		     move it to the front.  This helps ensure that we generate
		     (X & Y) & C rather than (X & C) & Y.  The former will
		     often match a canonical bit test when we get to RTL.  */
		  if (ops.length () > 2
		      && (rhs_code == BIT_AND_EXPR
			  || rhs_code == BIT_IOR_EXPR
			  || rhs_code == BIT_XOR_EXPR)
		      && TREE_CODE (ops.last ()->op) == INTEGER_CST)
		    std::swap (*ops[0], *ops[ops_num - 1]);

		  optimization_type opt_type = bb_optimization_type (bb);

		  /* If the target supports FMA, rank_ops_for_fma will detect
		     whether the chain has FMA candidates and rearrange the
		     ops if so.  */
		  if (direct_internal_fn_supported_p (IFN_FMA,
						      TREE_TYPE (lhs),
						      opt_type)
		      && (rhs_code == PLUS_EXPR || rhs_code == MINUS_EXPR))
		    has_fma = rank_ops_for_fma (&ops);

		  /* Only rewrite the expression tree to parallel in the
		     last reassoc pass to avoid useless work back-and-forth
		     with initial linearization.  */
		  if (!reassoc_insert_powi_p
		      && ops.length () > 3
		      && (width = get_reassociation_width (ops_num, rhs_code,
							   mode)) > 1)
		    {
		      if (dump_file && (dump_flags & TDF_DETAILS))
			fprintf (dump_file,
				 "Width = %d was chosen for reassociation\n",
				 width);
		      rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
						  width, has_fma, ops);
		    }
		  else
		    {
		      /* When there are three operands left, we want
			 to make sure the ones that get the double
			 binary op are chosen wisely.  */
		      int len = ops.length ();
		      if (len >= 3 && !has_fma)
			swap_ops_for_binary_stmt (ops, len - 3);

		      new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
						   powi_result != NULL
						   || negate_result,
						   len != orig_len);
		    }

		  /* If we combined some repeated factors into a
		     __builtin_powi call, multiply that result by the
		     reassociated operands.  */
		  if (powi_result)
		    {
		      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
		      tree type = TREE_TYPE (lhs);
		      tree target_ssa = make_temp_ssa_name (type, NULL,
							    "reassocpow");
		      gimple_set_lhs (lhs_stmt, target_ssa);
		      update_stmt (lhs_stmt);
		      if (lhs != new_lhs)
			{
			  target_ssa = new_lhs;
			  new_lhs = lhs;
			}
		      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
						      powi_result, target_ssa);
		      gimple_set_location (mul_stmt, gimple_location (stmt));
		      gimple_set_uid (mul_stmt, gimple_uid (stmt));
		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
		    }
		}

	      if (negate_result)
		{
		  stmt = SSA_NAME_DEF_STMT (lhs);
		  tree tmp = make_ssa_name (TREE_TYPE (lhs));
		  gimple_set_lhs (stmt, tmp);
		  if (lhs != new_lhs)
		    tmp = new_lhs;
		  gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
							   tmp);
		  gimple_set_uid (neg_stmt, gimple_uid (stmt));
		  gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
		  update_stmt (stmt);
		}
	    }
	}
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    cfg_cleanup_needed |= reassociate_bb (son);

  return cfg_cleanup_needed;
}
/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...; // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...; // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI<1(l1), OTHERVAR(l2)>;  */
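/* Why the jump matters (an explanatory note, not from the original
   sources): the bit test sequence typically contains a variable shift
   whose count is only guaranteed to be in range when the preceding
   range comparison VAR failed.  Branching around the shift when VAR is
   already true avoids evaluating a possibly out-of-range shift count
   and skips work whose result is already known to be 1.  */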
static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (var);
      gimple *use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      gcc_assert (ok
		  && is_gimple_assign (use_stmt)
		  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
		  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple *g = gimple_build_cond (NE_EXPR, var,
				     build_zero_cst (TREE_TYPE (var)),
				     NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = profile_probability::even ();
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      then_bb->count -= etrue->count ();

      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
	othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
	othervar = gimple_assign_rhs1 (use_stmt);
      else
	gcc_unreachable ();
      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}
void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
void debug_ops_vector (vec<operand_entry *> ops);

/* Dump the operand entry vector OPS to FILE.  */
void
dump_ops_vector (FILE *file, vec<operand_entry *> ops)
{
  operand_entry *oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op);
      fprintf (file, "\n");
    }
}
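/* Sample output (illustrative only; actual ranks and SSA names vary,
   since BB ranks live in the high bits):

     Op 0 -> rank: 196609, tree: a_1(D)
     Op 1 -> rank: 196609, tree: b_2(D)
     Op 2 -> rank: 196610, tree: _3  */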
/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry *> ops)
{
  dump_ops_vector (stderr, ops);
}

/* Bubble up return status from reassociate_bb.  */

static bool
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}
/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  int64_t rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, int64_t>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
	insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB.  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}
/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
			    reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
			    reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
			    reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
			    reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
			    reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
			    reassociate_stats.pows_created);

  delete operand_rank;
  bitmap_clear (biased_names);
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}
/* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
   insertion of __builtin_powi calls.

   Returns TODO_cleanup_cfg if a CFG cleanup pass is desired due to
   optimization of a gimple conditional.  Otherwise returns zero.  */

static unsigned int
execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
{
  reassoc_insert_powi_p = insert_powi_p;
  reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;

  init_reassoc ();

  bool cfg_cleanup_needed;
  cfg_cleanup_needed = do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
}
namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};
class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () final override { return new pass_reassoc (m_ctxt); }
  void set_pass_param (unsigned int n, bool param) final override
    {
      gcc_assert (n == 0);
      insert_powi_p = param;
      bias_loop_carried_phi_ranks_p = !param;
    }
  bool gate (function *) final override { return flag_tree_reassoc != 0; }
  unsigned int execute (function *) final override
  {
    return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
  }

 private:
  /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
     point 3a in the pass header comment.  */
  bool insert_powi_p;
  bool bias_loop_carried_phi_ranks_p;
}; // class pass_reassoc
} // anon namespace

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}