1 /* Reassociation for trees.
2 Copyright (C) 2005-2023 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimple-fold.h"
44 #include "gimplify-me.h"
46 #include "tree-ssa-loop.h"
49 #include "langhooks.h"
53 #include "case-cfn-macros.h"
54 #include "tree-ssa-reassoc.h"
55 #include "tree-ssa-math-opts.h"
56 #include "gimple-range.h"
57 #include "internal-fn.h"
59 /* This is a simple global reassociation pass. It is, in part, based
60 on the LLVM pass of the same name (They do some things more/less
61 than we do, in different orders, etc).
63 It consists of five steps:
65 1. Breaking up subtract operations into addition + negate, where
66 it would promote the reassociation of adds.
68 2. Left linearization of the expression trees, so that (A+B)+(C+D)
69 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
70 During linearization, we place the operands of the binary
71 expressions into a vector of operand_entry_*
73 3. Optimization of the operand lists, eliminating things like a + -a, a & a, etc.
76 3a. Combine repeated factors with the same occurrence counts
77 into a __builtin_powi call that will later be optimized into
78 an optimal number of multiplies.
80 4. Rewrite the expression trees we linearized and optimized so
81 they are in proper rank order.
83 5. Repropagate negates, as nothing else will clean it up ATM.
85 A bit of theory on #4, since nobody seems to write anything down
86 about why it makes sense to do it the way they do it:
88 We could do this much nicer theoretically, but don't (for reasons
89 explained after how to do it theoretically nice :P).
91 In order to promote the most redundancy elimination, you want
92 binary expressions whose operands are the same rank (or
93 preferably, the same value) exposed to the redundancy eliminator,
94 for possible elimination.
96 So the way to do this if we really cared, is to build the new op
97 tree from the leaves to the roots, merging as you go, and putting the
98 new op on the end of the worklist, until you are left with one
99 thing on the worklist.
101 IE if you have to rewrite the following set of operands (listed with
102 rank in parentheses), with opcode PLUS_EXPR:
104 a (1), b (1), c (1), d (2), e (2)
107 We start with our merge worklist empty, and the ops list with all of
110 You want to first merge all leaves of the same rank, as much as
113 So first build a binary op of
115 mergetmp = a + b, and put "mergetmp" on the merge worklist.
117 Because there is no three operand form of PLUS_EXPR, c is not going to
118 be exposed to redundancy elimination as a rank 1 operand.
120 So you might as well throw it on the merge worklist (you could also
121 consider it to now be a rank two operand, and merge it with d and e,
122 but in this case, you then have evicted e from a binary op. So at
123 least in this situation, you can't win.)
125 Then build a binary op of d + e
128 and put mergetmp2 on the merge worklist.
130 so merge worklist = {mergetmp, c, mergetmp2}
132 Continue building binary ops of these operations until you have only
133 one operation left on the worklist.
138 mergetmp3 = mergetmp + c
140 worklist = {mergetmp2, mergetmp3}
142 mergetmp4 = mergetmp2 + mergetmp3
144 worklist = {mergetmp4}
146 because we have one operation left, we can now just set the original
147 statement equal to the result of that operation.
149 This will at least expose a + b and d + e to redundancy elimination
150 as binary operations.
152 For extra points, you can reuse the old statements to build the
153 mergetmps, since you shouldn't run out.
155 So why don't we do this?
157 Because it's expensive, and rarely will help. Most trees we are
158 reassociating have 3 or less ops. If they have 2 ops, they already
159 will be written into a nice single binary op. If you have 3 ops, a
160 single simple check suffices to tell you whether the first two are of the
161 same rank. If so, you know to order it
164 newstmt = mergetmp + op3
168 newstmt = mergetmp + op1
170 If all three are of the same rank, you can't expose them all in a
171 single binary operator anyway, so the above is *still* the best you
174 Thus, this is what we do. When we have three ops left, we check to see
175 what order to put them in, and call it a day. As a nod to vector sum
176 reduction, we check if any of the ops are really a phi node that is a
177 destructive update for the associating op, and keep the destructive
178 update together for vector sum reduction recognition. */
180 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
181 point 3a in the pass header comment. */
182 static bool reassoc_insert_powi_p
;
184 /* Enable biasing ranks of loop accumulators. We don't want this before
185 vectorization, since it interferes with reduction chains. */
186 static bool reassoc_bias_loop_carried_phi_ranks_p
;
192 int constants_eliminated
;
195 int pows_encountered
;
200 static object_allocator
<operand_entry
> operand_entry_pool
201 ("operand entry pool");
203 /* This is used to assign a unique ID to each struct operand_entry
204 so that qsort results are identical on different hosts. */
205 static unsigned int next_operand_entry_id
;
207 /* Starting rank number for a given basic block, so that we can rank
208 operations using unmovable instructions in that BB based on the bb
210 static int64_t *bb_rank
;
212 /* Operand->rank hashtable. */
213 static hash_map
<tree
, int64_t> *operand_rank
;
215 /* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
217 static auto_bitmap biased_names
;
219 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
220 all basic blocks the CFG should be adjusted - basic blocks
221 split right after that SSA_NAME's definition statement and before
222 the only use, which must be a bit ior. */
223 static vec
<tree
> reassoc_branch_fixups
;
226 static int64_t get_rank (tree
);
227 static bool reassoc_stmt_dominates_stmt_p (gimple
*, gimple
*);
229 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
230 possibly added by gsi_remove. */
233 reassoc_remove_stmt (gimple_stmt_iterator
*gsi
)
235 gimple
*stmt
= gsi_stmt (*gsi
);
237 if (!MAY_HAVE_DEBUG_BIND_STMTS
|| gimple_code (stmt
) == GIMPLE_PHI
)
238 return gsi_remove (gsi
, true);
240 gimple_stmt_iterator prev
= *gsi
;
242 unsigned uid
= gimple_uid (stmt
);
243 basic_block bb
= gimple_bb (stmt
);
244 bool ret
= gsi_remove (gsi
, true);
245 if (!gsi_end_p (prev
))
248 prev
= gsi_start_bb (bb
);
249 gimple
*end_stmt
= gsi_stmt (*gsi
);
250 while ((stmt
= gsi_stmt (prev
)) != end_stmt
)
252 gcc_assert (stmt
&& is_gimple_debug (stmt
) && gimple_uid (stmt
) == 0);
253 gimple_set_uid (stmt
, uid
);
259 /* Bias amount for loop-carried phis. We want this to be larger than
260 the depth of any reassociation tree we can see, but not larger than
261 the rank difference between two blocks. */
262 #define PHI_LOOP_BIAS (1 << 15)
264 /* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
265 operands to the STMT's left-hand side. The goal is to preserve bias in code
275 That is, we need to preserve bias along single-use chains originating from
276 loop-carried phis. Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
277 uses, because only they participate in rank propagation. */
279 propagate_bias_p (gimple
*stmt
)
282 imm_use_iterator use_iter
;
283 gimple
*single_use_stmt
= NULL
;
285 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
)) == tcc_reference
)
288 FOR_EACH_IMM_USE_FAST (use
, use_iter
, gimple_assign_lhs (stmt
))
290 gimple
*current_use_stmt
= USE_STMT (use
);
292 if (is_gimple_assign (current_use_stmt
)
293 && TREE_CODE (gimple_assign_lhs (current_use_stmt
)) == SSA_NAME
)
295 if (single_use_stmt
!= NULL
&& single_use_stmt
!= current_use_stmt
)
297 single_use_stmt
= current_use_stmt
;
301 if (single_use_stmt
== NULL
)
304 if (gimple_bb (stmt
)->loop_father
305 != gimple_bb (single_use_stmt
)->loop_father
)
311 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
312 an innermost loop, and the phi has only a single use which is inside
313 the loop, then the rank is the block rank of the loop latch plus an
314 extra bias for the loop-carried dependence. This causes expressions
315 calculated into an accumulator variable to be independent for each
316 iteration of the loop. If STMT is some other phi, the rank is the
317 block rank of its containing block. */
319 phi_rank (gimple
*stmt
)
321 basic_block bb
= gimple_bb (stmt
);
322 class loop
*father
= bb
->loop_father
;
328 if (!reassoc_bias_loop_carried_phi_ranks_p
)
329 return bb_rank
[bb
->index
];
331 /* We only care about real loops (those with a latch). */
333 return bb_rank
[bb
->index
];
335 /* Interesting phis must be in headers of innermost loops. */
336 if (bb
!= father
->header
338 return bb_rank
[bb
->index
];
340 /* Ignore virtual SSA_NAMEs. */
341 res
= gimple_phi_result (stmt
);
342 if (virtual_operand_p (res
))
343 return bb_rank
[bb
->index
];
345 /* The phi definition must have a single use, and that use must be
346 within the loop. Otherwise this isn't an accumulator pattern. */
347 if (!single_imm_use (res
, &use
, &use_stmt
)
348 || gimple_bb (use_stmt
)->loop_father
!= father
)
349 return bb_rank
[bb
->index
];
351 /* Look for phi arguments from within the loop. If found, bias this phi. */
352 for (i
= 0; i
< gimple_phi_num_args (stmt
); i
++)
354 tree arg
= gimple_phi_arg_def (stmt
, i
);
355 if (TREE_CODE (arg
) == SSA_NAME
356 && !SSA_NAME_IS_DEFAULT_DEF (arg
))
358 gimple
*def_stmt
= SSA_NAME_DEF_STMT (arg
);
359 if (gimple_bb (def_stmt
)->loop_father
== father
)
360 return bb_rank
[father
->latch
->index
] + PHI_LOOP_BIAS
;
364 /* Must be an uninteresting phi. */
365 return bb_rank
[bb
->index
];
368 /* Return the maximum of RANK and the rank that should be propagated
369 from expression OP. For most operands, this is just the rank of OP.
370 For loop-carried phis, the value is zero to avoid undoing the bias
371 in favor of the phi. */
373 propagate_rank (int64_t rank
, tree op
, bool *maybe_biased_p
)
377 op_rank
= get_rank (op
);
379 /* Check whether op is biased after the get_rank () call, since it might have
380 updated biased_names. */
381 if (TREE_CODE (op
) == SSA_NAME
382 && bitmap_bit_p (biased_names
, SSA_NAME_VERSION (op
)))
384 if (maybe_biased_p
== NULL
)
386 *maybe_biased_p
= true;
389 return MAX (rank
, op_rank
);
392 /* Look up the operand rank structure for expression E. */
394 static inline int64_t
395 find_operand_rank (tree e
)
397 int64_t *slot
= operand_rank
->get (e
);
398 return slot
? *slot
: -1;
401 /* Insert {E,RANK} into the operand rank hashtable. */
404 insert_operand_rank (tree e
, int64_t rank
)
406 gcc_assert (rank
> 0);
407 gcc_assert (!operand_rank
->put (e
, rank
));
410 /* Given an expression E, return the rank of the expression. */
415 /* SSA_NAME's have the rank of the expression they are the result
417 For globals and uninitialized values, the rank is 0.
418 For function arguments, use the pre-setup rank.
419 For PHI nodes, stores, asm statements, etc, we use the rank of
421 For simple operations, the rank is the maximum rank of any of
422 its operands, or the bb_rank, whichever is less.
423 I make no claims that this is optimal, however, it gives good
426 /* We make an exception to the normal ranking system to break
427 dependences of accumulator variables in loops. Suppose we
428 have a simple one-block loop containing:
435 As shown, each iteration of the calculation into x is fully
436 dependent upon the iteration before it. We would prefer to
437 see this in the form:
444 If the loop is unrolled, the calculations of b and c from
445 different iterations can be interleaved.
447 To obtain this result during reassociation, we bias the rank
448 of the phi definition x_1 upward, when it is recognized as an
449 accumulator pattern. The artificial rank causes it to be
450 added last, providing the desired independence. */
452 if (TREE_CODE (e
) == SSA_NAME
)
459 /* If we already have a rank for this expression, use that. */
460 rank
= find_operand_rank (e
);
464 stmt
= SSA_NAME_DEF_STMT (e
);
465 if (gimple_code (stmt
) == GIMPLE_PHI
)
467 rank
= phi_rank (stmt
);
468 if (rank
!= bb_rank
[gimple_bb (stmt
)->index
])
469 bitmap_set_bit (biased_names
, SSA_NAME_VERSION (e
));
472 else if (!is_gimple_assign (stmt
))
473 rank
= bb_rank
[gimple_bb (stmt
)->index
];
477 bool biased_p
= false;
478 bool *maybe_biased_p
= propagate_bias_p (stmt
) ? &biased_p
: NULL
;
480 /* Otherwise, find the maximum rank for the operands. As an
481 exception, remove the bias from loop-carried phis when propagating
482 the rank so that dependent operations are not also biased. */
483 /* Simply walk over all SSA uses - this takes advatage of the
484 fact that non-SSA operands are is_gimple_min_invariant and
487 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
488 rank
= propagate_rank (rank
, op
, maybe_biased_p
);
492 bitmap_set_bit (biased_names
, SSA_NAME_VERSION (e
));
495 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
497 fprintf (dump_file
, "Rank for ");
498 print_generic_expr (dump_file
, e
);
499 fprintf (dump_file
, " is %" PRId64
"\n", rank
);
502 /* Note the rank in the hashtable so we don't recompute it. */
503 insert_operand_rank (e
, rank
);
507 /* Constants, globals, etc., are rank 0 */
512 /* We want integer ones to end up last no matter what, since they are
513 the ones we can do the most with. */
514 #define INTEGER_CONST_TYPE 1 << 4
515 #define FLOAT_ONE_CONST_TYPE 1 << 3
516 #define FLOAT_CONST_TYPE 1 << 2
517 #define OTHER_CONST_TYPE 1 << 1
519 /* Classify an invariant tree into integer, float, or other, so that
520 we can sort them to be near other constants of the same type. */
522 constant_type (tree t
)
524 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
525 return INTEGER_CONST_TYPE
;
526 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
528 /* Sort -1.0 and 1.0 constants last, while in some cases
529 const_binop can't optimize some inexact operations, multiplication
530 by -1.0 or 1.0 can be always merged with others. */
531 if (real_onep (t
) || real_minus_onep (t
))
532 return FLOAT_ONE_CONST_TYPE
;
533 return FLOAT_CONST_TYPE
;
536 return OTHER_CONST_TYPE
;
539 /* qsort comparison function to sort operand entries PA and PB by rank
540 so that the sorted array is ordered by rank in decreasing order. */
542 sort_by_operand_rank (const void *pa
, const void *pb
)
544 const operand_entry
*oea
= *(const operand_entry
*const *)pa
;
545 const operand_entry
*oeb
= *(const operand_entry
*const *)pb
;
547 if (oeb
->rank
!= oea
->rank
)
548 return oeb
->rank
> oea
->rank
? 1 : -1;
550 /* It's nicer for optimize_expression if constants that are likely
551 to fold when added/multiplied/whatever are put next to each
552 other. Since all constants have rank 0, order them by type. */
555 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
556 return constant_type (oea
->op
) - constant_type (oeb
->op
);
558 /* To make sorting result stable, we use unique IDs to determine
560 return oeb
->id
> oea
->id
? 1 : -1;
563 if (TREE_CODE (oea
->op
) != SSA_NAME
)
565 if (TREE_CODE (oeb
->op
) != SSA_NAME
)
566 return oeb
->id
> oea
->id
? 1 : -1;
570 else if (TREE_CODE (oeb
->op
) != SSA_NAME
)
573 /* Lastly, make sure the versions that are the same go next to each
575 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
577 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
578 versions of removed SSA_NAMEs, so if possible, prefer to sort
579 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
581 gimple
*stmta
= SSA_NAME_DEF_STMT (oea
->op
);
582 gimple
*stmtb
= SSA_NAME_DEF_STMT (oeb
->op
);
583 basic_block bba
= gimple_bb (stmta
);
584 basic_block bbb
= gimple_bb (stmtb
);
587 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
588 but the other might not. */
593 /* If neither is, compare bb_rank. */
594 if (bb_rank
[bbb
->index
] != bb_rank
[bba
->index
])
595 return (bb_rank
[bbb
->index
] >> 16) - (bb_rank
[bba
->index
] >> 16);
598 bool da
= reassoc_stmt_dominates_stmt_p (stmta
, stmtb
);
599 bool db
= reassoc_stmt_dominates_stmt_p (stmtb
, stmta
);
603 return SSA_NAME_VERSION (oeb
->op
) > SSA_NAME_VERSION (oea
->op
) ? 1 : -1;
606 return oeb
->id
> oea
->id
? 1 : -1;
609 /* Add an operand entry to *OPS for the tree operand OP. */
612 add_to_ops_vec (vec
<operand_entry
*> *ops
, tree op
, gimple
*stmt_to_insert
= NULL
)
614 operand_entry
*oe
= operand_entry_pool
.allocate ();
617 oe
->rank
= get_rank (op
);
618 oe
->id
= next_operand_entry_id
++;
620 oe
->stmt_to_insert
= stmt_to_insert
;
624 /* Add an operand entry to *OPS for the tree operand OP with repeat
628 add_repeat_to_ops_vec (vec
<operand_entry
*> *ops
, tree op
,
629 HOST_WIDE_INT repeat
)
631 operand_entry
*oe
= operand_entry_pool
.allocate ();
634 oe
->rank
= get_rank (op
);
635 oe
->id
= next_operand_entry_id
++;
637 oe
->stmt_to_insert
= NULL
;
640 reassociate_stats
.pows_encountered
++;
643 /* Returns true if we can associate the SSA def OP. */
646 can_reassociate_op_p (tree op
)
648 if (TREE_CODE (op
) == SSA_NAME
&& SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op
))
650 /* Make sure asm goto outputs do not participate in reassociation since
651 we have no way to find an insertion place after asm goto. */
652 if (TREE_CODE (op
) == SSA_NAME
653 && gimple_code (SSA_NAME_DEF_STMT (op
)) == GIMPLE_ASM
654 && gimple_asm_nlabels (as_a
<gasm
*> (SSA_NAME_DEF_STMT (op
))) != 0)
659 /* Returns true if we can reassociate operations of TYPE.
660 That is for integral or non-saturating fixed-point types, and for
661 floating point type when associative-math is enabled. */
664 can_reassociate_type_p (tree type
)
666 if ((ANY_INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_WRAPS (type
))
667 || NON_SAT_FIXED_POINT_TYPE_P (type
)
668 || (flag_associative_math
&& FLOAT_TYPE_P (type
)))
673 /* Return true if STMT is reassociable operation containing a binary
674 operation with tree code CODE, and is inside LOOP. */
677 is_reassociable_op (gimple
*stmt
, enum tree_code code
, class loop
*loop
)
679 basic_block bb
= gimple_bb (stmt
);
681 if (gimple_bb (stmt
) == NULL
)
684 if (!flow_bb_inside_loop_p (loop
, bb
))
687 if (is_gimple_assign (stmt
)
688 && gimple_assign_rhs_code (stmt
) == code
689 && has_single_use (gimple_assign_lhs (stmt
)))
691 tree rhs1
= gimple_assign_rhs1 (stmt
);
692 tree rhs2
= gimple_assign_rhs2 (stmt
);
693 if (!can_reassociate_op_p (rhs1
)
694 || (rhs2
&& !can_reassociate_op_p (rhs2
)))
703 /* Return true if STMT is a nop-conversion. */
706 gimple_nop_conversion_p (gimple
*stmt
)
708 if (gassign
*ass
= dyn_cast
<gassign
*> (stmt
))
710 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass
))
711 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass
)),
712 TREE_TYPE (gimple_assign_rhs1 (ass
))))
718 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
719 operand of the negate operation. Otherwise, return NULL. */
722 get_unary_op (tree name
, enum tree_code opcode
)
724 gimple
*stmt
= SSA_NAME_DEF_STMT (name
);
726 /* Look through nop conversions (sign changes). */
727 if (gimple_nop_conversion_p (stmt
)
728 && TREE_CODE (gimple_assign_rhs1 (stmt
)) == SSA_NAME
)
729 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
731 if (!is_gimple_assign (stmt
))
734 if (gimple_assign_rhs_code (stmt
) == opcode
)
735 return gimple_assign_rhs1 (stmt
);
739 /* Return true if OP1 and OP2 have the same value if casted to either type. */
742 ops_equal_values_p (tree op1
, tree op2
)
748 if (TREE_CODE (op1
) == SSA_NAME
)
750 gimple
*stmt
= SSA_NAME_DEF_STMT (op1
);
751 if (gimple_nop_conversion_p (stmt
))
753 op1
= gimple_assign_rhs1 (stmt
);
759 if (TREE_CODE (op2
) == SSA_NAME
)
761 gimple
*stmt
= SSA_NAME_DEF_STMT (op2
);
762 if (gimple_nop_conversion_p (stmt
))
764 op2
= gimple_assign_rhs1 (stmt
);
775 /* If CURR and LAST are a pair of ops that OPCODE allows us to
776 eliminate through equivalences, do so, remove them from OPS, and
777 return true. Otherwise, return false. */
780 eliminate_duplicate_pair (enum tree_code opcode
,
781 vec
<operand_entry
*> *ops
,
788 /* If we have two of the same op, and the opcode is & |, min, or max,
789 we can eliminate one of them.
790 If we have two of the same op, and the opcode is ^, we can
791 eliminate both of them. */
793 if (last
&& last
->op
== curr
->op
)
801 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
803 fprintf (dump_file
, "Equivalence: ");
804 print_generic_expr (dump_file
, curr
->op
);
805 fprintf (dump_file
, " [&|minmax] ");
806 print_generic_expr (dump_file
, last
->op
);
807 fprintf (dump_file
, " -> ");
808 print_generic_stmt (dump_file
, last
->op
);
811 ops
->ordered_remove (i
);
812 reassociate_stats
.ops_eliminated
++;
817 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
819 fprintf (dump_file
, "Equivalence: ");
820 print_generic_expr (dump_file
, curr
->op
);
821 fprintf (dump_file
, " ^ ");
822 print_generic_expr (dump_file
, last
->op
);
823 fprintf (dump_file
, " -> nothing\n");
826 reassociate_stats
.ops_eliminated
+= 2;
828 if (ops
->length () == 2)
831 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
836 ops
->ordered_remove (i
-1);
837 ops
->ordered_remove (i
-1);
849 static vec
<tree
> plus_negates
;
851 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
852 expression, look in OPS for a corresponding positive operation to cancel
853 it out. If we find one, remove the other from OPS, replace
854 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
858 eliminate_plus_minus_pair (enum tree_code opcode
,
859 vec
<operand_entry
*> *ops
,
860 unsigned int currindex
,
868 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
871 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
872 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
873 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
876 /* Any non-negated version will have a rank that is one less than
877 the current rank. So once we hit those ranks, if we don't find
880 for (i
= currindex
+ 1;
881 ops
->iterate (i
, &oe
)
882 && oe
->rank
>= curr
->rank
- 1 ;
886 && ops_equal_values_p (oe
->op
, negateop
))
888 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
890 fprintf (dump_file
, "Equivalence: ");
891 print_generic_expr (dump_file
, negateop
);
892 fprintf (dump_file
, " + -");
893 print_generic_expr (dump_file
, oe
->op
);
894 fprintf (dump_file
, " -> 0\n");
897 ops
->ordered_remove (i
);
898 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
899 ops
->ordered_remove (currindex
);
900 reassociate_stats
.ops_eliminated
++;
905 && ops_equal_values_p (oe
->op
, notop
))
907 tree op_type
= TREE_TYPE (oe
->op
);
909 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
911 fprintf (dump_file
, "Equivalence: ");
912 print_generic_expr (dump_file
, notop
);
913 fprintf (dump_file
, " + ~");
914 print_generic_expr (dump_file
, oe
->op
);
915 fprintf (dump_file
, " -> -1\n");
918 ops
->ordered_remove (i
);
919 add_to_ops_vec (ops
, build_all_ones_cst (op_type
));
920 ops
->ordered_remove (currindex
);
921 reassociate_stats
.ops_eliminated
++;
927 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
928 save it for later inspection in repropagate_negates(). */
929 if (negateop
!= NULL_TREE
930 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr
->op
)) == NEGATE_EXPR
)
931 plus_negates
.safe_push (curr
->op
);
936 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
937 bitwise not expression, look in OPS for a corresponding operand to
938 cancel it out. If we find one, remove the other from OPS, replace
939 OPS[CURRINDEX] with 0, and return true. Otherwise, return
943 eliminate_not_pairs (enum tree_code opcode
,
944 vec
<operand_entry
*> *ops
,
945 unsigned int currindex
,
952 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
953 || TREE_CODE (curr
->op
) != SSA_NAME
)
956 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
957 if (notop
== NULL_TREE
)
960 /* Any non-not version will have a rank that is one less than
961 the current rank. So once we hit those ranks, if we don't find
964 for (i
= currindex
+ 1;
965 ops
->iterate (i
, &oe
)
966 && oe
->rank
>= curr
->rank
- 1;
971 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
973 fprintf (dump_file
, "Equivalence: ");
974 print_generic_expr (dump_file
, notop
);
975 if (opcode
== BIT_AND_EXPR
)
976 fprintf (dump_file
, " & ~");
977 else if (opcode
== BIT_IOR_EXPR
)
978 fprintf (dump_file
, " | ~");
979 print_generic_expr (dump_file
, oe
->op
);
980 if (opcode
== BIT_AND_EXPR
)
981 fprintf (dump_file
, " -> 0\n");
982 else if (opcode
== BIT_IOR_EXPR
)
983 fprintf (dump_file
, " -> -1\n");
986 if (opcode
== BIT_AND_EXPR
)
987 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
988 else if (opcode
== BIT_IOR_EXPR
)
989 oe
->op
= build_all_ones_cst (TREE_TYPE (oe
->op
));
991 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
993 ops
->quick_push (oe
);
1001 /* Use constant value that may be present in OPS to try to eliminate
1002 operands. Note that this function is only really used when we've
1003 eliminated ops for other reasons, or merged constants. Across
1004 single statements, fold already does all of this, plus more. There
1005 is little point in duplicating logic, so I've only included the
1006 identities that I could ever construct testcases to trigger. */
1009 eliminate_using_constants (enum tree_code opcode
,
1010 vec
<operand_entry
*> *ops
)
1012 operand_entry
*oelast
= ops
->last ();
1013 tree type
= TREE_TYPE (oelast
->op
);
1015 if (oelast
->rank
== 0
1016 && (ANY_INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
1021 if (integer_zerop (oelast
->op
))
1023 if (ops
->length () != 1)
1025 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1026 fprintf (dump_file
, "Found & 0, removing all other ops\n");
1028 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
1031 ops
->quick_push (oelast
);
1035 else if (integer_all_onesp (oelast
->op
))
1037 if (ops
->length () != 1)
1039 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1040 fprintf (dump_file
, "Found & -1, removing\n");
1042 reassociate_stats
.ops_eliminated
++;
1047 if (integer_all_onesp (oelast
->op
))
1049 if (ops
->length () != 1)
1051 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1052 fprintf (dump_file
, "Found | -1, removing all other ops\n");
1054 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
1057 ops
->quick_push (oelast
);
1061 else if (integer_zerop (oelast
->op
))
1063 if (ops
->length () != 1)
1065 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1066 fprintf (dump_file
, "Found | 0, removing\n");
1068 reassociate_stats
.ops_eliminated
++;
1073 if (integer_zerop (oelast
->op
)
1074 || (FLOAT_TYPE_P (type
)
1075 && !HONOR_NANS (type
)
1076 && !HONOR_SIGNED_ZEROS (type
)
1077 && real_zerop (oelast
->op
)))
1079 if (ops
->length () != 1)
1081 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1082 fprintf (dump_file
, "Found * 0, removing all other ops\n");
1084 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
1086 ops
->quick_push (oelast
);
1090 else if (integer_onep (oelast
->op
)
1091 || (FLOAT_TYPE_P (type
)
1092 && !HONOR_SNANS (type
)
1093 && real_onep (oelast
->op
)))
1095 if (ops
->length () != 1)
1097 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1098 fprintf (dump_file
, "Found * 1, removing\n");
1100 reassociate_stats
.ops_eliminated
++;
1108 if (integer_zerop (oelast
->op
)
1109 || (FLOAT_TYPE_P (type
)
1110 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
1111 && fold_real_zero_addition_p (type
, 0, oelast
->op
,
1112 opcode
== MINUS_EXPR
)))
1114 if (ops
->length () != 1)
1116 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1117 fprintf (dump_file
, "Found [|^+] 0, removing\n");
1119 reassociate_stats
.ops_eliminated
++;
1131 static void linearize_expr_tree (vec
<operand_entry
*> *, gimple
*,
1134 /* Structure for tracking and counting operands. */
1138 enum tree_code oecode
;
1143 /* The heap for the oecount hashtable and the sorted list of operands. */
1144 static vec
<oecount
> cvec
;
1147 /* Oecount hashtable helpers. */
1149 struct oecount_hasher
: int_hash
<int, 0, 1>
1151 static inline hashval_t
hash (int);
1152 static inline bool equal (int, int);
1155 /* Hash function for oecount. */
1158 oecount_hasher::hash (int p
)
1160 const oecount
*c
= &cvec
[p
- 42];
1161 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
1164 /* Comparison function for oecount. */
1167 oecount_hasher::equal (int p1
, int p2
)
1169 const oecount
*c1
= &cvec
[p1
- 42];
1170 const oecount
*c2
= &cvec
[p2
- 42];
1171 return c1
->oecode
== c2
->oecode
&& c1
->op
== c2
->op
;
1174 /* Comparison function for qsort sorting oecount elements by count. */
1177 oecount_cmp (const void *p1
, const void *p2
)
1179 const oecount
*c1
= (const oecount
*)p1
;
1180 const oecount
*c2
= (const oecount
*)p2
;
1181 if (c1
->cnt
!= c2
->cnt
)
1182 return c1
->cnt
> c2
->cnt
? 1 : -1;
1184 /* If counts are identical, use unique IDs to stabilize qsort. */
1185 return c1
->id
> c2
->id
? 1 : -1;
1188 /* Return TRUE iff STMT represents a builtin call that raises OP
1189 to some exponent. */
1192 stmt_is_power_of_op (gimple
*stmt
, tree op
)
1194 if (!is_gimple_call (stmt
))
1197 switch (gimple_call_combined_fn (stmt
))
1201 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1208 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1209 in place and return the result. Assumes that stmt_is_power_of_op
1210 was previously called for STMT and returned TRUE. */
1212 static HOST_WIDE_INT
1213 decrement_power (gimple
*stmt
)
1215 REAL_VALUE_TYPE c
, cint
;
1216 HOST_WIDE_INT power
;
1219 switch (gimple_call_combined_fn (stmt
))
1222 arg1
= gimple_call_arg (stmt
, 1);
1223 c
= TREE_REAL_CST (arg1
);
1224 power
= real_to_integer (&c
) - 1;
1225 real_from_integer (&cint
, VOIDmode
, power
, SIGNED
);
1226 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1230 arg1
= gimple_call_arg (stmt
, 1);
1231 power
= TREE_INT_CST_LOW (arg1
) - 1;
1232 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1240 /* Replace SSA defined by STMT and replace all its uses with new
1241 SSA. Also return the new SSA. */
1244 make_new_ssa_for_def (gimple
*stmt
, enum tree_code opcode
, tree op
)
1248 imm_use_iterator iter
;
1249 tree new_lhs
, new_debug_lhs
= NULL_TREE
;
1250 tree lhs
= gimple_get_lhs (stmt
);
1252 new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
1253 gimple_set_lhs (stmt
, new_lhs
);
1255 /* Also need to update GIMPLE_DEBUGs. */
1256 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
1258 tree repl
= new_lhs
;
1259 if (is_gimple_debug (use_stmt
))
1261 if (new_debug_lhs
== NULL_TREE
)
1263 new_debug_lhs
= build_debug_expr_decl (TREE_TYPE (lhs
));
1265 = gimple_build_debug_bind (new_debug_lhs
,
1266 build2 (opcode
, TREE_TYPE (lhs
),
1269 gimple_set_uid (def_temp
, gimple_uid (stmt
));
1270 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1271 gsi_insert_after (&gsi
, def_temp
, GSI_SAME_STMT
);
1273 repl
= new_debug_lhs
;
1275 FOR_EACH_IMM_USE_ON_STMT (use
, iter
)
1276 SET_USE (use
, repl
);
1277 update_stmt (use_stmt
);
1282 /* Replace all SSAs defined in STMTS_TO_FIX and replace its
1283 uses with new SSAs. Also do this for the stmt that defines DEF
1284 if *DEF is not OP. */
1287 make_new_ssa_for_all_defs (tree
*def
, enum tree_code opcode
, tree op
,
1288 vec
<gimple
*> &stmts_to_fix
)
1294 && TREE_CODE (*def
) == SSA_NAME
1295 && (stmt
= SSA_NAME_DEF_STMT (*def
))
1296 && gimple_code (stmt
) != GIMPLE_NOP
)
1297 *def
= make_new_ssa_for_def (stmt
, opcode
, op
);
1299 FOR_EACH_VEC_ELT (stmts_to_fix
, i
, stmt
)
1300 make_new_ssa_for_def (stmt
, opcode
, op
);
1303 /* Find the single immediate use of STMT's LHS, and replace it
1304 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1305 replace *DEF with OP as well. */
1308 propagate_op_to_single_use (tree op
, gimple
*stmt
, tree
*def
)
1313 gimple_stmt_iterator gsi
;
1315 if (is_gimple_call (stmt
))
1316 lhs
= gimple_call_lhs (stmt
);
1318 lhs
= gimple_assign_lhs (stmt
);
1320 gcc_assert (has_single_use (lhs
));
1321 single_imm_use (lhs
, &use
, &use_stmt
);
1325 if (TREE_CODE (op
) != SSA_NAME
)
1326 update_stmt (use_stmt
);
1327 gsi
= gsi_for_stmt (stmt
);
1328 unlink_stmt_vdef (stmt
);
1329 reassoc_remove_stmt (&gsi
);
1330 release_defs (stmt
);
1333 /* Walks the linear chain with result *DEF searching for an operation
1334 with operand OP and code OPCODE removing that from the chain. *DEF
1335 is updated if there is only one operand but no operation left. */
1338 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1340 tree orig_def
= *def
;
1341 gimple
*stmt
= SSA_NAME_DEF_STMT (*def
);
1342 /* PR72835 - Record the stmt chain that has to be updated such that
1343 we dont use the same LHS when the values computed are different. */
1344 auto_vec
<gimple
*, 64> stmts_to_fix
;
1350 if (opcode
== MULT_EXPR
)
1352 if (stmt_is_power_of_op (stmt
, op
))
1354 if (decrement_power (stmt
) == 1)
1356 if (stmts_to_fix
.length () > 0)
1357 stmts_to_fix
.pop ();
1358 propagate_op_to_single_use (op
, stmt
, def
);
1362 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
)
1364 if (gimple_assign_rhs1 (stmt
) == op
)
1366 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
1367 if (stmts_to_fix
.length () > 0)
1368 stmts_to_fix
.pop ();
1369 propagate_op_to_single_use (cst
, stmt
, def
);
1372 else if (integer_minus_onep (op
)
1373 || real_minus_onep (op
))
1375 gimple_assign_set_rhs_code
1376 (stmt
, TREE_CODE (gimple_assign_rhs1 (stmt
)));
1382 name
= gimple_assign_rhs1 (stmt
);
1384 /* If this is the operation we look for and one of the operands
1385 is ours simply propagate the other operand into the stmts
1387 if (gimple_assign_rhs_code (stmt
) == opcode
1389 || gimple_assign_rhs2 (stmt
) == op
))
1392 name
= gimple_assign_rhs2 (stmt
);
1393 if (stmts_to_fix
.length () > 0)
1394 stmts_to_fix
.pop ();
1395 propagate_op_to_single_use (name
, stmt
, def
);
1399 /* We might have a multiply of two __builtin_pow* calls, and
1400 the operand might be hiding in the rightmost one. Likewise
1401 this can happen for a negate. */
1402 if (opcode
== MULT_EXPR
1403 && gimple_assign_rhs_code (stmt
) == opcode
1404 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1405 && has_single_use (gimple_assign_rhs2 (stmt
)))
1407 gimple
*stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1408 if (stmt_is_power_of_op (stmt2
, op
))
1410 if (decrement_power (stmt2
) == 1)
1411 propagate_op_to_single_use (op
, stmt2
, def
);
1413 stmts_to_fix
.safe_push (stmt2
);
1416 else if (is_gimple_assign (stmt2
)
1417 && gimple_assign_rhs_code (stmt2
) == NEGATE_EXPR
)
1419 if (gimple_assign_rhs1 (stmt2
) == op
)
1421 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
1422 propagate_op_to_single_use (cst
, stmt2
, def
);
1425 else if (integer_minus_onep (op
)
1426 || real_minus_onep (op
))
1428 stmts_to_fix
.safe_push (stmt2
);
1429 gimple_assign_set_rhs_code
1430 (stmt2
, TREE_CODE (gimple_assign_rhs1 (stmt2
)));
1436 /* Continue walking the chain. */
1437 gcc_assert (name
!= op
1438 && TREE_CODE (name
) == SSA_NAME
);
1439 stmt
= SSA_NAME_DEF_STMT (name
);
1440 stmts_to_fix
.safe_push (stmt
);
1444 if (stmts_to_fix
.length () > 0 || *def
== orig_def
)
1445 make_new_ssa_for_all_defs (def
, opcode
, op
, stmts_to_fix
);
1448 /* Returns true if statement S1 dominates statement S2. Like
1449 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1452 reassoc_stmt_dominates_stmt_p (gimple
*s1
, gimple
*s2
)
1454 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1456 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1457 SSA_NAME. Assume it lives at the beginning of function and
1458 thus dominates everything. */
1459 if (!bb1
|| s1
== s2
)
1462 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1468 /* PHIs in the same basic block are assumed to be
1469 executed all in parallel, if only one stmt is a PHI,
1470 it dominates the other stmt in the same basic block. */
1471 if (gimple_code (s1
) == GIMPLE_PHI
)
1474 if (gimple_code (s2
) == GIMPLE_PHI
)
1477 gcc_assert (gimple_uid (s1
) && gimple_uid (s2
));
1479 if (gimple_uid (s1
) < gimple_uid (s2
))
1482 if (gimple_uid (s1
) > gimple_uid (s2
))
1485 gimple_stmt_iterator gsi
= gsi_for_stmt (s1
);
1486 unsigned int uid
= gimple_uid (s1
);
1487 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1489 gimple
*s
= gsi_stmt (gsi
);
1490 if (gimple_uid (s
) != uid
)
1499 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
1502 /* Insert STMT after INSERT_POINT. */
1505 insert_stmt_after (gimple
*stmt
, gimple
*insert_point
)
1507 gimple_stmt_iterator gsi
;
1510 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1511 bb
= gimple_bb (insert_point
);
1512 else if (!stmt_ends_bb_p (insert_point
))
1514 gsi
= gsi_for_stmt (insert_point
);
1515 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1516 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1519 else if (gimple_code (insert_point
) == GIMPLE_ASM
1520 && gimple_asm_nlabels (as_a
<gasm
*> (insert_point
)) != 0)
1521 /* We have no idea where to insert - it depends on where the
1522 uses will be placed. */
1525 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1526 thus if it must end a basic block, it should be a call that can
1527 throw, or some assignment that can throw. If it throws, the LHS
1528 of it will not be initialized though, so only valid places using
1529 the SSA_NAME should be dominated by the fallthru edge. */
1530 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
1531 gsi
= gsi_after_labels (bb
);
1532 if (gsi_end_p (gsi
))
1534 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1535 gimple_set_uid (stmt
,
1536 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1539 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1540 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1543 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1544 the result. Places the statement after the definition of either
1545 OP1 or OP2. Returns the new statement. */
1548 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
1550 gimple
*op1def
= NULL
, *op2def
= NULL
;
1551 gimple_stmt_iterator gsi
;
1555 /* Create the addition statement. */
1556 op
= make_ssa_name (type
);
1557 sum
= gimple_build_assign (op
, opcode
, op1
, op2
);
1559 /* Find an insertion place and insert. */
1560 if (TREE_CODE (op1
) == SSA_NAME
)
1561 op1def
= SSA_NAME_DEF_STMT (op1
);
1562 if (TREE_CODE (op2
) == SSA_NAME
)
1563 op2def
= SSA_NAME_DEF_STMT (op2
);
1564 if ((!op1def
|| gimple_nop_p (op1def
))
1565 && (!op2def
|| gimple_nop_p (op2def
)))
1567 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1568 if (!gsi_end_p (gsi
)
1569 && is_gimple_call (gsi_stmt (gsi
))
1570 && (gimple_call_flags (gsi_stmt (gsi
)) & ECF_RETURNS_TWICE
))
1572 /* Don't add statements before a returns_twice call at the start
1574 split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1575 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1577 if (gsi_end_p (gsi
))
1579 gimple_stmt_iterator gsi2
1580 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1581 gimple_set_uid (sum
,
1582 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1585 gimple_set_uid (sum
, gimple_uid (gsi_stmt (gsi
)));
1586 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1590 gimple
*insert_point
;
1591 if ((!op1def
|| gimple_nop_p (op1def
))
1592 || (op2def
&& !gimple_nop_p (op2def
)
1593 && reassoc_stmt_dominates_stmt_p (op1def
, op2def
)))
1594 insert_point
= op2def
;
1596 insert_point
= op1def
;
1597 insert_stmt_after (sum
, insert_point
);
1604 /* Perform un-distribution of divisions and multiplications.
1605 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1606 to (A + B) / X for real X.
1608 The algorithm is organized as follows.
1610 - First we walk the addition chain *OPS looking for summands that
1611 are defined by a multiplication or a real division. This results
1612 in the candidates bitmap with relevant indices into *OPS.
1614 - Second we build the chains of multiplications or divisions for
1615 these candidates, counting the number of occurrences of (operand, code)
1616 pairs in all of the candidates chains.
1618 - Third we sort the (operand, code) pairs by number of occurrence and
1619 process them starting with the pair with the most uses.
1621 * For each such pair we walk the candidates again to build a
1622 second candidate bitmap noting all multiplication/division chains
1623 that have at least one occurrence of (operand, code).
1625 * We build an alternate addition chain only covering these
1626 candidates with one (operand, code) operation removed from their
1627 multiplication/division chain.
1629 * The first candidate gets replaced by the alternate addition chain
1630 multiplied/divided by the operand.
1632 * All candidate chains get disabled for further processing and
1633 processing of (operand, code) pairs continues.
1635 The alternate addition chains built are re-processed by the main
1636 reassociation algorithm which allows optimizing a * x * y + b * y * x
1637 to (a + b ) * x * y in one invocation of the reassociation pass. */
1640 undistribute_ops_list (enum tree_code opcode
,
1641 vec
<operand_entry
*> *ops
, class loop
*loop
)
1643 unsigned int length
= ops
->length ();
1646 unsigned nr_candidates
, nr_candidates2
;
1647 sbitmap_iterator sbi0
;
1648 vec
<operand_entry
*> *subops
;
1649 bool changed
= false;
1650 unsigned int next_oecount_id
= 0;
1653 || opcode
!= PLUS_EXPR
)
1656 /* Build a list of candidates to process. */
1657 auto_sbitmap
candidates (length
);
1658 bitmap_clear (candidates
);
1660 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1662 enum tree_code dcode
;
1665 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1667 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1668 if (!is_gimple_assign (oe1def
))
1670 dcode
= gimple_assign_rhs_code (oe1def
);
1671 if ((dcode
!= MULT_EXPR
1672 && dcode
!= RDIV_EXPR
)
1673 || !is_reassociable_op (oe1def
, dcode
, loop
))
1676 bitmap_set_bit (candidates
, i
);
1680 if (nr_candidates
< 2)
1683 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1685 fprintf (dump_file
, "searching for un-distribute opportunities ");
1686 print_generic_expr (dump_file
,
1687 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, TDF_NONE
);
1688 fprintf (dump_file
, " %d\n", nr_candidates
);
1691 /* Build linearized sub-operand lists and the counting table. */
1694 hash_table
<oecount_hasher
> ctable (15);
1696 /* ??? Macro arguments cannot have multi-argument template types in
1697 them. This typedef is needed to workaround that limitation. */
1698 typedef vec
<operand_entry
*> vec_operand_entry_t_heap
;
1699 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1700 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1703 enum tree_code oecode
;
1706 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1707 oecode
= gimple_assign_rhs_code (oedef
);
1708 linearize_expr_tree (&subops
[i
], oedef
,
1709 associative_tree_code (oecode
), false);
1711 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1718 c
.id
= next_oecount_id
++;
1721 idx
= cvec
.length () + 41;
1722 slot
= ctable
.find_slot (idx
, INSERT
);
1730 cvec
[*slot
- 42].cnt
++;
1735 /* Sort the counting table. */
1736 cvec
.qsort (oecount_cmp
);
1738 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1741 fprintf (dump_file
, "Candidates:\n");
1742 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1744 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1745 c
->oecode
== MULT_EXPR
1746 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1747 print_generic_expr (dump_file
, c
->op
);
1748 fprintf (dump_file
, "\n");
1752 /* Process the (operand, code) pairs in order of most occurrence. */
1753 auto_sbitmap
candidates2 (length
);
1754 while (!cvec
.is_empty ())
1756 oecount
*c
= &cvec
.last ();
1760 /* Now collect the operands in the outer chain that contain
1761 the common operand in their inner chain. */
1762 bitmap_clear (candidates2
);
1764 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1767 enum tree_code oecode
;
1769 tree op
= (*ops
)[i
]->op
;
1771 /* If we undistributed in this chain already this may be
1773 if (TREE_CODE (op
) != SSA_NAME
)
1776 oedef
= SSA_NAME_DEF_STMT (op
);
1777 oecode
= gimple_assign_rhs_code (oedef
);
1778 if (oecode
!= c
->oecode
)
1781 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1783 if (oe1
->op
== c
->op
)
1785 bitmap_set_bit (candidates2
, i
);
1792 if (nr_candidates2
>= 2)
1794 operand_entry
*oe1
, *oe2
;
1796 int first
= bitmap_first_set_bit (candidates2
);
1798 /* Build the new addition chain. */
1799 oe1
= (*ops
)[first
];
1800 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1802 fprintf (dump_file
, "Building (");
1803 print_generic_expr (dump_file
, oe1
->op
);
1805 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1806 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1810 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1812 fprintf (dump_file
, " + ");
1813 print_generic_expr (dump_file
, oe2
->op
);
1815 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1816 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1817 oe1
->op
, oe2
->op
, opcode
);
1818 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1820 oe1
->op
= gimple_get_lhs (sum
);
1823 /* Apply the multiplication/division. */
1824 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1825 oe1
->op
, c
->op
, c
->oecode
);
1826 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1828 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1829 print_generic_expr (dump_file
, c
->op
);
1830 fprintf (dump_file
, "\n");
1833 /* Record it in the addition chain and disable further
1834 undistribution with this op. */
1835 oe1
->op
= gimple_assign_lhs (prod
);
1836 oe1
->rank
= get_rank (oe1
->op
);
1837 subops
[first
].release ();
1845 for (i
= 0; i
< ops
->length (); ++i
)
1846 subops
[i
].release ();
1853 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1854 first: element index for each relevant BIT_FIELD_REF.
1855 second: the index of vec ops* for each relevant BIT_FIELD_REF. */
1856 typedef std::pair
<unsigned, unsigned> v_info_elem
;
1859 auto_vec
<v_info_elem
, 32> vec
;
1861 typedef v_info
*v_info_ptr
;
1863 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1865 sort_by_mach_mode (const void *p_i
, const void *p_j
)
1867 const tree tr1
= *((const tree
*) p_i
);
1868 const tree tr2
= *((const tree
*) p_j
);
1869 unsigned int mode1
= TYPE_MODE (TREE_TYPE (tr1
));
1870 unsigned int mode2
= TYPE_MODE (TREE_TYPE (tr2
));
1873 else if (mode1
< mode2
)
1875 if (SSA_NAME_VERSION (tr1
) < SSA_NAME_VERSION (tr2
))
1877 else if (SSA_NAME_VERSION (tr1
) > SSA_NAME_VERSION (tr2
))
1882 /* Cleanup hash map for VECTOR information. */
1884 cleanup_vinfo_map (hash_map
<tree
, v_info_ptr
> &info_map
)
1886 for (hash_map
<tree
, v_info_ptr
>::iterator it
= info_map
.begin ();
1887 it
!= info_map
.end (); ++it
)
1889 v_info_ptr info
= (*it
).second
;
1891 (*it
).second
= NULL
;
1895 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1896 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1898 Vs = (V1 + V2 + ... + Vn)
1899 Vs[0] + Vs[1] + ... + Vs[k]
1901 The basic steps are listed below:
1903 1) Check the addition chain *OPS by looking those summands coming from
1904 VECTOR bit_field_ref on VECTOR type. Put the information into
1905 v_info_map for each satisfied summand, using VECTOR SSA_NAME as key.
1907 2) For each key (VECTOR SSA_NAME), validate all its BIT_FIELD_REFs are
1908 continuous, they can cover the whole VECTOR perfectly without any holes.
1909 Obtain one VECTOR list which contain candidates to be transformed.
1911 3) Sort the VECTOR list by machine mode of VECTOR type, for each group of
1912 candidates with same mode, build the addition statements for them and
1913 generate BIT_FIELD_REFs accordingly.
1916 The current implementation requires the whole VECTORs should be fully
1917 covered, but it can be extended to support partial, checking adjacent
1918 but not fill the whole, it may need some cost model to define the
1919 boundary to do or not.
1922 undistribute_bitref_for_vector (enum tree_code opcode
,
1923 vec
<operand_entry
*> *ops
, struct loop
*loop
)
1925 if (ops
->length () <= 1)
1928 if (opcode
!= PLUS_EXPR
1929 && opcode
!= MULT_EXPR
1930 && opcode
!= BIT_XOR_EXPR
1931 && opcode
!= BIT_IOR_EXPR
1932 && opcode
!= BIT_AND_EXPR
)
1935 hash_map
<tree
, v_info_ptr
> v_info_map
;
1939 /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
1940 information into map. */
1941 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1943 enum tree_code dcode
;
1946 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1948 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1949 if (!is_gimple_assign (oe1def
))
1951 dcode
= gimple_assign_rhs_code (oe1def
);
1952 if (dcode
!= BIT_FIELD_REF
|| !is_reassociable_op (oe1def
, dcode
, loop
))
1955 tree rhs
= gimple_assign_rhs1 (oe1def
);
1956 tree vec
= TREE_OPERAND (rhs
, 0);
1957 tree vec_type
= TREE_TYPE (vec
);
1959 if (TREE_CODE (vec
) != SSA_NAME
|| !VECTOR_TYPE_P (vec_type
))
1962 /* Ignore it if target machine can't support this VECTOR type. */
1963 if (!VECTOR_MODE_P (TYPE_MODE (vec_type
)))
1966 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1967 if (!TYPE_VECTOR_SUBPARTS (vec_type
).is_constant ())
1970 if (VECTOR_TYPE_P (TREE_TYPE (rhs
))
1971 || !is_a
<scalar_mode
> (TYPE_MODE (TREE_TYPE (rhs
))))
1974 /* The type of BIT_FIELD_REF might not be equal to the element type of
1975 the vector. We want to use a vector type with element type the
1976 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1977 if (!useless_type_conversion_p (TREE_TYPE (rhs
), TREE_TYPE (vec_type
)))
1979 machine_mode simd_mode
;
1980 unsigned HOST_WIDE_INT size
, nunits
;
1981 unsigned HOST_WIDE_INT elem_size
1982 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs
)));
1983 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type
)).is_constant (&size
))
1985 if (size
<= elem_size
|| (size
% elem_size
) != 0)
1987 nunits
= size
/ elem_size
;
1988 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs
)),
1989 nunits
).exists (&simd_mode
))
1991 vec_type
= build_vector_type_for_mode (TREE_TYPE (rhs
), simd_mode
);
1993 /* Ignore it if target machine can't support this VECTOR type. */
1994 if (!VECTOR_MODE_P (TYPE_MODE (vec_type
)))
1997 /* Check const vector type, constrain BIT_FIELD_REF offset and
1999 if (!TYPE_VECTOR_SUBPARTS (vec_type
).is_constant ())
2002 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type
)),
2003 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec
)))))
2007 tree elem_type
= TREE_TYPE (vec_type
);
2008 unsigned HOST_WIDE_INT elem_size
= tree_to_uhwi (TYPE_SIZE (elem_type
));
2009 if (maybe_ne (bit_field_size (rhs
), elem_size
))
2013 if (!constant_multiple_p (bit_field_offset (rhs
), elem_size
, &idx
))
2016 /* Ignore it if target machine can't support this type of VECTOR
2018 optab op_tab
= optab_for_tree_code (opcode
, vec_type
, optab_vector
);
2019 if (optab_handler (op_tab
, TYPE_MODE (vec_type
)) == CODE_FOR_nothing
)
2023 v_info_ptr
&info
= v_info_map
.get_or_insert (vec
, &existed
);
2027 info
->vec_type
= vec_type
;
2029 else if (!types_compatible_p (vec_type
, info
->vec_type
))
2031 info
->vec
.safe_push (std::make_pair (idx
, i
));
2034 /* At least two VECTOR to combine. */
2035 if (v_info_map
.elements () <= 1)
2037 cleanup_vinfo_map (v_info_map
);
2041 /* Verify all VECTOR candidates by checking two conditions:
2042 1) sorted offsets are adjacent, no holes.
2043 2) can fill the whole VECTOR perfectly.
2044 And add the valid candidates to a vector for further handling. */
2045 auto_vec
<tree
> valid_vecs (v_info_map
.elements ());
2046 for (hash_map
<tree
, v_info_ptr
>::iterator it
= v_info_map
.begin ();
2047 it
!= v_info_map
.end (); ++it
)
2049 tree cand_vec
= (*it
).first
;
2050 v_info_ptr cand_info
= (*it
).second
;
2051 unsigned int num_elems
2052 = TYPE_VECTOR_SUBPARTS (cand_info
->vec_type
).to_constant ();
2053 if (cand_info
->vec
.length () != num_elems
)
2055 sbitmap holes
= sbitmap_alloc (num_elems
);
2056 bitmap_ones (holes
);
2059 FOR_EACH_VEC_ELT (cand_info
->vec
, i
, curr
)
2061 if (!bitmap_bit_p (holes
, curr
->first
))
2067 bitmap_clear_bit (holes
, curr
->first
);
2069 if (valid
&& bitmap_empty_p (holes
))
2070 valid_vecs
.quick_push (cand_vec
);
2071 sbitmap_free (holes
);
2074 /* At least two VECTOR to combine. */
2075 if (valid_vecs
.length () <= 1)
2077 cleanup_vinfo_map (v_info_map
);
2081 valid_vecs
.qsort (sort_by_mach_mode
);
2082 /* Go through all candidates by machine mode order, query the mode_to_total
2083 to get the total number for each mode and skip the single one. */
2084 for (unsigned i
= 0; i
< valid_vecs
.length () - 1; ++i
)
2086 tree tvec
= valid_vecs
[i
];
2087 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (tvec
));
2089 /* Skip modes with only a single candidate. */
2090 if (TYPE_MODE (TREE_TYPE (valid_vecs
[i
+ 1])) != mode
)
2093 unsigned int idx
, j
;
2095 tree sum_vec
= tvec
;
2096 v_info_ptr info_ptr
= *(v_info_map
.get (tvec
));
2098 tree vec_type
= info_ptr
->vec_type
;
2100 /* Build the sum for all candidates with same mode. */
2103 sum
= build_and_add_sum (vec_type
, sum_vec
,
2104 valid_vecs
[i
+ 1], opcode
);
2105 if (!useless_type_conversion_p (vec_type
,
2106 TREE_TYPE (valid_vecs
[i
+ 1])))
2108 /* Update the operands only after build_and_add_sum,
2109 so that we don't have to repeat the placement algorithm
2110 of build_and_add_sum. */
2111 gimple_stmt_iterator gsi
= gsi_for_stmt (sum
);
2112 tree vce
= build1 (VIEW_CONVERT_EXPR
, vec_type
,
2114 tree lhs
= make_ssa_name (vec_type
);
2115 gimple
*g
= gimple_build_assign (lhs
, VIEW_CONVERT_EXPR
, vce
);
2116 gimple_set_uid (g
, gimple_uid (sum
));
2117 gsi_insert_before (&gsi
, g
, GSI_NEW_STMT
);
2118 gimple_assign_set_rhs2 (sum
, lhs
);
2119 if (sum_vec
== tvec
)
2121 vce
= build1 (VIEW_CONVERT_EXPR
, vec_type
, sum_vec
);
2122 lhs
= make_ssa_name (vec_type
);
2123 g
= gimple_build_assign (lhs
, VIEW_CONVERT_EXPR
, vce
);
2124 gimple_set_uid (g
, gimple_uid (sum
));
2125 gsi_insert_before (&gsi
, g
, GSI_NEW_STMT
);
2126 gimple_assign_set_rhs1 (sum
, lhs
);
2130 sum_vec
= gimple_get_lhs (sum
);
2131 info_ptr
= *(v_info_map
.get (valid_vecs
[i
+ 1]));
2132 gcc_assert (types_compatible_p (vec_type
, info_ptr
->vec_type
));
2133 /* Update those related ops of current candidate VECTOR. */
2134 FOR_EACH_VEC_ELT (info_ptr
->vec
, j
, elem
)
2137 gimple
*def
= SSA_NAME_DEF_STMT ((*ops
)[idx
]->op
);
2138 /* Set this then op definition will get DCEd later. */
2139 gimple_set_visited (def
, true);
2140 if (opcode
== PLUS_EXPR
2141 || opcode
== BIT_XOR_EXPR
2142 || opcode
== BIT_IOR_EXPR
)
2143 (*ops
)[idx
]->op
= build_zero_cst (TREE_TYPE ((*ops
)[idx
]->op
));
2144 else if (opcode
== MULT_EXPR
)
2145 (*ops
)[idx
]->op
= build_one_cst (TREE_TYPE ((*ops
)[idx
]->op
));
2148 gcc_assert (opcode
== BIT_AND_EXPR
);
2150 = build_all_ones_cst (TREE_TYPE ((*ops
)[idx
]->op
));
2152 (*ops
)[idx
]->rank
= 0;
2154 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2156 fprintf (dump_file
, "Generating addition -> ");
2157 print_gimple_stmt (dump_file
, sum
, 0);
2161 while ((i
< valid_vecs
.length () - 1)
2162 && TYPE_MODE (TREE_TYPE (valid_vecs
[i
+ 1])) == mode
);
2164 /* Referring to first valid VECTOR with this mode, generate the
2165 BIT_FIELD_REF statements accordingly. */
2166 info_ptr
= *(v_info_map
.get (tvec
));
2168 tree elem_type
= TREE_TYPE (vec_type
);
2169 FOR_EACH_VEC_ELT (info_ptr
->vec
, j
, elem
)
2172 tree dst
= make_ssa_name (elem_type
);
2173 tree pos
= bitsize_int (elem
->first
2174 * tree_to_uhwi (TYPE_SIZE (elem_type
)));
2175 tree bfr
= build3 (BIT_FIELD_REF
, elem_type
, sum_vec
,
2176 TYPE_SIZE (elem_type
), pos
);
2177 gimple
*gs
= gimple_build_assign (dst
, BIT_FIELD_REF
, bfr
);
2178 insert_stmt_after (gs
, sum
);
2179 gimple
*def
= SSA_NAME_DEF_STMT ((*ops
)[idx
]->op
);
2180 /* Set this then op definition will get DCEd later. */
2181 gimple_set_visited (def
, true);
2182 (*ops
)[idx
]->op
= gimple_assign_lhs (gs
);
2183 (*ops
)[idx
]->rank
= get_rank ((*ops
)[idx
]->op
);
2184 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2186 fprintf (dump_file
, "Generating bit_field_ref -> ");
2187 print_gimple_stmt (dump_file
, gs
, 0);
2192 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2193 fprintf (dump_file
, "undistributiong bit_field_ref for vector done.\n");
2195 cleanup_vinfo_map (v_info_map
);
2200 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2201 expression, examine the other OPS to see if any of them are comparisons
2202 of the same values, which we may be able to combine or eliminate.
2203 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
2206 eliminate_redundant_comparison (enum tree_code opcode
,
2207 vec
<operand_entry
*> *ops
,
2208 unsigned int currindex
,
2209 operand_entry
*curr
)
2212 enum tree_code lcode
, rcode
;
2213 gimple
*def1
, *def2
;
2217 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
2220 /* Check that CURR is a comparison. */
2221 if (TREE_CODE (curr
->op
) != SSA_NAME
)
2223 def1
= SSA_NAME_DEF_STMT (curr
->op
);
2224 if (!is_gimple_assign (def1
))
2226 lcode
= gimple_assign_rhs_code (def1
);
2227 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
2229 op1
= gimple_assign_rhs1 (def1
);
2230 op2
= gimple_assign_rhs2 (def1
);
2232 /* Now look for a similar comparison in the remaining OPS. */
2233 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
2237 if (TREE_CODE (oe
->op
) != SSA_NAME
)
2239 def2
= SSA_NAME_DEF_STMT (oe
->op
);
2240 if (!is_gimple_assign (def2
))
2242 rcode
= gimple_assign_rhs_code (def2
);
2243 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
2246 /* If we got here, we have a match. See if we can combine the
2248 tree type
= TREE_TYPE (gimple_assign_lhs (def1
));
2249 if (opcode
== BIT_IOR_EXPR
)
2250 t
= maybe_fold_or_comparisons (type
,
2252 rcode
, gimple_assign_rhs1 (def2
),
2253 gimple_assign_rhs2 (def2
));
2255 t
= maybe_fold_and_comparisons (type
,
2257 rcode
, gimple_assign_rhs1 (def2
),
2258 gimple_assign_rhs2 (def2
));
2262 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2263 always give us a boolean_type_node value back. If the original
2264 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2265 we need to convert. */
2266 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
2268 if (!fold_convertible_p (TREE_TYPE (curr
->op
), t
))
2270 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
2273 if (TREE_CODE (t
) != INTEGER_CST
2274 && !operand_equal_p (t
, curr
->op
, 0))
2276 enum tree_code subcode
;
2277 tree newop1
, newop2
;
2278 if (!COMPARISON_CLASS_P (t
))
2280 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
2281 STRIP_USELESS_TYPE_CONVERSION (newop1
);
2282 STRIP_USELESS_TYPE_CONVERSION (newop2
);
2283 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
2285 if (lcode
== TREE_CODE (t
)
2286 && operand_equal_p (op1
, newop1
, 0)
2287 && operand_equal_p (op2
, newop2
, 0))
2289 else if ((TREE_CODE (newop1
) == SSA_NAME
2290 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop1
))
2291 || (TREE_CODE (newop2
) == SSA_NAME
2292 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop2
)))
2296 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2298 fprintf (dump_file
, "Equivalence: ");
2299 print_generic_expr (dump_file
, curr
->op
);
2300 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
2301 print_generic_expr (dump_file
, oe
->op
);
2302 fprintf (dump_file
, " -> ");
2303 print_generic_expr (dump_file
, t
);
2304 fprintf (dump_file
, "\n");
2307 /* Now we can delete oe, as it has been subsumed by the new combined
2309 ops
->ordered_remove (i
);
2310 reassociate_stats
.ops_eliminated
++;
2312 /* If t is the same as curr->op, we're done. Otherwise we must
2313 replace curr->op with t. Special case is if we got a constant
2314 back, in which case we add it to the end instead of in place of
2315 the current entry. */
2316 if (TREE_CODE (t
) == INTEGER_CST
)
2318 ops
->ordered_remove (currindex
);
2319 add_to_ops_vec (ops
, t
);
2321 else if (!operand_equal_p (t
, curr
->op
, 0))
2324 enum tree_code subcode
;
2327 gcc_assert (COMPARISON_CLASS_P (t
));
2328 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
2329 STRIP_USELESS_TYPE_CONVERSION (newop1
);
2330 STRIP_USELESS_TYPE_CONVERSION (newop2
);
2331 gcc_checking_assert (is_gimple_val (newop1
)
2332 && is_gimple_val (newop2
));
2333 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
2334 curr
->op
= gimple_get_lhs (sum
);
2343 /* Transform repeated addition of same values into multiply with
2346 transform_add_to_multiply (vec
<operand_entry
*> *ops
)
2349 tree op
= NULL_TREE
;
2351 int i
, start
= -1, end
= 0, count
= 0;
2352 auto_vec
<std::pair
<int, int> > indxs
;
2353 bool changed
= false;
2355 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops
)[0]->op
))
2356 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops
)[0]->op
))
2357 || !flag_unsafe_math_optimizations
))
2360 /* Look for repeated operands. */
2361 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
2369 else if (operand_equal_p (oe
->op
, op
, 0))
2377 indxs
.safe_push (std::make_pair (start
, end
));
2385 indxs
.safe_push (std::make_pair (start
, end
));
2387 for (j
= indxs
.length () - 1; j
>= 0; --j
)
2389 /* Convert repeated operand addition to multiplication. */
2390 start
= indxs
[j
].first
;
2391 end
= indxs
[j
].second
;
2392 op
= (*ops
)[start
]->op
;
2393 count
= end
- start
+ 1;
2394 for (i
= end
; i
>= start
; --i
)
2395 ops
->unordered_remove (i
);
2396 tree tmp
= make_ssa_name (TREE_TYPE (op
));
2397 tree cst
= build_int_cst (integer_type_node
, count
);
2399 = gimple_build_assign (tmp
, MULT_EXPR
,
2400 op
, fold_convert (TREE_TYPE (op
), cst
));
2401 gimple_set_visited (mul_stmt
, true);
2402 add_to_ops_vec (ops
, tmp
, mul_stmt
);
2410 /* Perform various identities and other optimizations on the list of
2411 operand entries, stored in OPS. The tree code for the binary
2412 operation between all the operands is OPCODE. */
2415 optimize_ops_list (enum tree_code opcode
,
2416 vec
<operand_entry
*> *ops
)
2418 unsigned int length
= ops
->length ();
2421 operand_entry
*oelast
= NULL
;
2422 bool iterate
= false;
2427 oelast
= ops
->last ();
2429 /* If the last two are constants, pop the constants off, merge them
2430 and try the next two. */
2431 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
2433 operand_entry
*oelm1
= (*ops
)[length
- 2];
2435 if (oelm1
->rank
== 0
2436 && is_gimple_min_invariant (oelm1
->op
)
2437 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
2438 TREE_TYPE (oelast
->op
)))
2440 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
2441 oelm1
->op
, oelast
->op
);
2443 if (folded
&& is_gimple_min_invariant (folded
))
2445 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2446 fprintf (dump_file
, "Merging constants\n");
2451 add_to_ops_vec (ops
, folded
);
2452 reassociate_stats
.constants_eliminated
++;
2454 optimize_ops_list (opcode
, ops
);
2460 eliminate_using_constants (opcode
, ops
);
2463 for (i
= 0; ops
->iterate (i
, &oe
);)
2467 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
2469 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
2470 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
2471 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
2484 optimize_ops_list (opcode
, ops
);
/* The following functions are subroutines to optimize_range_tests and allow
   it to try to change a logical combination of comparisons into a range
   test.

   For example, both
	X == 2 || X == 5 || X == 3 || X == 4
   and
	X >= 2 && X <= 5
   are converted to
	(unsigned) (X - 2) <= 3

   For more information see comments above fold_test_range in fold-const.cc,
   this implementation is for GIMPLE.  */
2503 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2506 dump_range_entry (FILE *file
, struct range_entry
*r
, bool skip_exp
)
2509 print_generic_expr (file
, r
->exp
);
2510 fprintf (file
, " %c[", r
->in_p
? '+' : '-');
2511 print_generic_expr (file
, r
->low
);
2513 print_generic_expr (file
, r
->high
);
2517 /* Dump the range entry R to STDERR. */
2520 debug_range_entry (struct range_entry
*r
)
2522 dump_range_entry (stderr
, r
, false);
2523 fputc ('\n', stderr
);
2526 /* This is similar to make_range in fold-const.cc, but on top of
2527 GIMPLE instead of trees. If EXP is non-NULL, it should be
2528 an SSA_NAME and STMT argument is ignored, otherwise STMT
2529 argument should be a GIMPLE_COND. */
2532 init_range_entry (struct range_entry
*r
, tree exp
, gimple
*stmt
)
2536 bool is_bool
, strict_overflow_p
;
2540 r
->strict_overflow_p
= false;
2542 r
->high
= NULL_TREE
;
2543 if (exp
!= NULL_TREE
2544 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
2547 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2548 and see if we can refine the range. Some of the cases below may not
2549 happen, but it doesn't seem worth worrying about this. We "continue"
2550 the outer loop when we've changed something; otherwise we "break"
2551 the switch, which will "break" the while. */
2552 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
2555 strict_overflow_p
= false;
2557 if (exp
== NULL_TREE
)
2559 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
2561 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
2566 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
2571 enum tree_code code
;
2572 tree arg0
, arg1
, exp_type
;
2576 if (exp
!= NULL_TREE
)
2578 if (TREE_CODE (exp
) != SSA_NAME
2579 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
))
2582 stmt
= SSA_NAME_DEF_STMT (exp
);
2583 if (!is_gimple_assign (stmt
))
2586 code
= gimple_assign_rhs_code (stmt
);
2587 arg0
= gimple_assign_rhs1 (stmt
);
2588 arg1
= gimple_assign_rhs2 (stmt
);
2589 exp_type
= TREE_TYPE (exp
);
2593 code
= gimple_cond_code (stmt
);
2594 arg0
= gimple_cond_lhs (stmt
);
2595 arg1
= gimple_cond_rhs (stmt
);
2596 exp_type
= boolean_type_node
;
2599 if (TREE_CODE (arg0
) != SSA_NAME
2600 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0
))
2602 loc
= gimple_location (stmt
);
2606 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
2607 /* Ensure the range is either +[-,0], +[0,0],
2608 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2609 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2610 or similar expression of unconditional true or
2611 false, it should not be negated. */
2612 && ((high
&& integer_zerop (high
))
2613 || (low
&& integer_onep (low
))))
2626 if ((TYPE_PRECISION (exp_type
) == 1
2627 || TREE_CODE (exp_type
) == BOOLEAN_TYPE
)
2628 && TYPE_PRECISION (TREE_TYPE (arg0
)) > 1)
2631 else if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
2633 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
2638 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
2653 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
2655 &strict_overflow_p
);
2656 if (nexp
!= NULL_TREE
)
2659 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2672 r
->strict_overflow_p
= strict_overflow_p
;
2676 /* Comparison function for qsort. Sort entries
2677 without SSA_NAME exp first, then with SSA_NAMEs sorted
2678 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2679 by increasing ->low and if ->low is the same, by increasing
2680 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2684 range_entry_cmp (const void *a
, const void *b
)
2686 const struct range_entry
*p
= (const struct range_entry
*) a
;
2687 const struct range_entry
*q
= (const struct range_entry
*) b
;
2689 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
2691 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2693 /* Group range_entries for the same SSA_NAME together. */
2694 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
2696 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
2698 /* If ->low is different, NULL low goes first, then by
2700 if (p
->low
!= NULL_TREE
)
2702 if (q
->low
!= NULL_TREE
)
2704 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2706 if (tem
&& integer_onep (tem
))
2708 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2710 if (tem
&& integer_onep (tem
))
2716 else if (q
->low
!= NULL_TREE
)
2718 /* If ->high is different, NULL high goes last, before that by
2720 if (p
->high
!= NULL_TREE
)
2722 if (q
->high
!= NULL_TREE
)
2724 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2726 if (tem
&& integer_onep (tem
))
2728 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2730 if (tem
&& integer_onep (tem
))
2736 else if (q
->high
!= NULL_TREE
)
2738 /* If both ranges are the same, sort below by ascending idx. */
2743 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2746 if (p
->idx
< q
->idx
)
2750 gcc_checking_assert (p
->idx
> q
->idx
);
2755 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2756 insert needed statements BEFORE or after GSI. */
2759 force_into_ssa_name (gimple_stmt_iterator
*gsi
, tree expr
, bool before
)
2761 enum gsi_iterator_update m
= before
? GSI_SAME_STMT
: GSI_CONTINUE_LINKING
;
2762 tree ret
= force_gimple_operand_gsi (gsi
, expr
, true, NULL_TREE
, before
, m
);
2763 if (TREE_CODE (ret
) != SSA_NAME
)
2765 gimple
*g
= gimple_build_assign (make_ssa_name (TREE_TYPE (ret
)), ret
);
2767 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
2769 gsi_insert_after (gsi
, g
, GSI_CONTINUE_LINKING
);
2770 ret
= gimple_assign_lhs (g
);
2775 /* Helper routine of optimize_range_test.
2776 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2777 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2778 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2779 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2780 an array of COUNT pointers to other ranges. Return
2781 true if the range merge has been successful.
2782 If OPCODE is ERROR_MARK, this is called from within
2783 maybe_optimize_range_tests and is performing inter-bb range optimization.
2784 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2788 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2789 struct range_entry
**otherrangep
,
2790 unsigned int count
, enum tree_code opcode
,
2791 vec
<operand_entry
*> *ops
, tree exp
, gimple_seq seq
,
2792 bool in_p
, tree low
, tree high
, bool strict_overflow_p
)
2794 unsigned int idx
= range
->idx
;
2795 struct range_entry
*swap_with
= NULL
;
2796 basic_block rewrite_bb_first
= NULL
, rewrite_bb_last
= NULL
;
2797 if (opcode
== ERROR_MARK
)
2799 /* For inter-bb range test optimization, pick from the range tests
2800 the one which is tested in the earliest condition (one dominating
2801 the others), because otherwise there could be some UB (e.g. signed
2802 overflow) in following bbs that we'd expose which wasn't there in
2803 the original program. See PR104196. */
2804 basic_block orig_range_bb
= BASIC_BLOCK_FOR_FN (cfun
, (*ops
)[idx
]->id
);
2805 basic_block range_bb
= orig_range_bb
;
2806 for (unsigned int i
= 0; i
< count
; i
++)
2808 struct range_entry
*this_range
;
2810 this_range
= otherrange
+ i
;
2812 this_range
= otherrangep
[i
];
2813 operand_entry
*oe
= (*ops
)[this_range
->idx
];
2814 basic_block this_bb
= BASIC_BLOCK_FOR_FN (cfun
, oe
->id
);
2815 if (range_bb
!= this_bb
2816 && dominated_by_p (CDI_DOMINATORS
, range_bb
, this_bb
))
2818 swap_with
= this_range
;
2820 idx
= this_range
->idx
;
2823 /* If seq is non-NULL, it can contain statements that use SSA_NAMEs
2824 only defined in later blocks. In this case we can't move the
2825 merged comparison earlier, so instead check if there are any stmts
2826 that might trigger signed integer overflow in between and rewrite
2827 them. But only after we check if the optimization is possible. */
2828 if (seq
&& swap_with
)
2830 rewrite_bb_first
= range_bb
;
2831 rewrite_bb_last
= orig_range_bb
;
2836 operand_entry
*oe
= (*ops
)[idx
];
2838 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
2839 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2840 location_t loc
= gimple_location (stmt
);
2841 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2842 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2844 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2845 gimple_stmt_iterator gsi
;
2846 unsigned int i
, uid
;
2848 if (tem
== NULL_TREE
)
2851 /* If op is default def SSA_NAME, there is no place to insert the
2852 new comparison. Give up, unless we can use OP itself as the
2854 if (op
&& SSA_NAME_IS_DEFAULT_DEF (op
))
2856 if (op
== range
->exp
2857 && ((TYPE_PRECISION (optype
) == 1 && TYPE_UNSIGNED (optype
))
2858 || TREE_CODE (optype
) == BOOLEAN_TYPE
)
2860 || (TREE_CODE (tem
) == EQ_EXPR
2861 && TREE_OPERAND (tem
, 0) == op
2862 && integer_onep (TREE_OPERAND (tem
, 1))))
2863 && opcode
!= BIT_IOR_EXPR
2864 && (opcode
!= ERROR_MARK
|| oe
->rank
!= BIT_IOR_EXPR
))
2874 std::swap (range
->idx
, swap_with
->idx
);
2876 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2877 warning_at (loc
, OPT_Wstrict_overflow
,
2878 "assuming signed overflow does not occur "
2879 "when simplifying range test");
2881 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2883 struct range_entry
*r
;
2884 fprintf (dump_file
, "Optimizing range tests ");
2885 dump_range_entry (dump_file
, range
, false);
2886 for (i
= 0; i
< count
; i
++)
2893 && r
->exp
!= range
->exp
2894 && TREE_CODE (r
->exp
) == SSA_NAME
)
2896 fprintf (dump_file
, " and ");
2897 dump_range_entry (dump_file
, r
, false);
2901 fprintf (dump_file
, " and");
2902 dump_range_entry (dump_file
, r
, true);
2905 fprintf (dump_file
, "\n into ");
2906 print_generic_expr (dump_file
, tem
);
2907 fprintf (dump_file
, "\n");
2910 /* In inter-bb range optimization mode, if we have a seq, we can't
2911 move the merged comparison to the earliest bb from the comparisons
2912 being replaced, so instead rewrite stmts that could trigger signed
2913 integer overflow. */
2914 for (basic_block bb
= rewrite_bb_last
;
2915 bb
!= rewrite_bb_first
; bb
= single_pred (bb
))
2916 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
);
2917 !gsi_end_p (gsi
); gsi_next (&gsi
))
2919 gimple
*stmt
= gsi_stmt (gsi
);
2920 if (is_gimple_assign (stmt
))
2921 if (tree lhs
= gimple_assign_lhs (stmt
))
2922 if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2923 || POINTER_TYPE_P (TREE_TYPE (lhs
)))
2924 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs
)))
2926 enum tree_code code
= gimple_assign_rhs_code (stmt
);
2927 if (arith_code_with_undefined_signed_overflow (code
))
2929 gimple_stmt_iterator gsip
= gsi
;
2930 gimple_stmt_iterator gsin
= gsi
;
2933 rewrite_to_defined_overflow (stmt
, true);
2934 unsigned uid
= gimple_uid (stmt
);
2935 if (gsi_end_p (gsip
))
2936 gsip
= gsi_after_labels (bb
);
2939 for (; gsi_stmt (gsip
) != gsi_stmt (gsin
);
2941 gimple_set_uid (gsi_stmt (gsip
), uid
);
2946 if (opcode
== BIT_IOR_EXPR
2947 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2948 tem
= invert_truthvalue_loc (loc
, tem
);
2950 tem
= fold_convert_loc (loc
, optype
, tem
);
2953 gsi
= gsi_for_stmt (stmt
);
2954 uid
= gimple_uid (stmt
);
2962 gcc_checking_assert (tem
== op
);
2963 /* In rare cases range->exp can be equal to lhs of stmt.
2964 In that case we have to insert after the stmt rather then before
2965 it. If stmt is a PHI, insert it at the start of the basic block. */
2966 else if (op
!= range
->exp
)
2968 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2969 tem
= force_into_ssa_name (&gsi
, tem
, true);
2972 else if (gimple_code (stmt
) != GIMPLE_PHI
)
2974 gsi_insert_seq_after (&gsi
, seq
, GSI_CONTINUE_LINKING
);
2975 tem
= force_into_ssa_name (&gsi
, tem
, false);
2979 gsi
= gsi_after_labels (gimple_bb (stmt
));
2980 if (!gsi_end_p (gsi
))
2981 uid
= gimple_uid (gsi_stmt (gsi
));
2984 gsi
= gsi_start_bb (gimple_bb (stmt
));
2986 while (!gsi_end_p (gsi
))
2988 uid
= gimple_uid (gsi_stmt (gsi
));
2992 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2993 tem
= force_into_ssa_name (&gsi
, tem
, true);
2994 if (gsi_end_p (gsi
))
2995 gsi
= gsi_last_bb (gimple_bb (stmt
));
2999 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
3000 if (gimple_uid (gsi_stmt (gsi
)))
3003 gimple_set_uid (gsi_stmt (gsi
), uid
);
3010 range
->strict_overflow_p
= false;
3012 for (i
= 0; i
< count
; i
++)
3015 range
= otherrange
+ i
;
3017 range
= otherrangep
[i
];
3018 oe
= (*ops
)[range
->idx
];
3019 /* Now change all the other range test immediate uses, so that
3020 those tests will be optimized away. */
3021 if (opcode
== ERROR_MARK
)
3024 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
3025 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
3027 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
3028 ? boolean_false_node
: boolean_true_node
);
3031 oe
->op
= error_mark_node
;
3032 range
->exp
= NULL_TREE
;
3033 range
->low
= NULL_TREE
;
3034 range
->high
= NULL_TREE
;
3039 /* Optimize X == CST1 || X == CST2
3040 if popcount (CST1 ^ CST2) == 1 into
3041 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
3042 Similarly for ranges. E.g.
3043 X != 2 && X != 3 && X != 10 && X != 11
3044 will be transformed by the previous optimization into
3045 !((X - 2U) <= 1U || (X - 10U) <= 1U)
3046 and this loop can transform that into
3047 !(((X & ~8) - 2U) <= 1U). */
3050 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
3051 tree lowi
, tree lowj
, tree highi
, tree highj
,
3052 vec
<operand_entry
*> *ops
,
3053 struct range_entry
*rangei
,
3054 struct range_entry
*rangej
)
3056 tree lowxor
, highxor
, tem
, exp
;
3057 /* Check lowi ^ lowj == highi ^ highj and
3058 popcount (lowi ^ lowj) == 1. */
3059 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
3060 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
3062 if (!integer_pow2p (lowxor
))
3064 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
3065 if (!tree_int_cst_equal (lowxor
, highxor
))
3069 scalar_int_mode mode
= as_a
<scalar_int_mode
> (TYPE_MODE (type
));
3070 int prec
= GET_MODE_PRECISION (mode
);
3071 if (TYPE_PRECISION (type
) < prec
3072 || (wi::to_wide (TYPE_MIN_VALUE (type
))
3073 != wi::min_value (prec
, TYPE_SIGN (type
)))
3074 || (wi::to_wide (TYPE_MAX_VALUE (type
))
3075 != wi::max_value (prec
, TYPE_SIGN (type
))))
3077 type
= build_nonstandard_integer_type (prec
, TYPE_UNSIGNED (type
));
3078 exp
= fold_convert (type
, exp
);
3079 lowxor
= fold_convert (type
, lowxor
);
3080 lowi
= fold_convert (type
, lowi
);
3081 highi
= fold_convert (type
, highi
);
3083 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
3084 exp
= fold_build2 (BIT_AND_EXPR
, type
, exp
, tem
);
3085 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
3086 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
3087 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, exp
,
3088 NULL
, rangei
->in_p
, lowj
, highj
,
3089 rangei
->strict_overflow_p
3090 || rangej
->strict_overflow_p
))
3095 /* Optimize X == CST1 || X == CST2
3096 if popcount (CST2 - CST1) == 1 into
3097 ((X - CST1) & ~(CST2 - CST1)) == 0.
3098 Similarly for ranges. E.g.
3099 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
3100 || X == 75 || X == 45
3101 will be transformed by the previous optimization into
3102 (X - 43U) <= 3U || (X - 75U) <= 3U
3103 and this loop can transform that into
3104 ((X - 43U) & ~(75U - 43U)) <= 3U. */
3106 optimize_range_tests_diff (enum tree_code opcode
, tree type
,
3107 tree lowi
, tree lowj
, tree highi
, tree highj
,
3108 vec
<operand_entry
*> *ops
,
3109 struct range_entry
*rangei
,
3110 struct range_entry
*rangej
)
3112 tree tem1
, tem2
, mask
;
3113 /* Check highi - lowi == highj - lowj. */
3114 tem1
= fold_binary (MINUS_EXPR
, type
, highi
, lowi
);
3115 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
3117 tem2
= fold_binary (MINUS_EXPR
, type
, highj
, lowj
);
3118 if (!tree_int_cst_equal (tem1
, tem2
))
3120 /* Check popcount (lowj - lowi) == 1. */
3121 tem1
= fold_binary (MINUS_EXPR
, type
, lowj
, lowi
);
3122 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
3124 if (!integer_pow2p (tem1
))
3127 scalar_int_mode mode
= as_a
<scalar_int_mode
> (TYPE_MODE (type
));
3128 int prec
= GET_MODE_PRECISION (mode
);
3129 if (TYPE_PRECISION (type
) < prec
3130 || (wi::to_wide (TYPE_MIN_VALUE (type
))
3131 != wi::min_value (prec
, TYPE_SIGN (type
)))
3132 || (wi::to_wide (TYPE_MAX_VALUE (type
))
3133 != wi::max_value (prec
, TYPE_SIGN (type
))))
3134 type
= build_nonstandard_integer_type (prec
, 1);
3136 type
= unsigned_type_for (type
);
3137 tem1
= fold_convert (type
, tem1
);
3138 tem2
= fold_convert (type
, tem2
);
3139 lowi
= fold_convert (type
, lowi
);
3140 mask
= fold_build1 (BIT_NOT_EXPR
, type
, tem1
);
3141 tem1
= fold_build2 (MINUS_EXPR
, type
,
3142 fold_convert (type
, rangei
->exp
), lowi
);
3143 tem1
= fold_build2 (BIT_AND_EXPR
, type
, tem1
, mask
);
3144 lowj
= build_int_cst (type
, 0);
3145 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, tem1
,
3146 NULL
, rangei
->in_p
, lowj
, tem2
,
3147 rangei
->strict_overflow_p
3148 || rangej
->strict_overflow_p
))
3153 /* It does some common checks for function optimize_range_tests_xor and
3154 optimize_range_tests_diff.
3155 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
3156 Else it calls optimize_range_tests_diff. */
3159 optimize_range_tests_1 (enum tree_code opcode
, int first
, int length
,
3160 bool optimize_xor
, vec
<operand_entry
*> *ops
,
3161 struct range_entry
*ranges
)
3164 bool any_changes
= false;
3165 for (i
= first
; i
< length
; i
++)
3167 tree lowi
, highi
, lowj
, highj
, type
, tem
;
3169 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
3171 type
= TREE_TYPE (ranges
[i
].exp
);
3172 if (!INTEGRAL_TYPE_P (type
))
3174 lowi
= ranges
[i
].low
;
3175 if (lowi
== NULL_TREE
)
3176 lowi
= TYPE_MIN_VALUE (type
);
3177 highi
= ranges
[i
].high
;
3178 if (highi
== NULL_TREE
)
3180 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
3183 if (ranges
[i
].exp
!= ranges
[j
].exp
|| ranges
[j
].in_p
)
3185 lowj
= ranges
[j
].low
;
3186 if (lowj
== NULL_TREE
)
3188 highj
= ranges
[j
].high
;
3189 if (highj
== NULL_TREE
)
3190 highj
= TYPE_MAX_VALUE (type
);
3191 /* Check lowj > highi. */
3192 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
3194 if (tem
== NULL_TREE
|| !integer_onep (tem
))
3197 changes
= optimize_range_tests_xor (opcode
, type
, lowi
, lowj
,
3199 ranges
+ i
, ranges
+ j
);
3201 changes
= optimize_range_tests_diff (opcode
, type
, lowi
, lowj
,
3203 ranges
+ i
, ranges
+ j
);
3214 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3215 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3216 EXP on success, NULL otherwise. */
3219 extract_bit_test_mask (tree exp
, int prec
, tree totallow
, tree low
, tree high
,
3220 wide_int
*mask
, tree
*totallowp
)
3222 tree tem
= int_const_binop (MINUS_EXPR
, high
, low
);
3223 if (tem
== NULL_TREE
3224 || TREE_CODE (tem
) != INTEGER_CST
3225 || TREE_OVERFLOW (tem
)
3226 || tree_int_cst_sgn (tem
) == -1
3227 || compare_tree_int (tem
, prec
) != -1)
3230 unsigned HOST_WIDE_INT max
= tree_to_uhwi (tem
) + 1;
3231 *mask
= wi::shifted_mask (0, max
, false, prec
);
3232 if (TREE_CODE (exp
) == BIT_AND_EXPR
3233 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
3235 widest_int msk
= wi::to_widest (TREE_OPERAND (exp
, 1));
3236 msk
= wi::zext (~msk
, TYPE_PRECISION (TREE_TYPE (exp
)));
3237 if (wi::popcount (msk
) == 1
3238 && wi::ltu_p (msk
, prec
- max
))
3240 *mask
|= wi::shifted_mask (msk
.to_uhwi (), max
, false, prec
);
3241 max
+= msk
.to_uhwi ();
3242 exp
= TREE_OPERAND (exp
, 0);
3243 if (integer_zerop (low
)
3244 && TREE_CODE (exp
) == PLUS_EXPR
3245 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
3247 tree ret
= TREE_OPERAND (exp
, 0);
3250 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp
, 1)),
3251 TYPE_PRECISION (TREE_TYPE (low
))));
3252 tree tbias
= wide_int_to_tree (TREE_TYPE (ret
), bias
);
3258 else if (!tree_int_cst_lt (totallow
, tbias
))
3260 bias
= wi::to_widest (tbias
);
3261 bias
-= wi::to_widest (totallow
);
3262 if (bias
>= 0 && bias
< prec
- max
)
3264 *mask
= wi::lshift (*mask
, bias
);
3272 if (!tree_int_cst_lt (totallow
, low
))
3274 tem
= int_const_binop (MINUS_EXPR
, low
, totallow
);
3275 if (tem
== NULL_TREE
3276 || TREE_CODE (tem
) != INTEGER_CST
3277 || TREE_OVERFLOW (tem
)
3278 || compare_tree_int (tem
, prec
- max
) == 1)
3281 *mask
= wi::lshift (*mask
, wi::to_widest (tem
));
3285 /* Attempt to optimize small range tests using bit test.
3287 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3288 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3289 has been by earlier optimizations optimized into:
3290 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3291 As all the 43 through 82 range is less than 64 numbers,
3292 for 64-bit word targets optimize that into:
3293 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
3296 optimize_range_tests_to_bit_test (enum tree_code opcode
, int first
, int length
,
3297 vec
<operand_entry
*> *ops
,
3298 struct range_entry
*ranges
)
3301 bool any_changes
= false;
3302 int prec
= GET_MODE_BITSIZE (word_mode
);
3303 auto_vec
<struct range_entry
*, 64> candidates
;
3305 for (i
= first
; i
< length
- 1; i
++)
3307 tree lowi
, highi
, lowj
, highj
, type
;
3309 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
3311 type
= TREE_TYPE (ranges
[i
].exp
);
3312 if (!INTEGRAL_TYPE_P (type
))
3314 lowi
= ranges
[i
].low
;
3315 if (lowi
== NULL_TREE
)
3316 lowi
= TYPE_MIN_VALUE (type
);
3317 highi
= ranges
[i
].high
;
3318 if (highi
== NULL_TREE
)
3321 tree exp
= extract_bit_test_mask (ranges
[i
].exp
, prec
, lowi
, lowi
,
3322 highi
, &mask
, &lowi
);
3323 if (exp
== NULL_TREE
)
3325 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
3326 candidates
.truncate (0);
3327 int end
= MIN (i
+ 64, length
);
3328 for (j
= i
+ 1; j
< end
; j
++)
3331 if (ranges
[j
].exp
== NULL_TREE
|| ranges
[j
].in_p
)
3333 if (ranges
[j
].exp
== exp
)
3335 else if (TREE_CODE (ranges
[j
].exp
) == BIT_AND_EXPR
)
3337 exp2
= TREE_OPERAND (ranges
[j
].exp
, 0);
3340 else if (TREE_CODE (exp2
) == PLUS_EXPR
)
3342 exp2
= TREE_OPERAND (exp2
, 0);
3352 lowj
= ranges
[j
].low
;
3353 if (lowj
== NULL_TREE
)
3355 highj
= ranges
[j
].high
;
3356 if (highj
== NULL_TREE
)
3357 highj
= TYPE_MAX_VALUE (type
);
3359 exp2
= extract_bit_test_mask (ranges
[j
].exp
, prec
, lowi
, lowj
,
3360 highj
, &mask2
, NULL
);
3364 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
3365 candidates
.safe_push (&ranges
[j
]);
3368 /* If every possible relative value of the expression is a valid shift
3369 amount, then we can merge the entry test in the bit test. In this
3370 case, if we would need otherwise 2 or more comparisons, then use
3371 the bit test; in the other cases, the threshold is 3 comparisons. */
3372 bool entry_test_needed
;
3374 if (TREE_CODE (exp
) == SSA_NAME
3375 && get_range_query (cfun
)->range_of_expr (r
, exp
)
3376 && !r
.undefined_p ()
3378 && wi::leu_p (r
.upper_bound () - r
.lower_bound (), prec
- 1))
3380 wide_int min
= r
.lower_bound ();
3381 wide_int ilowi
= wi::to_wide (lowi
);
3382 if (wi::lt_p (min
, ilowi
, TYPE_SIGN (TREE_TYPE (lowi
))))
3384 lowi
= wide_int_to_tree (TREE_TYPE (lowi
), min
);
3385 mask
= wi::lshift (mask
, ilowi
- min
);
3387 else if (wi::gt_p (min
, ilowi
, TYPE_SIGN (TREE_TYPE (lowi
))))
3389 lowi
= wide_int_to_tree (TREE_TYPE (lowi
), min
);
3390 mask
= wi::lrshift (mask
, min
- ilowi
);
3392 entry_test_needed
= false;
3395 entry_test_needed
= true;
3396 if (candidates
.length () >= (entry_test_needed
? 2 : 1))
3398 tree high
= wide_int_to_tree (TREE_TYPE (lowi
),
3399 wi::to_widest (lowi
)
3400 + prec
- 1 - wi::clz (mask
));
3401 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3403 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
3404 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN
3406 location_t loc
= gimple_location (stmt
);
3407 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
3409 /* See if it isn't cheaper to pretend the minimum value of the
3410 range is 0, if maximum value is small enough.
3411 We can avoid then subtraction of the minimum value, but the
3412 mask constant could be perhaps more expensive. */
3413 if (compare_tree_int (lowi
, 0) > 0
3414 && compare_tree_int (high
, prec
) < 0)
3417 HOST_WIDE_INT m
= tree_to_uhwi (lowi
);
3418 rtx reg
= gen_raw_REG (word_mode
, 10000);
3419 bool speed_p
= optimize_bb_for_speed_p (gimple_bb (stmt
));
3420 cost_diff
= set_src_cost (gen_rtx_PLUS (word_mode
, reg
,
3422 word_mode
, speed_p
);
3423 rtx r
= immed_wide_int_const (mask
, word_mode
);
3424 cost_diff
+= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
3425 word_mode
, speed_p
);
3426 r
= immed_wide_int_const (wi::lshift (mask
, m
), word_mode
);
3427 cost_diff
-= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
3428 word_mode
, speed_p
);
3431 mask
= wi::lshift (mask
, m
);
3432 lowi
= build_zero_cst (TREE_TYPE (lowi
));
3437 if (entry_test_needed
)
3439 tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
3441 if (tem
== NULL_TREE
|| is_gimple_val (tem
))
3446 tree etype
= unsigned_type_for (TREE_TYPE (exp
));
3447 exp
= fold_build2_loc (loc
, MINUS_EXPR
, etype
,
3448 fold_convert_loc (loc
, etype
, exp
),
3449 fold_convert_loc (loc
, etype
, lowi
));
3450 exp
= fold_convert_loc (loc
, integer_type_node
, exp
);
3451 tree word_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
3452 exp
= fold_build2_loc (loc
, LSHIFT_EXPR
, word_type
,
3453 build_int_cst (word_type
, 1), exp
);
3454 exp
= fold_build2_loc (loc
, BIT_AND_EXPR
, word_type
, exp
,
3455 wide_int_to_tree (word_type
, mask
));
3456 exp
= fold_build2_loc (loc
, EQ_EXPR
, optype
, exp
,
3457 build_zero_cst (word_type
));
3458 if (is_gimple_val (exp
))
3461 /* The shift might have undefined behavior if TEM is true,
3462 but reassociate_bb isn't prepared to have basic blocks
3463 split when it is running. So, temporarily emit a code
3464 with BIT_IOR_EXPR instead of &&, and fix it up in
3466 gimple_seq seq
= NULL
;
3469 tem
= force_gimple_operand (tem
, &seq
, true, NULL_TREE
);
3470 gcc_assert (TREE_CODE (tem
) == SSA_NAME
);
3471 gimple_set_visited (SSA_NAME_DEF_STMT (tem
), true);
3474 exp
= force_gimple_operand (exp
, &seq2
, true, NULL_TREE
);
3475 gimple_seq_add_seq_without_update (&seq
, seq2
);
3476 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
3477 gimple_set_visited (SSA_NAME_DEF_STMT (exp
), true);
3480 gimple
*g
= gimple_build_assign (make_ssa_name (optype
),
3481 BIT_IOR_EXPR
, tem
, exp
);
3482 gimple_set_location (g
, loc
);
3483 gimple_seq_add_stmt_without_update (&seq
, g
);
3484 exp
= gimple_assign_lhs (g
);
3486 tree val
= build_zero_cst (optype
);
3487 if (update_range_test (&ranges
[i
], NULL
, candidates
.address (),
3488 candidates
.length (), opcode
, ops
, exp
,
3489 seq
, false, val
, val
, strict_overflow_p
))
3493 reassoc_branch_fixups
.safe_push (tem
);
3496 gimple_seq_discard (seq
);
3502 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3503 and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1.
3504 Also, handle x < C && y < C && z < C where C is power of two as
3505 (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0
3506 as (x | y | z) < 0. */
3509 optimize_range_tests_cmp_bitwise (enum tree_code opcode
, int first
, int length
,
3510 vec
<operand_entry
*> *ops
,
3511 struct range_entry
*ranges
)
3515 bool any_changes
= false;
3516 auto_vec
<int, 128> buckets
;
3517 auto_vec
<int, 32> chains
;
3518 auto_vec
<struct range_entry
*, 32> candidates
;
3520 for (i
= first
; i
< length
; i
++)
3524 if (ranges
[i
].exp
== NULL_TREE
3525 || TREE_CODE (ranges
[i
].exp
) != SSA_NAME
3526 || TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) <= 1
3527 || TREE_CODE (TREE_TYPE (ranges
[i
].exp
)) == BOOLEAN_TYPE
)
3530 if (ranges
[i
].low
!= NULL_TREE
3531 && ranges
[i
].high
!= NULL_TREE
3533 && tree_int_cst_equal (ranges
[i
].low
, ranges
[i
].high
))
3535 idx
= !integer_zerop (ranges
[i
].low
);
3536 if (idx
&& !integer_all_onesp (ranges
[i
].low
))
3539 else if (ranges
[i
].high
!= NULL_TREE
3540 && TREE_CODE (ranges
[i
].high
) == INTEGER_CST
3543 wide_int w
= wi::to_wide (ranges
[i
].high
);
3544 int prec
= TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
));
3545 int l
= wi::clz (w
);
3549 || w
!= wi::mask (prec
- l
, false, prec
))
3551 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3552 && ranges
[i
].low
== NULL_TREE
)
3554 && integer_zerop (ranges
[i
].low
))))
3557 else if (ranges
[i
].high
== NULL_TREE
3558 && ranges
[i
].low
!= NULL_TREE
3559 /* Perform this optimization only in the last
3560 reassoc pass, as it interferes with the reassociation
3561 itself or could also with VRP etc. which might not
3562 be able to virtually undo the optimization. */
3563 && !reassoc_insert_powi_p
3564 && !TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3565 && integer_zerop (ranges
[i
].low
))
3570 b
= TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) * 4 + idx
;
3571 if (buckets
.length () <= b
)
3572 buckets
.safe_grow_cleared (b
+ 1, true);
3573 if (chains
.length () <= (unsigned) i
)
3574 chains
.safe_grow (i
+ 1, true);
3575 chains
[i
] = buckets
[b
];
3579 FOR_EACH_VEC_ELT (buckets
, b
, i
)
3580 if (i
&& chains
[i
- 1])
3585 /* When ranges[X - 1].high + 1 is a power of two,
3586 we need to process the same bucket up to
3587 precision - 1 times, each time split the entries
3588 with the same high bound into one chain and the
3589 rest into another one to be processed later. */
3592 for (j
= chains
[i
- 1]; j
; j
= chains
[j
- 1])
3594 if (tree_int_cst_equal (ranges
[i
- 1].high
,
3595 ranges
[j
- 1].high
))
3597 chains
[this_prev
- 1] = j
;
3600 else if (other_prev
== 0)
3607 chains
[other_prev
- 1] = j
;
3611 chains
[this_prev
- 1] = 0;
3613 chains
[other_prev
- 1] = 0;
3614 if (chains
[i
- 1] == 0)
3621 for (j
= chains
[i
- 1]; j
; j
= chains
[j
- 1])
3623 gimple
*gk
= SSA_NAME_DEF_STMT (ranges
[k
- 1].exp
);
3624 gimple
*gj
= SSA_NAME_DEF_STMT (ranges
[j
- 1].exp
);
3625 if (reassoc_stmt_dominates_stmt_p (gk
, gj
))
3628 tree type1
= TREE_TYPE (ranges
[k
- 1].exp
);
3629 tree type2
= NULL_TREE
;
3630 bool strict_overflow_p
= false;
3631 candidates
.truncate (0);
3632 if (POINTER_TYPE_P (type1
) || TREE_CODE (type1
) == OFFSET_TYPE
)
3633 type1
= pointer_sized_int_node
;
3634 for (j
= i
; j
; j
= chains
[j
- 1])
3636 tree type
= TREE_TYPE (ranges
[j
- 1].exp
);
3637 strict_overflow_p
|= ranges
[j
- 1].strict_overflow_p
;
3638 if (POINTER_TYPE_P (type
) || TREE_CODE (type
) == OFFSET_TYPE
)
3639 type
= pointer_sized_int_node
;
3642 /* For the signed < 0 cases, the types should be
3643 really compatible (all signed with the same precision,
3644 instead put ranges that have different in_p from
3646 if (!useless_type_conversion_p (type1
, type
))
3648 if (ranges
[j
- 1].in_p
!= ranges
[k
- 1].in_p
)
3649 candidates
.safe_push (&ranges
[j
- 1]);
3654 || useless_type_conversion_p (type1
, type
))
3656 else if (type2
== NULL_TREE
3657 || useless_type_conversion_p (type2
, type
))
3659 if (type2
== NULL_TREE
)
3661 candidates
.safe_push (&ranges
[j
- 1]);
3664 unsigned l
= candidates
.length ();
3665 for (j
= i
; j
; j
= chains
[j
- 1])
3667 tree type
= TREE_TYPE (ranges
[j
- 1].exp
);
3670 if (POINTER_TYPE_P (type
) || TREE_CODE (type
) == OFFSET_TYPE
)
3671 type
= pointer_sized_int_node
;
3674 if (!useless_type_conversion_p (type1
, type
))
3676 if (ranges
[j
- 1].in_p
== ranges
[k
- 1].in_p
)
3677 candidates
.safe_push (&ranges
[j
- 1]);
3680 if (useless_type_conversion_p (type1
, type
))
3682 else if (type2
== NULL_TREE
3683 || useless_type_conversion_p (type2
, type
))
3685 candidates
.safe_push (&ranges
[j
- 1]);
3687 gimple_seq seq
= NULL
;
3688 tree op
= NULL_TREE
;
3690 struct range_entry
*r
;
3691 candidates
.safe_push (&ranges
[k
- 1]);
3692 FOR_EACH_VEC_ELT (candidates
, id
, r
)
3695 enum tree_code code
;
3702 || POINTER_TYPE_P (TREE_TYPE (op
))
3703 || TREE_CODE (TREE_TYPE (op
)) == OFFSET_TYPE
)
3705 code
= (b
% 4) == 3 ? BIT_NOT_EXPR
: NOP_EXPR
;
3706 tree type3
= id
>= l
? type1
: pointer_sized_int_node
;
3707 if (code
== BIT_NOT_EXPR
3708 && TREE_CODE (TREE_TYPE (op
)) == OFFSET_TYPE
)
3710 g
= gimple_build_assign (make_ssa_name (type3
),
3712 gimple_seq_add_stmt_without_update (&seq
, g
);
3713 op
= gimple_assign_lhs (g
);
3715 g
= gimple_build_assign (make_ssa_name (type3
), code
, op
);
3716 gimple_seq_add_stmt_without_update (&seq
, g
);
3717 op
= gimple_assign_lhs (g
);
3719 tree type
= TREE_TYPE (r
->exp
);
3721 if (POINTER_TYPE_P (type
)
3722 || TREE_CODE (type
) == OFFSET_TYPE
3723 || (id
>= l
&& !useless_type_conversion_p (type1
, type
)))
3725 tree type3
= id
>= l
? type1
: pointer_sized_int_node
;
3726 g
= gimple_build_assign (make_ssa_name (type3
), NOP_EXPR
, exp
);
3727 gimple_seq_add_stmt_without_update (&seq
, g
);
3728 exp
= gimple_assign_lhs (g
);
3731 code
= r
->in_p
? BIT_IOR_EXPR
: BIT_AND_EXPR
;
3733 code
= (b
% 4) == 1 ? BIT_AND_EXPR
: BIT_IOR_EXPR
;
3734 g
= gimple_build_assign (make_ssa_name (id
>= l
? type1
: type2
),
3736 gimple_seq_add_stmt_without_update (&seq
, g
);
3737 op
= gimple_assign_lhs (g
);
3739 type1
= TREE_TYPE (ranges
[k
- 1].exp
);
3740 if (POINTER_TYPE_P (type1
) || TREE_CODE (type1
) == OFFSET_TYPE
)
3743 = gimple_build_assign (make_ssa_name (type1
), NOP_EXPR
, op
);
3744 gimple_seq_add_stmt_without_update (&seq
, g
);
3745 op
= gimple_assign_lhs (g
);
3748 if (update_range_test (&ranges
[k
- 1], NULL
, candidates
.address (),
3749 candidates
.length (), opcode
, ops
, op
,
3750 seq
, ranges
[k
- 1].in_p
, ranges
[k
- 1].low
,
3751 ranges
[k
- 1].high
, strict_overflow_p
))
3754 gimple_seq_discard (seq
);
3755 if ((b
% 4) == 2 && buckets
[b
] != i
)
3756 /* There is more work to do for this bucket. */
3763 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3764 a >= 0 && a < b into (unsigned) a < (unsigned) b
3765 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
3768 optimize_range_tests_var_bound (enum tree_code opcode
, int first
, int length
,
3769 vec
<operand_entry
*> *ops
,
3770 struct range_entry
*ranges
,
3771 basic_block first_bb
)
3774 bool any_changes
= false;
3775 hash_map
<tree
, int> *map
= NULL
;
3777 for (i
= first
; i
< length
; i
++)
3779 if (ranges
[i
].exp
== NULL_TREE
3780 || TREE_CODE (ranges
[i
].exp
) != SSA_NAME
3784 tree type
= TREE_TYPE (ranges
[i
].exp
);
3785 if (!INTEGRAL_TYPE_P (type
)
3786 || TYPE_UNSIGNED (type
)
3787 || ranges
[i
].low
== NULL_TREE
3788 || !integer_zerop (ranges
[i
].low
)
3789 || ranges
[i
].high
!= NULL_TREE
)
3791 /* EXP >= 0 here. */
3793 map
= new hash_map
<tree
, int>;
3794 map
->put (ranges
[i
].exp
, i
);
3800 for (i
= 0; i
< length
; i
++)
3802 bool in_p
= ranges
[i
].in_p
;
3803 if (ranges
[i
].low
== NULL_TREE
3804 || ranges
[i
].high
== NULL_TREE
)
3806 if (!integer_zerop (ranges
[i
].low
)
3807 || !integer_zerop (ranges
[i
].high
))
3810 && TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) == 1
3811 && TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3812 && integer_onep (ranges
[i
].low
)
3813 && integer_onep (ranges
[i
].high
))
3824 if (TREE_CODE (ranges
[i
].exp
) != SSA_NAME
)
3826 stmt
= SSA_NAME_DEF_STMT (ranges
[i
].exp
);
3827 if (!is_gimple_assign (stmt
))
3829 ccode
= gimple_assign_rhs_code (stmt
);
3830 rhs1
= gimple_assign_rhs1 (stmt
);
3831 rhs2
= gimple_assign_rhs2 (stmt
);
3835 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3836 stmt
= last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
3837 if (gimple_code (stmt
) != GIMPLE_COND
)
3839 ccode
= gimple_cond_code (stmt
);
3840 rhs1
= gimple_cond_lhs (stmt
);
3841 rhs2
= gimple_cond_rhs (stmt
);
3844 if (TREE_CODE (rhs1
) != SSA_NAME
3845 || rhs2
== NULL_TREE
3846 || TREE_CODE (rhs2
) != SSA_NAME
)
3860 ccode
= invert_tree_comparison (ccode
, false);
3865 std::swap (rhs1
, rhs2
);
3866 ccode
= swap_tree_comparison (ccode
);
3875 int *idx
= map
->get (rhs1
);
3879 /* maybe_optimize_range_tests allows statements without side-effects
3880 in the basic blocks as long as they are consumed in the same bb.
3881 Make sure rhs2's def stmt is not among them, otherwise we can't
3882 use safely get_nonzero_bits on it. E.g. in:
3883 # RANGE [-83, 1] NONZERO 173
3884 # k_32 = PHI <k_47(13), k_12(9)>
3887 goto <bb 5>; [26.46%]
3889 goto <bb 9>; [73.54%]
3891 <bb 5> [local count: 140323371]:
3892 # RANGE [0, 1] NONZERO 1
3894 # RANGE [0, 4] NONZERO 4
3896 # RANGE [0, 4] NONZERO 4
3897 iftmp.0_44 = (char) _21;
3898 if (k_32 < iftmp.0_44)
3899 goto <bb 6>; [84.48%]
3901 goto <bb 9>; [15.52%]
3902 the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
3903 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3904 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3905 those stmts even for negative k_32 and the value ranges would be no
3906 longer guaranteed and so the optimization would be invalid. */
3907 while (opcode
== ERROR_MARK
)
3909 gimple
*g
= SSA_NAME_DEF_STMT (rhs2
);
3910 basic_block bb2
= gimple_bb (g
);
3913 && dominated_by_p (CDI_DOMINATORS
, bb2
, first_bb
))
3915 /* As an exception, handle a few common cases. */
3916 if (gimple_assign_cast_p (g
)
3917 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g
))))
3919 tree op0
= gimple_assign_rhs1 (g
);
3920 if (TYPE_UNSIGNED (TREE_TYPE (op0
))
3921 && (TYPE_PRECISION (TREE_TYPE (rhs2
))
3922 > TYPE_PRECISION (TREE_TYPE (op0
))))
3923 /* Zero-extension is always ok. */
3925 else if (TYPE_PRECISION (TREE_TYPE (rhs2
))
3926 == TYPE_PRECISION (TREE_TYPE (op0
))
3927 && TREE_CODE (op0
) == SSA_NAME
)
3929 /* Cast from signed to unsigned or vice versa. Retry
3930 with the op0 as new rhs2. */
3935 else if (is_gimple_assign (g
)
3936 && gimple_assign_rhs_code (g
) == BIT_AND_EXPR
3937 && TREE_CODE (gimple_assign_rhs2 (g
)) == INTEGER_CST
3938 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g
))))
3939 /* Masking with INTEGER_CST with MSB clear is always ok
3946 if (rhs2
== NULL_TREE
)
3949 wide_int nz
= get_nonzero_bits (rhs2
);
3953 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3954 and RHS2 is known to be RHS2 >= 0. */
3955 tree utype
= unsigned_type_for (TREE_TYPE (rhs1
));
3957 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
3958 if ((ranges
[*idx
].strict_overflow_p
3959 || ranges
[i
].strict_overflow_p
)
3960 && issue_strict_overflow_warning (wc
))
3961 warning_at (gimple_location (stmt
), OPT_Wstrict_overflow
,
3962 "assuming signed overflow does not occur "
3963 "when simplifying range test");
3965 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3967 struct range_entry
*r
= &ranges
[*idx
];
3968 fprintf (dump_file
, "Optimizing range test ");
3969 print_generic_expr (dump_file
, r
->exp
);
3970 fprintf (dump_file
, " +[");
3971 print_generic_expr (dump_file
, r
->low
);
3972 fprintf (dump_file
, ", ");
3973 print_generic_expr (dump_file
, r
->high
);
3974 fprintf (dump_file
, "] and comparison ");
3975 print_generic_expr (dump_file
, rhs1
);
3976 fprintf (dump_file
, " %s ", op_symbol_code (ccode
));
3977 print_generic_expr (dump_file
, rhs2
);
3978 fprintf (dump_file
, "\n into (");
3979 print_generic_expr (dump_file
, utype
);
3980 fprintf (dump_file
, ") ");
3981 print_generic_expr (dump_file
, rhs1
);
3982 fprintf (dump_file
, " %s (", op_symbol_code (ccode
));
3983 print_generic_expr (dump_file
, utype
);
3984 fprintf (dump_file
, ") ");
3985 print_generic_expr (dump_file
, rhs2
);
3986 fprintf (dump_file
, "\n");
3989 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3991 if (opcode
== BIT_IOR_EXPR
3992 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
3995 ccode
= invert_tree_comparison (ccode
, false);
3998 unsigned int uid
= gimple_uid (stmt
);
3999 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4000 gimple
*g
= gimple_build_assign (make_ssa_name (utype
), NOP_EXPR
, rhs1
);
4001 gimple_set_uid (g
, uid
);
4002 rhs1
= gimple_assign_lhs (g
);
4003 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4004 if (!useless_type_conversion_p (utype
, TREE_TYPE (rhs2
)))
4006 g
= gimple_build_assign (make_ssa_name (utype
), NOP_EXPR
, rhs2
);
4007 gimple_set_uid (g
, uid
);
4008 rhs2
= gimple_assign_lhs (g
);
4009 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4011 if (tree_swap_operands_p (rhs1
, rhs2
))
4013 std::swap (rhs1
, rhs2
);
4014 ccode
= swap_tree_comparison (ccode
);
4016 if (gimple_code (stmt
) == GIMPLE_COND
)
4018 gcond
*c
= as_a
<gcond
*> (stmt
);
4019 gimple_cond_set_code (c
, ccode
);
4020 gimple_cond_set_lhs (c
, rhs1
);
4021 gimple_cond_set_rhs (c
, rhs2
);
4026 tree ctype
= oe
->op
? TREE_TYPE (oe
->op
) : boolean_type_node
;
4027 if (!INTEGRAL_TYPE_P (ctype
)
4028 || (TREE_CODE (ctype
) != BOOLEAN_TYPE
4029 && TYPE_PRECISION (ctype
) != 1))
4030 ctype
= boolean_type_node
;
4031 g
= gimple_build_assign (make_ssa_name (ctype
), ccode
, rhs1
, rhs2
);
4032 gimple_set_uid (g
, uid
);
4033 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4034 if (oe
->op
&& ctype
!= TREE_TYPE (oe
->op
))
4036 g
= gimple_build_assign (make_ssa_name (TREE_TYPE (oe
->op
)),
4037 NOP_EXPR
, gimple_assign_lhs (g
));
4038 gimple_set_uid (g
, uid
);
4039 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4041 ranges
[i
].exp
= gimple_assign_lhs (g
);
4042 oe
->op
= ranges
[i
].exp
;
4043 ranges
[i
].low
= build_zero_cst (TREE_TYPE (ranges
[i
].exp
));
4044 ranges
[i
].high
= ranges
[i
].low
;
4046 ranges
[i
].strict_overflow_p
= false;
4047 oe
= (*ops
)[ranges
[*idx
].idx
];
4048 /* Now change all the other range test immediate uses, so that
4049 those tests will be optimized away. */
4050 if (opcode
== ERROR_MARK
)
4053 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
4054 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
4056 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
4057 ? boolean_false_node
: boolean_true_node
);
4060 oe
->op
= error_mark_node
;
4061 ranges
[*idx
].exp
= NULL_TREE
;
4062 ranges
[*idx
].low
= NULL_TREE
;
4063 ranges
[*idx
].high
= NULL_TREE
;
4071 /* Optimize range tests, similarly how fold_range_test optimizes
4072 it on trees. The tree code for the binary
4073 operation between all the operands is OPCODE.
4074 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
4075 maybe_optimize_range_tests for inter-bb range optimization.
4076 In that case if oe->op is NULL, oe->id is bb->index whose
4077 GIMPLE_COND is && or ||ed into the test, and oe->rank says
4079 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
4082 optimize_range_tests (enum tree_code opcode
,
4083 vec
<operand_entry
*> *ops
, basic_block first_bb
)
4085 unsigned int length
= ops
->length (), i
, j
, first
;
4087 struct range_entry
*ranges
;
4088 bool any_changes
= false;
4093 ranges
= XNEWVEC (struct range_entry
, length
);
4094 for (i
= 0; i
< length
; i
++)
4098 init_range_entry (ranges
+ i
, oe
->op
,
4101 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
)));
4102 /* For | invert it now, we will invert it again before emitting
4103 the optimized expression. */
4104 if (opcode
== BIT_IOR_EXPR
4105 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
4106 ranges
[i
].in_p
= !ranges
[i
].in_p
;
4109 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
4110 for (i
= 0; i
< length
; i
++)
4111 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
4114 /* Try to merge ranges. */
4115 for (first
= i
; i
< length
; i
++)
4117 tree low
= ranges
[i
].low
;
4118 tree high
= ranges
[i
].high
;
4119 int in_p
= ranges
[i
].in_p
;
4120 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
4121 int update_fail_count
= 0;
4123 for (j
= i
+ 1; j
< length
; j
++)
4125 if (ranges
[i
].exp
!= ranges
[j
].exp
)
4127 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
4128 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
4130 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
4136 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, NULL
, j
- i
- 1,
4137 opcode
, ops
, ranges
[i
].exp
, NULL
, in_p
,
4138 low
, high
, strict_overflow_p
))
4143 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
4144 while update_range_test would fail. */
4145 else if (update_fail_count
== 64)
4148 ++update_fail_count
;
4151 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
4154 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
4155 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
4157 if (lshift_cheap_p (optimize_function_for_speed_p (cfun
)))
4158 any_changes
|= optimize_range_tests_to_bit_test (opcode
, first
, length
,
4160 any_changes
|= optimize_range_tests_var_bound (opcode
, first
, length
, ops
,
4162 any_changes
|= optimize_range_tests_cmp_bitwise (opcode
, first
, length
,
4165 if (any_changes
&& opcode
!= ERROR_MARK
)
4168 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
4170 if (oe
->op
== error_mark_node
)
4179 XDELETEVEC (ranges
);
4183 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4184 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
4185 otherwise the comparison code. TYPE is a return value that is set
4186 to type of comparison. */
4189 ovce_extract_ops (tree var
, gassign
**rets
, bool *reti
, tree
*type
,
4190 tree
*lhs
, tree
*rhs
, gassign
**vcond
)
4192 if (TREE_CODE (var
) != SSA_NAME
)
4195 gassign
*stmt
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (var
));
4201 /* ??? If we start creating more COND_EXPR, we could perform
4202 this same optimization with them. For now, simplify. */
4203 if (gimple_assign_rhs_code (stmt
) != VEC_COND_EXPR
)
4206 tree cond
= gimple_assign_rhs1 (stmt
);
4207 tree_code cmp
= TREE_CODE (cond
);
4208 if (cmp
!= SSA_NAME
)
4211 gassign
*assign
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (cond
));
4213 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign
)) != tcc_comparison
)
4216 cmp
= gimple_assign_rhs_code (assign
);
4218 *lhs
= gimple_assign_rhs1 (assign
);
4220 *rhs
= gimple_assign_rhs2 (assign
);
4222 /* ??? For now, allow only canonical true and false result vectors.
4223 We could expand this to other constants should the need arise,
4224 but at the moment we don't create them. */
4225 tree t
= gimple_assign_rhs2 (stmt
);
4226 tree f
= gimple_assign_rhs3 (stmt
);
4228 if (integer_all_onesp (t
))
4230 else if (integer_all_onesp (f
))
4232 cmp
= invert_tree_comparison (cmp
, false);
4237 if (!integer_zerop (f
))
4246 *type
= TREE_TYPE (cond
);
4250 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4251 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
4254 optimize_vec_cond_expr (tree_code opcode
, vec
<operand_entry
*> *ops
)
4256 unsigned int length
= ops
->length (), i
, j
;
4257 bool any_changes
= false;
4262 for (i
= 0; i
< length
; ++i
)
4264 tree elt0
= (*ops
)[i
]->op
;
4266 gassign
*stmt0
, *vcond0
;
4268 tree type
, lhs0
, rhs0
;
4269 tree_code cmp0
= ovce_extract_ops (elt0
, &stmt0
, &invert
, &type
, &lhs0
,
4271 if (cmp0
== ERROR_MARK
)
4274 for (j
= i
+ 1; j
< length
; ++j
)
4276 tree
&elt1
= (*ops
)[j
]->op
;
4278 gassign
*stmt1
, *vcond1
;
4280 tree_code cmp1
= ovce_extract_ops (elt1
, &stmt1
, NULL
, NULL
, &lhs1
,
4282 if (cmp1
== ERROR_MARK
)
4286 if (opcode
== BIT_AND_EXPR
)
4287 comb
= maybe_fold_and_comparisons (type
, cmp0
, lhs0
, rhs0
,
4289 else if (opcode
== BIT_IOR_EXPR
)
4290 comb
= maybe_fold_or_comparisons (type
, cmp0
, lhs0
, rhs0
,
4298 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4300 fprintf (dump_file
, "Transforming ");
4301 print_generic_expr (dump_file
, gimple_assign_lhs (stmt0
));
4302 fprintf (dump_file
, " %c ", opcode
== BIT_AND_EXPR
? '&' : '|');
4303 print_generic_expr (dump_file
, gimple_assign_lhs (stmt1
));
4304 fprintf (dump_file
, " into ");
4305 print_generic_expr (dump_file
, comb
);
4306 fputc ('\n', dump_file
);
4309 gimple_stmt_iterator gsi
= gsi_for_stmt (vcond0
);
4310 tree exp
= force_gimple_operand_gsi (&gsi
, comb
, true, NULL_TREE
,
4311 true, GSI_SAME_STMT
);
4313 swap_ssa_operands (vcond0
, gimple_assign_rhs2_ptr (vcond0
),
4314 gimple_assign_rhs3_ptr (vcond0
));
4315 gimple_assign_set_rhs1 (vcond0
, exp
);
4316 update_stmt (vcond0
);
4318 elt1
= error_mark_node
;
4327 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
4329 if (oe
->op
== error_mark_node
)
4341 /* Return true if STMT is a cast like:
4347 # _345 = PHI <_123(N), 1(...), 1(...)>
4348 where _234 has bool type, _123 has single use and
4349 bb N has a single successor M. This is commonly used in
4350 the last block of a range test.
4352 Also Return true if STMT is tcc_compare like:
4358 # _345 = PHI <_234(N), 1(...), 1(...)>
4360 where _234 has booltype, single use and
4361 bb N has a single successor M. This is commonly used in
4362 the last block of a range test. */
4365 final_range_test_p (gimple
*stmt
)
4367 basic_block bb
, rhs_bb
, lhs_bb
;
4370 use_operand_p use_p
;
4373 if (!gimple_assign_cast_p (stmt
)
4374 && (!is_gimple_assign (stmt
)
4375 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4376 != tcc_comparison
)))
4378 bb
= gimple_bb (stmt
);
4379 if (!single_succ_p (bb
))
4381 e
= single_succ_edge (bb
);
4382 if (e
->flags
& EDGE_COMPLEX
)
4385 lhs
= gimple_assign_lhs (stmt
);
4386 rhs
= gimple_assign_rhs1 (stmt
);
4387 if (gimple_assign_cast_p (stmt
)
4388 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
4389 || TREE_CODE (rhs
) != SSA_NAME
4390 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
))
4393 if (!gimple_assign_cast_p (stmt
)
4394 && (TREE_CODE (TREE_TYPE (lhs
)) != BOOLEAN_TYPE
))
4397 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4398 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
4401 if (gimple_code (use_stmt
) != GIMPLE_PHI
4402 || gimple_bb (use_stmt
) != e
->dest
)
4405 /* And that the rhs is defined in the same loop. */
4406 if (gimple_assign_cast_p (stmt
))
4408 if (TREE_CODE (rhs
) != SSA_NAME
4409 || !(rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
)))
4410 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
4415 if (TREE_CODE (lhs
) != SSA_NAME
4416 || !(lhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (lhs
)))
4417 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), lhs_bb
))
4424 /* Return true if BB is suitable basic block for inter-bb range test
4425 optimization. If BACKWARD is true, BB should be the only predecessor
4426 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
4427 or compared with to find a common basic block to which all conditions
4428 branch to if true resp. false. If BACKWARD is false, TEST_BB should
4429 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4430 TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
4431 block points to an empty block that falls through into *OTHER_BB and
4432 the phi args match that path. */
4435 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
4436 bool *test_swapped_p
, bool backward
)
4438 edge_iterator ei
, ei2
;
4442 bool other_edge_seen
= false;
4447 /* Check last stmt first. */
4448 stmt
= last_nondebug_stmt (bb
);
4450 || (gimple_code (stmt
) != GIMPLE_COND
4451 && (backward
|| !final_range_test_p (stmt
)))
4452 || gimple_visited_p (stmt
)
4453 || stmt_could_throw_p (cfun
, stmt
)
4456 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
4459 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4460 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4461 to *OTHER_BB (if not set yet, try to find it out). */
4462 if (EDGE_COUNT (bb
->succs
) != 2)
4464 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
4466 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
4468 if (e
->dest
== test_bb
)
4477 if (*other_bb
== NULL
)
4479 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
4480 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
4482 else if (e
->dest
== e2
->dest
)
4483 *other_bb
= e
->dest
;
4484 if (*other_bb
== NULL
)
4487 if (e
->dest
== *other_bb
)
4488 other_edge_seen
= true;
4492 if (*other_bb
== NULL
|| !other_edge_seen
)
4495 else if (single_succ (bb
) != *other_bb
)
4498 /* Now check all PHIs of *OTHER_BB. */
4499 e
= find_edge (bb
, *other_bb
);
4500 e2
= find_edge (test_bb
, *other_bb
);
4502 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4504 gphi
*phi
= gsi
.phi ();
4505 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4506 corresponding to BB and TEST_BB predecessor must be the same. */
4507 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
4508 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
4510 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4511 one of the PHIs should have the lhs of the last stmt in
4512 that block as PHI arg and that PHI should have 0 or 1
4513 corresponding to it in all other range test basic blocks
4517 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
4518 == gimple_assign_lhs (stmt
)
4519 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
4520 || integer_onep (gimple_phi_arg_def (phi
,
4526 gimple
*test_last
= last_nondebug_stmt (test_bb
);
4527 if (gimple_code (test_last
) == GIMPLE_COND
)
4529 if (backward
? e2
->src
!= test_bb
: e
->src
!= bb
)
4532 /* For last_bb, handle also:
4534 goto <bb 6>; [34.00%]
4536 goto <bb 7>; [66.00%]
4538 <bb 6> [local count: 79512730]:
4540 <bb 7> [local count: 1073741824]:
4541 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4542 where bb 7 is *OTHER_BB, but the PHI values from the
4543 earlier bbs match the path through the empty bb
4547 e3
= EDGE_SUCC (test_bb
,
4548 e2
== EDGE_SUCC (test_bb
, 0) ? 1 : 0);
4551 e
== EDGE_SUCC (bb
, 0) ? 1 : 0);
4552 if (empty_block_p (e3
->dest
)
4553 && single_succ_p (e3
->dest
)
4554 && single_succ (e3
->dest
) == *other_bb
4555 && single_pred_p (e3
->dest
)
4556 && single_succ_edge (e3
->dest
)->flags
== EDGE_FALLTHRU
)
4559 e2
= single_succ_edge (e3
->dest
);
4561 e
= single_succ_edge (e3
->dest
);
4563 *test_swapped_p
= true;
4567 else if (gimple_phi_arg_def (phi
, e2
->dest_idx
)
4568 == gimple_assign_lhs (test_last
)
4569 && (integer_zerop (gimple_phi_arg_def (phi
,
4571 || integer_onep (gimple_phi_arg_def (phi
,
4582 /* Return true if BB doesn't have side-effects that would disallow
4583 range test optimization, all SSA_NAMEs set in the bb are consumed
4584 in the bb and there are no PHIs. */
4587 no_side_effect_bb (basic_block bb
)
4589 gimple_stmt_iterator gsi
;
4592 if (!gimple_seq_empty_p (phi_nodes (bb
)))
4594 last
= last_nondebug_stmt (bb
);
4595 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4597 gimple
*stmt
= gsi_stmt (gsi
);
4599 imm_use_iterator imm_iter
;
4600 use_operand_p use_p
;
4602 if (is_gimple_debug (stmt
))
4604 if (gimple_has_side_effects (stmt
))
4608 if (!is_gimple_assign (stmt
))
4610 lhs
= gimple_assign_lhs (stmt
);
4611 if (TREE_CODE (lhs
) != SSA_NAME
)
4613 if (gimple_assign_rhs_could_trap_p (stmt
))
4615 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
4617 gimple
*use_stmt
= USE_STMT (use_p
);
4618 if (is_gimple_debug (use_stmt
))
4620 if (gimple_bb (use_stmt
) != bb
)
4627 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4628 return true and fill in *OPS recursively. */
4631 get_ops (tree var
, enum tree_code code
, vec
<operand_entry
*> *ops
,
4634 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
4638 if (!is_reassociable_op (stmt
, code
, loop
))
4641 rhs
[0] = gimple_assign_rhs1 (stmt
);
4642 rhs
[1] = gimple_assign_rhs2 (stmt
);
4643 gimple_set_visited (stmt
, true);
4644 for (i
= 0; i
< 2; i
++)
4645 if (TREE_CODE (rhs
[i
]) == SSA_NAME
4646 && !get_ops (rhs
[i
], code
, ops
, loop
)
4647 && has_single_use (rhs
[i
]))
4649 operand_entry
*oe
= operand_entry_pool
.allocate ();
4655 oe
->stmt_to_insert
= NULL
;
4656 ops
->safe_push (oe
);
4661 /* Find the ops that were added by get_ops starting from VAR, see if
4662 they were changed during update_range_test and if yes, create new
4666 update_ops (tree var
, enum tree_code code
, const vec
<operand_entry
*> &ops
,
4667 unsigned int *pidx
, class loop
*loop
)
4669 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
4673 if (!is_reassociable_op (stmt
, code
, loop
))
4676 rhs
[0] = gimple_assign_rhs1 (stmt
);
4677 rhs
[1] = gimple_assign_rhs2 (stmt
);
4680 for (i
= 0; i
< 2; i
++)
4681 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
4683 rhs
[2 + i
] = update_ops (rhs
[i
], code
, ops
, pidx
, loop
);
4684 if (rhs
[2 + i
] == NULL_TREE
)
4686 if (has_single_use (rhs
[i
]))
4687 rhs
[2 + i
] = ops
[(*pidx
)++]->op
;
4689 rhs
[2 + i
] = rhs
[i
];
4692 if ((rhs
[2] != rhs
[0] || rhs
[3] != rhs
[1])
4693 && (rhs
[2] != rhs
[1] || rhs
[3] != rhs
[0]))
4695 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4696 var
= make_ssa_name (TREE_TYPE (var
));
4697 gassign
*g
= gimple_build_assign (var
, gimple_assign_rhs_code (stmt
),
4699 gimple_set_uid (g
, gimple_uid (stmt
));
4700 gimple_set_visited (g
, true);
4701 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4702 gimple_stmt_iterator gsi2
= gsi_for_stmt (g
);
4703 if (fold_stmt_inplace (&gsi2
))
4709 /* Structure to track the initial value passed to get_ops and
4710 the range in the ops vector for each basic block. */
4712 struct inter_bb_range_test_entry
4715 unsigned int first_idx
, last_idx
;
4718 /* Inter-bb range test optimization.
4720 Returns TRUE if a gimple conditional is optimized to a true/false,
4721 otherwise return FALSE.
4723 This indicates to the caller that it should run a CFG cleanup pass
4724 once reassociation is completed. */
4727 maybe_optimize_range_tests (gimple
*stmt
)
4729 basic_block first_bb
= gimple_bb (stmt
);
4730 basic_block last_bb
= first_bb
;
4731 basic_block other_bb
= NULL
;
4735 auto_vec
<operand_entry
*> ops
;
4736 auto_vec
<inter_bb_range_test_entry
> bbinfo
;
4737 bool any_changes
= false;
4738 bool cfg_cleanup_needed
= false;
4740 /* Consider only basic blocks that end with GIMPLE_COND or
4741 a cast statement satisfying final_range_test_p. All
4742 but the last bb in the first_bb .. last_bb range
4743 should end with GIMPLE_COND. */
4744 if (gimple_code (stmt
) == GIMPLE_COND
)
4746 if (EDGE_COUNT (first_bb
->succs
) != 2)
4747 return cfg_cleanup_needed
;
4749 else if (final_range_test_p (stmt
))
4750 other_bb
= single_succ (first_bb
);
4752 return cfg_cleanup_needed
;
4754 if (stmt_could_throw_p (cfun
, stmt
))
4755 return cfg_cleanup_needed
;
4757 /* As relative ordering of post-dominator sons isn't fixed,
4758 maybe_optimize_range_tests can be called first on any
4759 bb in the range we want to optimize. So, start searching
4760 backwards, if first_bb can be set to a predecessor. */
4761 while (single_pred_p (first_bb
))
4763 basic_block pred_bb
= single_pred (first_bb
);
4764 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, NULL
, true))
4766 if (!no_side_effect_bb (first_bb
))
4770 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4771 Before starting forward search in last_bb successors, find
4772 out the other_bb. */
4773 if (first_bb
== last_bb
)
4776 /* As non-GIMPLE_COND last stmt always terminates the range,
4777 if forward search didn't discover anything, just give up. */
4778 if (gimple_code (stmt
) != GIMPLE_COND
)
4779 return cfg_cleanup_needed
;
4780 /* Look at both successors. Either it ends with a GIMPLE_COND
4781 and satisfies suitable_cond_bb, or ends with a cast and
4782 other_bb is that cast's successor. */
4783 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
4784 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
4785 || e
->dest
== first_bb
)
4786 return cfg_cleanup_needed
;
4787 else if (single_pred_p (e
->dest
))
4789 stmt
= last_nondebug_stmt (e
->dest
);
4791 && gimple_code (stmt
) == GIMPLE_COND
4792 && EDGE_COUNT (e
->dest
->succs
) == 2)
4794 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
,
4801 && final_range_test_p (stmt
)
4802 && find_edge (first_bb
, single_succ (e
->dest
)))
4804 other_bb
= single_succ (e
->dest
);
4805 if (other_bb
== first_bb
)
4809 if (other_bb
== NULL
)
4810 return cfg_cleanup_needed
;
4812 /* Now do the forward search, moving last_bb to successor bbs
4813 that aren't other_bb. */
4814 while (EDGE_COUNT (last_bb
->succs
) == 2)
4816 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
4817 if (e
->dest
!= other_bb
)
4821 if (!single_pred_p (e
->dest
))
4823 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, NULL
, false))
4825 if (!no_side_effect_bb (e
->dest
))
4829 if (first_bb
== last_bb
)
4830 return cfg_cleanup_needed
;
4831 /* Here basic blocks first_bb through last_bb's predecessor
4832 end with GIMPLE_COND, all of them have one of the edges to
4833 other_bb and another to another block in the range,
4834 all blocks except first_bb don't have side-effects and
4835 last_bb ends with either GIMPLE_COND, or cast satisfying
4836 final_range_test_p. */
4837 for (bb
= last_bb
; ; bb
= single_pred (bb
))
4839 enum tree_code code
;
4841 inter_bb_range_test_entry bb_ent
;
4843 bb_ent
.op
= NULL_TREE
;
4844 bb_ent
.first_idx
= ops
.length ();
4845 bb_ent
.last_idx
= bb_ent
.first_idx
;
4846 e
= find_edge (bb
, other_bb
);
4847 stmt
= last_nondebug_stmt (bb
);
4848 gimple_set_visited (stmt
, true);
4849 if (gimple_code (stmt
) != GIMPLE_COND
)
4851 use_operand_p use_p
;
4856 lhs
= gimple_assign_lhs (stmt
);
4857 rhs
= gimple_assign_rhs1 (stmt
);
4858 gcc_assert (bb
== last_bb
);
4867 # _345 = PHI <_123(N), 1(...), 1(...)>
4869 or 0 instead of 1. If it is 0, the _234
4870 range test is anded together with all the
4871 other range tests, if it is 1, it is ored with
4873 single_imm_use (lhs
, &use_p
, &phi
);
4874 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
4875 e2
= find_edge (first_bb
, other_bb
);
4877 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
4878 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
4879 code
= BIT_AND_EXPR
;
4882 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
4883 code
= BIT_IOR_EXPR
;
4886 /* If _234 SSA_NAME_DEF_STMT is
4888 (or &, corresponding to 1/0 in the phi arguments,
4889 push into ops the individual range test arguments
4890 of the bitwise or resp. and, recursively. */
4891 if (TREE_CODE (rhs
) == SSA_NAME
4892 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4894 && !get_ops (rhs
, code
, &ops
,
4895 loop_containing_stmt (stmt
))
4896 && has_single_use (rhs
))
4898 /* Otherwise, push the _234 range test itself. */
4899 operand_entry
*oe
= operand_entry_pool
.allocate ();
4905 oe
->stmt_to_insert
= NULL
;
4910 else if (is_gimple_assign (stmt
)
4911 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4913 && !get_ops (lhs
, code
, &ops
,
4914 loop_containing_stmt (stmt
))
4915 && has_single_use (lhs
))
4917 operand_entry
*oe
= operand_entry_pool
.allocate ();
4928 bb_ent
.last_idx
= ops
.length ();
4931 bbinfo
.safe_push (bb_ent
);
4932 for (unsigned int i
= bb_ent
.first_idx
; i
< bb_ent
.last_idx
; ++i
)
4933 ops
[i
]->id
= bb
->index
;
4936 else if (bb
== last_bb
)
4938 /* For last_bb, handle also:
4940 goto <bb 6>; [34.00%]
4942 goto <bb 7>; [66.00%]
4944 <bb 6> [local count: 79512730]:
4946 <bb 7> [local count: 1073741824]:
4947 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4948 where bb 7 is OTHER_BB, but the PHI values from the
4949 earlier bbs match the path through the empty bb
4951 bool test_swapped_p
= false;
4952 bool ok
= suitable_cond_bb (single_pred (last_bb
), last_bb
,
4953 &other_bb
, &test_swapped_p
, true);
4956 e
= EDGE_SUCC (bb
, e
== EDGE_SUCC (bb
, 0) ? 1 : 0);
4958 /* Otherwise stmt is GIMPLE_COND. */
4959 code
= gimple_cond_code (stmt
);
4960 lhs
= gimple_cond_lhs (stmt
);
4961 rhs
= gimple_cond_rhs (stmt
);
4962 if (TREE_CODE (lhs
) == SSA_NAME
4963 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
4964 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
4965 || rhs
!= boolean_false_node
4966 /* Either push into ops the individual bitwise
4967 or resp. and operands, depending on which
4968 edge is other_bb. */
4969 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
4970 ^ (code
== EQ_EXPR
))
4971 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
4972 loop_containing_stmt (stmt
))))
4974 /* Or push the GIMPLE_COND stmt itself. */
4975 operand_entry
*oe
= operand_entry_pool
.allocate ();
4978 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
4979 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
4980 /* oe->op = NULL signs that there is no SSA_NAME
4981 for the range test, and oe->id instead is the
4982 basic block number, at which's end the GIMPLE_COND
4986 oe
->stmt_to_insert
= NULL
;
4991 else if (ops
.length () > bb_ent
.first_idx
)
4994 bb_ent
.last_idx
= ops
.length ();
4996 bbinfo
.safe_push (bb_ent
);
4997 for (unsigned int i
= bb_ent
.first_idx
; i
< bb_ent
.last_idx
; ++i
)
4998 ops
[i
]->id
= bb
->index
;
5002 if (ops
.length () > 1)
5003 any_changes
= optimize_range_tests (ERROR_MARK
, &ops
, first_bb
);
5006 unsigned int idx
, max_idx
= 0;
5007 /* update_ops relies on has_single_use predicates returning the
5008 same values as it did during get_ops earlier. Additionally it
5009 never removes statements, only adds new ones and it should walk
5010 from the single imm use and check the predicate already before
5011 making those changes.
5012 On the other side, the handling of GIMPLE_COND directly can turn
5013 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
5014 it needs to be done in a separate loop afterwards. */
5015 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
5017 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
5018 && bbinfo
[idx
].op
!= NULL_TREE
)
5023 stmt
= last_nondebug_stmt (bb
);
5024 new_op
= update_ops (bbinfo
[idx
].op
,
5026 ops
[bbinfo
[idx
].first_idx
]->rank
,
5027 ops
, &bbinfo
[idx
].first_idx
,
5028 loop_containing_stmt (stmt
));
5029 if (new_op
== NULL_TREE
)
5031 gcc_assert (bb
== last_bb
);
5032 new_op
= ops
[bbinfo
[idx
].first_idx
++]->op
;
5034 if (bbinfo
[idx
].op
!= new_op
)
5036 imm_use_iterator iter
;
5037 use_operand_p use_p
;
5038 gimple
*use_stmt
, *cast_or_tcc_cmp_stmt
= NULL
;
5040 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, bbinfo
[idx
].op
)
5041 if (is_gimple_debug (use_stmt
))
5043 else if (gimple_code (use_stmt
) == GIMPLE_COND
5044 || gimple_code (use_stmt
) == GIMPLE_PHI
)
5045 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
5046 SET_USE (use_p
, new_op
);
5047 else if ((is_gimple_assign (use_stmt
)
5049 (gimple_assign_rhs_code (use_stmt
))
5050 == tcc_comparison
)))
5051 cast_or_tcc_cmp_stmt
= use_stmt
;
5052 else if (gimple_assign_cast_p (use_stmt
))
5053 cast_or_tcc_cmp_stmt
= use_stmt
;
5057 if (cast_or_tcc_cmp_stmt
)
5059 gcc_assert (bb
== last_bb
);
5060 tree lhs
= gimple_assign_lhs (cast_or_tcc_cmp_stmt
);
5061 tree new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
5062 enum tree_code rhs_code
5063 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt
)
5064 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt
)
5067 if (is_gimple_min_invariant (new_op
))
5069 new_op
= fold_convert (TREE_TYPE (lhs
), new_op
);
5070 g
= gimple_build_assign (new_lhs
, new_op
);
5073 g
= gimple_build_assign (new_lhs
, rhs_code
, new_op
);
5074 gimple_stmt_iterator gsi
5075 = gsi_for_stmt (cast_or_tcc_cmp_stmt
);
5076 gimple_set_uid (g
, gimple_uid (cast_or_tcc_cmp_stmt
));
5077 gimple_set_visited (g
, true);
5078 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
5079 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
5080 if (is_gimple_debug (use_stmt
))
5082 else if (gimple_code (use_stmt
) == GIMPLE_COND
5083 || gimple_code (use_stmt
) == GIMPLE_PHI
)
5084 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
5085 SET_USE (use_p
, new_lhs
);
5094 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
5096 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
5097 && bbinfo
[idx
].op
== NULL_TREE
5098 && ops
[bbinfo
[idx
].first_idx
]->op
!= NULL_TREE
)
5100 gcond
*cond_stmt
= as_a
<gcond
*> (*gsi_last_bb (bb
));
5105 /* If we collapse the conditional to a true/false
5106 condition, then bubble that knowledge up to our caller. */
5107 if (integer_zerop (ops
[bbinfo
[idx
].first_idx
]->op
))
5109 gimple_cond_make_false (cond_stmt
);
5110 cfg_cleanup_needed
= true;
5112 else if (integer_onep (ops
[bbinfo
[idx
].first_idx
]->op
))
5114 gimple_cond_make_true (cond_stmt
);
5115 cfg_cleanup_needed
= true;
5119 gimple_cond_set_code (cond_stmt
, NE_EXPR
);
5120 gimple_cond_set_lhs (cond_stmt
,
5121 ops
[bbinfo
[idx
].first_idx
]->op
);
5122 gimple_cond_set_rhs (cond_stmt
, boolean_false_node
);
5124 update_stmt (cond_stmt
);
5130 /* The above changes could result in basic blocks after the first
5131 modified one, up to and including last_bb, to be executed even if
5132 they would not be in the original program. If the value ranges of
5133 assignment lhs' in those bbs were dependent on the conditions
5134 guarding those basic blocks which now can change, the VRs might
5135 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
5136 are only used within the same bb, it should be not a big deal if
5137 we just reset all the VRs in those bbs. See PR68671. */
5138 for (bb
= last_bb
, idx
= 0; idx
< max_idx
; bb
= single_pred (bb
), idx
++)
5139 reset_flow_sensitive_info_in_bb (bb
);
5141 return cfg_cleanup_needed
;
/* NOTE(review): this region is garbled by extraction — statements are split
   across physical lines and some original lines (braces, declarations, the
   surrounding loop and return paths) are missing.  Comments only are added
   here; the code text is left untouched.  */
5144 /* Remove def stmt of VAR if VAR has zero uses and recurse
5145 on rhs1 operand if so. */
5148 remove_visited_stmt_chain (tree var
)
5151 gimple_stmt_iterator gsi
;
/* Only dead SSA names are removed; anything still used stops the walk.  */
5155 if (TREE_CODE (var
) != SSA_NAME
|| !has_zero_uses (var
))
5157 stmt
= SSA_NAME_DEF_STMT (var
);
/* Only assignments that this pass has already visited are deleted, so
   unrelated statements are never touched.  */
5158 if (is_gimple_assign (stmt
) && gimple_visited_p (stmt
))
/* Continue the walk through the statement's first operand before the
   statement itself is removed.  */
5160 var
= gimple_assign_rhs1 (stmt
);
5161 gsi
= gsi_for_stmt (stmt
);
5162 reassoc_remove_stmt (&gsi
);
5163 release_defs (stmt
);
/* NOTE(review): garbled extraction — statements are split across physical
   lines and some original lines (the signature's return type, the oe1
   assignment) are missing.  Comments only are added; code text untouched.  */
5170 /* This function checks three consecutive operands in
5171 passed operands vector OPS starting from OPINDEX and
5172 swaps two operands if it is profitable for binary operation
5173 consuming OPINDEX + 1 and OPINDEX + 2 operands.
5175 We pair ops with the same rank if possible. */
5178 swap_ops_for_binary_stmt (const vec
<operand_entry
*> &ops
,
5179 unsigned int opindex
)
5181 operand_entry
*oe1
, *oe2
, *oe3
;
/* NOTE(review): the assignment of oe1 (presumably ops[opindex]) was lost in
   extraction — confirm against the full source before relying on this.  */
5184 oe2
= ops
[opindex
+ 1];
5185 oe3
= ops
[opindex
+ 2];
/* If ranks pair as (x, x, y), move the odd one next to its partner.  */
5187 if (oe1
->rank
== oe2
->rank
&& oe2
->rank
!= oe3
->rank
)
5188 std::swap (*oe1
, *oe3
);
/* Symmetric case: ranks pair as (x, y, x).  */
5189 else if (oe1
->rank
== oe3
->rank
&& oe2
->rank
!= oe3
->rank
)
5190 std::swap (*oe1
, *oe2
);
/* NOTE(review): garbled extraction — statements split across physical lines
   and some original lines (braces, the final return) are missing.  Comments
   only are added; code text untouched.  */
5193 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5194 two definitions, otherwise return STMT. Sets INSERT_BEFORE to indicate
5195 whether RHS1 op RHS2 can be inserted before or needs to be inserted
5196 after the returned stmt. */
5198 static inline gimple
*
5199 find_insert_point (gimple
*stmt
, tree rhs1
, tree rhs2
, bool &insert_before
)
/* Default: the combined operation can go right before STMT.  */
5201 insert_before
= true;
/* If rhs1's definition dominates STMT, the insertion point moves down to
   that definition, and insertion must then happen after it.  */
5202 if (TREE_CODE (rhs1
) == SSA_NAME
5203 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs1
)))
5205 stmt
= SSA_NAME_DEF_STMT (rhs1
);
5206 insert_before
= false;
/* Same check against rhs2, possibly moving the point even later.  */
5208 if (TREE_CODE (rhs2
) == SSA_NAME
5209 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs2
)))
5211 stmt
= SSA_NAME_DEF_STMT (rhs2
);
5212 insert_before
= false;
/* NOTE(review): garbled extraction — statements split across physical lines
   and some original lines (part of the header comment, the signature's
   return type, braces, the declaration of insert_before) are missing.
   Comments only are added; code text untouched.  */
5217 /* If the stmt that defines operand has to be inserted, insert it
5220 insert_stmt_before_use (gimple
*stmt
, gimple
*stmt_to_insert
)
5222 gcc_assert (is_gimple_assign (stmt_to_insert
));
5223 tree rhs1
= gimple_assign_rhs1 (stmt_to_insert
);
5224 tree rhs2
= gimple_assign_rhs2 (stmt_to_insert
);
/* Pick the latest dominating point where both operands are available.  */
5226 gimple
*insert_point
= find_insert_point (stmt
, rhs1
, rhs2
, insert_before
);
5227 gimple_stmt_iterator gsi
= gsi_for_stmt (insert_point
);
/* Give the new statement the same uid as its insertion neighbor so the
   pass's dominance ordering by uid stays consistent.  */
5228 gimple_set_uid (stmt_to_insert
, gimple_uid (insert_point
));
5230 /* If the insert point is not stmt, then insert_point would be
5231 the point where operand rhs1 or rhs2 is defined. In this case,
5232 stmt_to_insert has to be inserted afterwards. This would
5233 only happen when the stmt insertion point is flexible. */
5235 gsi_insert_before (&gsi
, stmt_to_insert
, GSI_NEW_STMT
);
5237 insert_stmt_after (stmt_to_insert
, insert_point
);
/* NOTE(review): garbled extraction — statements split across physical lines
   and many original lines are missing (the signature's trailing parameters,
   braces, several assignments such as "oe1 = ops[opindex];" and
   "stmt = ..."/"tree new_rhs1 = ...", and the return statements).  Comments
   only are added; code text untouched.  */
5241 /* Recursively rewrite our linearized statements so that the operators
5242 match those in OPS[OPINDEX], putting the computation in rank
5243 order. Return new lhs.
5244 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5245 the current stmt and during recursive invocations.
5246 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5247 recursive invocations. */
5250 rewrite_expr_tree (gimple
*stmt
, enum tree_code rhs_code
, unsigned int opindex
,
5251 const vec
<operand_entry
*> &ops
, bool changed
,
5254 tree rhs1
= gimple_assign_rhs1 (stmt
);
5255 tree rhs2
= gimple_assign_rhs2 (stmt
);
5256 tree lhs
= gimple_assign_lhs (stmt
);
5259 /* The final recursion case for this function is that you have
5260 exactly two operations left.
5261 If we had exactly one op in the entire list to start with, we
5262 would have never called this function, and the tail recursion
5263 rewrites them one at a time. */
5264 if (opindex
+ 2 == ops
.length ())
5266 operand_entry
*oe1
, *oe2
;
/* NOTE(review): the assignment of oe1 (presumably ops[opindex]) was lost
   in extraction.  */
5269 oe2
= ops
[opindex
+ 1];
/* Only rewrite when the remaining two operands actually differ from the
   statement's current operands.  */
5271 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
5273 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
5274 unsigned int uid
= gimple_uid (stmt
);
5276 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5278 fprintf (dump_file
, "Transforming ");
5279 print_gimple_stmt (dump_file
, stmt
, 0);
5282 /* If the stmt that defines operand has to be inserted, insert it
5284 if (oe1
->stmt_to_insert
)
5285 insert_stmt_before_use (stmt
, oe1
->stmt_to_insert
);
5286 if (oe2
->stmt_to_insert
)
5287 insert_stmt_before_use (stmt
, oe2
->stmt_to_insert
);
5288 /* Even when changed is false, reassociation could have e.g. removed
5289 some redundant operations, so unless we are just swapping the
5290 arguments or unless there is no change at all (then we just
5291 return lhs), force creation of a new SSA_NAME. */
5292 if (changed
|| ((rhs1
!= oe2
->op
|| rhs2
!= oe1
->op
) && opindex
))
/* Create a brand-new statement with a fresh lhs at the computed
   insertion point; the old lhs is not reused.  */
5295 gimple
*insert_point
5296 = find_insert_point (stmt
, oe1
->op
, oe2
->op
, insert_before
);
5297 lhs
= make_ssa_name (TREE_TYPE (lhs
));
5299 = gimple_build_assign (lhs
, rhs_code
,
5301 gimple_set_uid (stmt
, uid
);
5302 gimple_set_visited (stmt
, true);
5304 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5306 insert_stmt_after (stmt
, insert_point
);
/* Otherwise the statement is updated in place; assert the insertion
   point would not have moved.  */
5311 gcc_checking_assert (find_insert_point (stmt
, oe1
->op
, oe2
->op
,
5314 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
5315 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
/* Clean up any now-dead chain hanging off the replaced operand.  */
5319 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
5320 remove_visited_stmt_chain (rhs1
);
5322 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5324 fprintf (dump_file
, " into ");
5325 print_gimple_stmt (dump_file
, stmt
, 0);
5331 /* If we hit here, we should have 3 or more ops left. */
5332 gcc_assert (opindex
+ 2 < ops
.length ());
5334 /* Rewrite the next operator. */
5337 /* If the stmt that defines operand has to be inserted, insert it
5339 if (oe
->stmt_to_insert
)
5340 insert_stmt_before_use (stmt
, oe
->stmt_to_insert
);
5342 /* Recurse on the LHS of the binary operator, which is guaranteed to
5343 be the non-leaf side. */
5345 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), rhs_code
, opindex
+ 1, ops
,
5346 changed
|| oe
->op
!= rhs2
|| next_changed
,
/* After the recursion, rewrite this level only if something changed.  */
5349 if (oe
->op
!= rhs2
|| new_rhs1
!= rhs1
)
5351 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5353 fprintf (dump_file
, "Transforming ");
5354 print_gimple_stmt (dump_file
, stmt
, 0);
5357 /* If changed is false, this is either opindex == 0
5358 or all outer rhs2's were equal to corresponding oe->op,
5359 and powi_result is NULL.
5360 That means lhs is equivalent before and after reassociation.
5361 Otherwise ensure the old lhs SSA_NAME is not reused and
5362 create a new stmt as well, so that any debug stmts will be
5363 properly adjusted. */
5366 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
5367 unsigned int uid
= gimple_uid (stmt
);
5369 gimple
*insert_point
= find_insert_point (stmt
, new_rhs1
, oe
->op
,
5372 lhs
= make_ssa_name (TREE_TYPE (lhs
));
5373 stmt
= gimple_build_assign (lhs
, rhs_code
,
5375 gimple_set_uid (stmt
, uid
);
5376 gimple_set_visited (stmt
, true);
5378 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5380 insert_stmt_after (stmt
, insert_point
);
/* In-place update path mirrors the two-operand case above.  */
5385 gcc_checking_assert (find_insert_point (stmt
, new_rhs1
, oe
->op
,
5388 gimple_assign_set_rhs1 (stmt
, new_rhs1
);
5389 gimple_assign_set_rhs2 (stmt
, oe
->op
);
5393 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5395 fprintf (dump_file
, " into ");
5396 print_gimple_stmt (dump_file
, stmt
, 0);
/* NOTE(review): garbled extraction — statements split across physical lines
   and some original lines (return type, braces, declarations of res/rest/
   elog, the final return) are missing.  Comments only are added.  */
5402 /* Find out how many cycles we need to compute statements chain.
5403 OPS_NUM holds number of statements in a chain. CPU_WIDTH is a
5404 maximum number of independent statements we may execute per cycle. */
5407 get_required_cycles (int ops_num
, int cpu_width
)
5413 /* While we have more than 2 * cpu_width operands
5414 we may reduce number of operands by cpu_width
5416 res
= ops_num
/ (2 * cpu_width
);
5418 /* Remained operands count may be reduced twice per cycle
5419 until we have only one operand. */
5420 rest
= (unsigned)(ops_num
- res
* cpu_width
);
5421 elog
= exact_log2 (rest
);
/* Non-power-of-two remainder needs one extra cycle (floor_log2 + 1);
   the exact-power case is presumably handled on a missing line.  */
5425 res
+= floor_log2 (rest
) + 1;
/* NOTE(review): garbled extraction — statements split across physical lines
   and some original lines (return type, the trailing parameters including
   mode, declarations of width/width_min/cycles_best, parts of the binary
   search, the return) are missing.  Comments only are added.  */
5430 /* Returns an optimal number of registers to use for computation of
5431 given statements. */
5434 get_reassociation_width (int ops_num
, enum tree_code opc
,
/* --param tree-reassoc-width overrides the target hook when positive.  */
5437 int param_width
= param_tree_reassoc_width
;
5442 if (param_width
> 0)
5443 width
= param_width
;
/* Otherwise ask the target scheduler for its preferred width.  */
5445 width
= targetm
.sched
.reassociation_width (opc
, mode
);
5450 /* Get the minimal time required for sequence computation. */
5451 cycles_best
= get_required_cycles (ops_num
, width
);
5453 /* Check if we may use less width and still compute sequence for
5454 the same time. It will allow us to reduce registers usage.
5455 get_required_cycles is monotonically increasing with lower width
5456 so we can perform a binary search for the minimal width that still
5457 results in the optimal cycle count. */
5459 while (width
> width_min
)
5461 int width_mid
= (width
+ width_min
) / 2;
/* Narrower width is as fast: shrink the upper bound (body lost in
   extraction); otherwise raise the lower bound.  */
5463 if (get_required_cycles (ops_num
, width_mid
) == cycles_best
)
5465 else if (width_min
< width_mid
)
5466 width_min
= width_mid
;
/* NOTE(review): garbled extraction — statements split across physical lines
   and many original lines are missing throughout (braces, loop bodies,
   operand pops from ops1, several build_and_add_sum argument lists, parts
   of the header comment's worked example).  Comments only are added; code
   text untouched.  */
5474 #define SPECIAL_BIASED_END_STMT 0 /* It is the end stmt of all ops. */
5475 #define BIASED_END_STMT 1 /* It is the end stmt of normal or biased ops. */
5476 #define NORMAL_END_STMT 2 /* It is the end stmt of normal ops. */
5478 /* Rewrite statements with dependency chain with regard the chance to generate
5480 For the chain with FMA: Try to keep fma opportunity as much as possible.
5481 For the chain without FMA: Putting the computation in rank order and trying
5482 to allow operations to be executed in parallel.
5484 e + f + a * b + c * d;
5490 This reassociation approach preserves the chance of fma generation as much
5493 Another thing is to avoid adding loop-carried ops to long chains, otherwise
5494 the whole chain will have dependencies across the loop iteration. Just keep
5495 loop-carried ops in a separate chain.
5497 x_1 = phi (x_0, x_2)
5498 y_1 = phi (y_0, y_2)
5500 a + b + c + d + e + x1 + y1
5510 rewrite_expr_tree_parallel (gassign
*stmt
, int width
, bool has_fma
,
5511 const vec
<operand_entry
*> &ops
)
5513 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
5514 int op_num
= ops
.length ();
5515 int op_normal_num
= op_num
;
5516 gcc_assert (op_num
> 0);
5517 int stmt_num
= op_num
- 1;
5518 gimple
**stmts
= XALLOCAVEC (gimple
*, stmt_num
);
5520 tree tmp_op
[2], op1
;
5522 gimple
*stmt1
= NULL
;
5523 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
5524 int last_rhs1_stmt_index
= 0, last_rhs2_stmt_index
= 0;
5525 int width_active
= 0, width_count
= 0;
5526 bool has_biased
= false, ops_changed
= false;
5527 auto_vec
<operand_entry
*> ops_normal
;
5528 auto_vec
<operand_entry
*> ops_biased
;
5529 vec
<operand_entry
*> *ops1
;
5531 /* We start expression rewriting from the top statements.
5532 So, in this loop we create a full list of statements
5533 we will work with. */
5534 stmts
[stmt_num
- 1] = stmt
;
5535 for (i
= stmt_num
- 2; i
>= 0; i
--)
5536 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
5538 /* Avoid adding loop-carried ops to long chains, first filter out the
5539 loop-carried. But we need to make sure that the length of the remainder
5540 is not less than 4, which is the smallest ops length we can break the
5542 FOR_EACH_VEC_ELT (ops
, i
, oe
)
/* Names flagged in biased_names go into the separate biased chain, but
   only while at least 4 normal ops remain.  */
5544 if (TREE_CODE (oe
->op
) == SSA_NAME
5545 && bitmap_bit_p (biased_names
, SSA_NAME_VERSION (oe
->op
))
5546 && op_normal_num
> 4)
5548 ops_biased
.safe_push (oe
);
5553 ops_normal
.safe_push (oe
);
5556 /* Width should not be larger than ops length / 2, since we can not create
5557 more parallel dependency chains that exceeds such value. */
5558 int width_normal
= op_normal_num
/ 2;
5559 int width_biased
= (op_num
- op_normal_num
) / 2;
5560 width_normal
= width
<= width_normal
? width
: width_normal
;
5561 width_biased
= width
<= width_biased
? width
: width_biased
;
5564 width_count
= width_active
= width_normal
;
5566 /* Build parallel dependency chain according to width. */
5567 for (i
= 0; i
< stmt_num
; i
++)
5569 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5571 fprintf (dump_file
, "Transforming ");
5572 print_gimple_stmt (dump_file
, stmts
[i
], 0);
5575 /* When the work of normal ops is over, but the loop is not over,
5576 continue to do biased ops. */
5577 if (width_count
== 0 && ops1
== &ops_normal
)
5580 width_count
= width_active
= width_biased
;
5584 /* Swap the operands if no FMA in the chain. */
5585 if (ops1
->length () > 2 && !has_fma
)
5586 swap_ops_for_binary_stmt (*ops1
, ops1
->length () - 3);
/* First width_active statements (or the restart window after a chain
   switch) consume two fresh operands each.  */
5588 if (i
< width_active
5589 || (ops_changed
&& i
<= (last_rhs1_stmt_index
+ width_active
)))
5591 for (j
= 0; j
< 2; j
++)
5595 /* If the stmt that defines operand has to be inserted, insert it
5597 stmt1
= oe
->stmt_to_insert
;
5599 insert_stmt_before_use (stmts
[i
], stmt1
);
5602 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
),
5606 gimple_set_visited (stmts
[i
], true);
5611 /* We keep original statement only for the last one. All others are
5613 if (!ops1
->length ())
5615 /* For biased length equal to 2. */
5616 if (width_count
== BIASED_END_STMT
&& !last_rhs2_stmt_index
)
5617 last_rhs2_stmt_index
= i
- 1;
5619 /* When width_count == 2 and there is no biased, just finish. */
5620 if (width_count
== NORMAL_END_STMT
&& !has_biased
)
5622 last_rhs1_stmt_index
= i
- 1;
5623 last_rhs2_stmt_index
= i
- 2;
5625 if (last_rhs1_stmt_index
&& (last_rhs2_stmt_index
|| !has_biased
))
5627 /* We keep original statement only for the last one. All
5628 others are recreated. */
5629 gimple_assign_set_rhs1 (stmts
[i
], gimple_assign_lhs
5630 (stmts
[last_rhs1_stmt_index
]));
5631 gimple_assign_set_rhs2 (stmts
[i
], gimple_assign_lhs
5632 (stmts
[last_rhs2_stmt_index
]));
5633 update_stmt (stmts
[i
]);
/* Reduction step: pair up the lhs of two earlier chain heads.  */
5638 build_and_add_sum (TREE_TYPE (last_rhs1
),
5639 gimple_assign_lhs (stmts
[i
-width_count
]),
5641 (stmts
[i
-width_count
+1]),
5643 gimple_set_visited (stmts
[i
], true);
5646 /* It is the end of normal or biased ops.
5647 last_rhs1_stmt_index used to record the last stmt index
5648 for normal ops. last_rhs2_stmt_index used to record the
5649 last stmt index for biased ops. */
5650 if (width_count
== BIASED_END_STMT
)
5652 gcc_assert (has_biased
);
5653 if (ops_biased
.length ())
5654 last_rhs1_stmt_index
= i
;
5656 last_rhs2_stmt_index
= i
;
5663 /* Attach the rest ops to the parallel dependency chain. */
5666 stmt1
= oe
->stmt_to_insert
;
5668 insert_stmt_before_use (stmts
[i
], stmt1
);
5671 /* For only one biased ops. */
5672 if (width_count
== SPECIAL_BIASED_END_STMT
)
5674 /* We keep original statement only for the last one. All
5675 others are recreated. */
5676 gcc_assert (has_biased
);
5677 gimple_assign_set_rhs1 (stmts
[i
], gimple_assign_lhs
5678 (stmts
[last_rhs1_stmt_index
]));
5679 gimple_assign_set_rhs2 (stmts
[i
], op1
);
5680 update_stmt (stmts
[i
]);
5684 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
),
5686 (stmts
[i
-width_active
]),
5689 gimple_set_visited (stmts
[i
], true);
5694 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5696 fprintf (dump_file
, " into ");
5697 print_gimple_stmt (dump_file
, stmts
[i
], 0);
/* Drop any statements made dead by detaching the original rhs1 chain.  */
5701 remove_visited_stmt_chain (last_rhs1
);
/* NOTE(review): garbled extraction — statements split across physical lines
   and some original lines (return type, braces, parts of the dump and
   cleanup guards) are missing.  Comments only are added.  */
5704 /* Transform STMT, which is really (A + B) + (C + D) into the left
5705 linear form, ((A+B)+C)+D.
5706 Recurse on D if necessary. */
5709 linearize_expr (gimple
*stmt
)
5711 gimple_stmt_iterator gsi
;
5712 gimple
*binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
5713 gimple
*binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
5714 gimple
*oldbinrhs
= binrhs
;
5715 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
5716 gimple
*newbinrhs
= NULL
;
5717 class loop
*loop
= loop_containing_stmt (stmt
);
5718 tree lhs
= gimple_assign_lhs (stmt
);
/* Precondition: both operands are themselves reassociable ops of the
   same code within this loop.  */
5720 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
5721 && is_reassociable_op (binrhs
, rhscode
, loop
));
5723 gsi
= gsi_for_stmt (stmt
);
/* Rotate: stmt becomes (new) + D where new = (A+B) + C, built as a fresh
   statement so the old rhs tree can be discarded.  */
5725 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
5726 binrhs
= gimple_build_assign (make_ssa_name (TREE_TYPE (lhs
)),
5727 gimple_assign_rhs_code (binrhs
),
5728 gimple_assign_lhs (binlhs
),
5729 gimple_assign_rhs2 (binrhs
));
5730 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
5731 gsi_insert_before (&gsi
, binrhs
, GSI_SAME_STMT
);
5732 gimple_set_uid (binrhs
, gimple_uid (stmt
));
5734 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
5735 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
5737 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5739 fprintf (dump_file
, "Linearized: ");
5740 print_gimple_stmt (dump_file
, stmt
, 0);
5743 reassociate_stats
.linearized
++;
/* Remove the now-detached original rhs definition.  */
5746 gsi
= gsi_for_stmt (oldbinrhs
);
5747 reassoc_remove_stmt (&gsi
);
5748 release_defs (oldbinrhs
);
5750 gimple_set_visited (stmt
, true);
5751 gimple_set_visited (binlhs
, true);
5752 gimple_set_visited (binrhs
, true);
5754 /* Tail recurse on the new rhs if it still needs reassociation. */
5755 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
5756 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5757 want to change the algorithm while converting to tuples. */
5758 linearize_expr (stmt
);
/* NOTE(review): garbled extraction — statements split across physical lines
   and some original lines (return type, braces, immusestmt declaration,
   both return statements) are missing.  Comments only are added.  */
5761 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5762 it. Otherwise, return NULL. */
5765 get_single_immediate_use (tree lhs
)
5767 use_operand_p immuse
;
/* All three conditions must hold: SSA name, exactly one immediate use,
   and that use is an assignment.  */
5770 if (TREE_CODE (lhs
) == SSA_NAME
5771 && single_imm_use (lhs
, &immuse
, &immusestmt
)
5772 && is_gimple_assign (immusestmt
))
/* NOTE(review): garbled extraction — statements split across physical lines
   and some original lines (return type, braces, the declarations of g/uid,
   the early "return lhs" in the recursive branch, loop-exit break) are
   missing.  Comments only are added; code text untouched.  */
5778 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5779 representing the negated value. Insertions of any necessary
5780 instructions go before GSI.
5781 This function is recursive in that, if you hand it "a_5" as the
5782 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5783 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5786 negate_value (tree tonegate
, gimple_stmt_iterator
*gsip
)
5788 gimple
*negatedefstmt
= NULL
;
5789 tree resultofnegate
;
5790 gimple_stmt_iterator gsi
;
5793 /* If we are trying to negate a name, defined by an add, negate the
5794 add operands instead. */
5795 if (TREE_CODE (tonegate
) == SSA_NAME
)
5796 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
/* The push-down applies only to a single-use SSA add.  */
5797 if (TREE_CODE (tonegate
) == SSA_NAME
5798 && is_gimple_assign (negatedefstmt
)
5799 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
5800 && has_single_use (gimple_assign_lhs (negatedefstmt
))
5801 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
5803 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
5804 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
5805 tree lhs
= gimple_assign_lhs (negatedefstmt
);
/* The iterator is refetched after each recursive call because the
   recursion may have inserted statements and invalidated it.  */
5808 gsi
= gsi_for_stmt (negatedefstmt
);
5809 rhs1
= negate_value (rhs1
, &gsi
);
5811 gsi
= gsi_for_stmt (negatedefstmt
);
5812 rhs2
= negate_value (rhs2
, &gsi
);
5814 gsi
= gsi_for_stmt (negatedefstmt
);
5815 lhs
= make_ssa_name (TREE_TYPE (lhs
));
5816 gimple_set_visited (negatedefstmt
, true);
5817 g
= gimple_build_assign (lhs
, PLUS_EXPR
, rhs1
, rhs2
);
5818 gimple_set_uid (g
, gimple_uid (negatedefstmt
));
5819 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
/* Fallback: emit an explicit NEGATE_EXPR before *GSIP.  */
5823 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
5824 resultofnegate
= force_gimple_operand_gsi (gsip
, tonegate
, true,
5825 NULL_TREE
, true, GSI_SAME_STMT
);
/* Propagate the uid backwards over any freshly forced statements that
   still have uid 0.  */
5827 uid
= gimple_uid (gsi_stmt (gsi
));
5828 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
5830 gimple
*stmt
= gsi_stmt (gsi
);
5831 if (gimple_uid (stmt
) != 0)
5833 gimple_set_uid (stmt
, uid
);
5835 return resultofnegate
;
/* NOTE(review): garbled extraction — statements split across physical lines
   and some original lines (return type, braces, immusestmt declaration,
   the "return true"/"return false" statements) are missing.  Comments
   only are added.  */
5838 /* Return true if we should break up the subtract in STMT into an add
5839 with negate. This is true when the subtract operands are really
5840 adds, or the subtract itself is used in an add expression. In
5841 either case, breaking up the subtract into an add with negate
5842 exposes the adds to reassociation. */
5845 should_break_up_subtract (gimple
*stmt
)
5847 tree lhs
= gimple_assign_lhs (stmt
);
5848 tree binlhs
= gimple_assign_rhs1 (stmt
);
5849 tree binrhs
= gimple_assign_rhs2 (stmt
);
5851 class loop
*loop
= loop_containing_stmt (stmt
);
/* Case 1: the minuend is itself defined by a reassociable add.  */
5853 if (TREE_CODE (binlhs
) == SSA_NAME
5854 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs
), PLUS_EXPR
, loop
))
/* Case 2: the subtrahend is defined by a reassociable add.  */
5857 if (TREE_CODE (binrhs
) == SSA_NAME
5858 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
), PLUS_EXPR
, loop
))
/* Case 3: the subtract's single use feeds an add, a subtract whose
   first operand it is, or a multiply.  */
5861 if (TREE_CODE (lhs
) == SSA_NAME
5862 && (immusestmt
= get_single_immediate_use (lhs
))
5863 && is_gimple_assign (immusestmt
)
5864 && (gimple_assign_rhs_code (immusestmt
) == PLUS_EXPR
5865 || (gimple_assign_rhs_code (immusestmt
) == MINUS_EXPR
5866 && gimple_assign_rhs1 (immusestmt
) == lhs
)
5867 || gimple_assign_rhs_code (immusestmt
) == MULT_EXPR
))
/* NOTE(review): garbled extraction — statements split across physical lines
   and some original lines (return type, braces, the final update_stmt)
   are missing.  Comments only are added.  */
5872 /* Transform STMT from A - B into A + -B. */
5875 break_up_subtract (gimple
*stmt
, gimple_stmt_iterator
*gsip
)
5877 tree rhs1
= gimple_assign_rhs1 (stmt
);
5878 tree rhs2
= gimple_assign_rhs2 (stmt
);
5880 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5882 fprintf (dump_file
, "Breaking up subtract ");
5883 print_gimple_stmt (dump_file
, stmt
, 0);
/* Negate B (possibly pushing the negation into its defining add), then
   replace the MINUS with a PLUS of A and -B.  */
5886 rhs2
= negate_value (rhs2
, gsip
);
5887 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
/* NOTE(review): garbled extraction — statements split across physical lines
   and some original lines (return type, braces, the case labels such as
   CASE_CFN_POW/CASE_CFN_POWI, arg1 declaration, the early returns and
   the final return) are missing.  Comments only are added.  */
5891 /* Determine whether STMT is a builtin call that raises an SSA name
5892 to an integer power and has only one use. If so, and this is early
5893 reassociation and unsafe math optimizations are permitted, place
5894 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5895 If any of these conditions does not hold, return FALSE. */
5898 acceptable_pow_call (gcall
*stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
5901 REAL_VALUE_TYPE c
, cint
;
/* Dispatch on the combined (builtin/internal) function of the call; the
   case labels were lost in extraction — presumably pow and powi.  */
5903 switch (gimple_call_combined_fn (stmt
))
/* pow with -ferrno-math could set errno, so it cannot be expanded.  */
5906 if (flag_errno_math
)
5909 *base
= gimple_call_arg (stmt
, 0);
5910 arg1
= gimple_call_arg (stmt
, 1);
5912 if (TREE_CODE (arg1
) != REAL_CST
)
5915 c
= TREE_REAL_CST (arg1
);
/* Exponent too large to fit a HOST_WIDE_INT.  */
5917 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
5920 *exponent
= real_to_integer (&c
);
/* Round-trip through integer to verify the constant is integral.  */
5921 real_from_integer (&cint
, VOIDmode
, *exponent
, SIGNED
);
5922 if (!real_identical (&c
, &cint
))
/* powi-style case: the exponent is already an integer constant.  */
5928 *base
= gimple_call_arg (stmt
, 0);
5929 arg1
= gimple_call_arg (stmt
, 1);
5931 if (!tree_fits_shwi_p (arg1
))
5934 *exponent
= tree_to_shwi (arg1
);
5941 /* Expanding negative exponents is generally unproductive, so we don't
5942 complicate matters with those. Exponents of zero and one should
5943 have been handled by expression folding. */
5944 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
/* NOTE(review): garbled extraction — statements split across physical lines
   and some original lines (rest of the header comment, return type, braces,
   the "return true"/"return false" statements) are missing.  Comments only
   are added.  */
5950 /* Try to derive and add operand entry for OP to *OPS. Return false if
5954 try_special_add_to_ops (vec
<operand_entry
*> *ops
,
5955 enum tree_code code
,
5956 tree op
, gimple
* def_stmt
)
5958 tree base
= NULL_TREE
;
5959 HOST_WIDE_INT exponent
= 0;
/* Only single-use SSA names are eligible for special handling.  */
5961 if (TREE_CODE (op
) != SSA_NAME
5962 || ! has_single_use (op
))
/* Multiplication fed by an acceptable pow/powi call: record the base
   with a repeat count instead of the call's result.  */
5965 if (code
== MULT_EXPR
5966 && reassoc_insert_powi_p
5967 && flag_unsafe_math_optimizations
5968 && is_gimple_call (def_stmt
)
5969 && acceptable_pow_call (as_a
<gcall
*> (def_stmt
), &base
, &exponent
))
5971 add_repeat_to_ops_vec (ops
, base
, exponent
);
5972 gimple_set_visited (def_stmt
, true);
/* Multiplication fed by a negation: split into the operand and -1, when
   the FP semantics (sNaNs, signed zeros on complex, decimal FP) allow.  */
5975 else if (code
== MULT_EXPR
5976 && is_gimple_assign (def_stmt
)
5977 && gimple_assign_rhs_code (def_stmt
) == NEGATE_EXPR
5978 && !HONOR_SNANS (TREE_TYPE (op
))
5979 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op
))
5980 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op
)))
5981 && (!FLOAT_TYPE_P (TREE_TYPE (op
))
5982 || !DECIMAL_FLOAT_MODE_P (element_mode (op
))))
5984 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
5985 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
5986 add_to_ops_vec (ops
, rhs1
);
5987 add_to_ops_vec (ops
, cst
);
5988 gimple_set_visited (def_stmt
, true);
/* NOTE(review): garbled extraction — statements split across physical lines
   and some original lines (return type, braces, several returns, the
   gcc_assert's trailing arguments) are missing.  Comments only are added;
   code text untouched.  */
5995 /* Recursively linearize a binary expression that is the RHS of STMT.
5996 Place the operands of the expression tree in the vector named OPS. */
5999 linearize_expr_tree (vec
<operand_entry
*> *ops
, gimple
*stmt
,
6000 bool is_associative
, bool set_visited
)
6002 tree binlhs
= gimple_assign_rhs1 (stmt
);
6003 tree binrhs
= gimple_assign_rhs2 (stmt
);
6004 gimple
*binlhsdef
= NULL
, *binrhsdef
= NULL
;
6005 bool binlhsisreassoc
= false;
6006 bool binrhsisreassoc
= false;
6007 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
6008 class loop
*loop
= loop_containing_stmt (stmt
);
6011 gimple_set_visited (stmt
, true);
/* An operand is reassociable only if its defining statement has the same
   code, lies in the same loop, and cannot throw.  */
6013 if (TREE_CODE (binlhs
) == SSA_NAME
)
6015 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
6016 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
6017 && !stmt_could_throw_p (cfun
, binlhsdef
));
6020 if (TREE_CODE (binrhs
) == SSA_NAME
)
6022 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
6023 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
6024 && !stmt_could_throw_p (cfun
, binrhsdef
));
6027 /* If the LHS is not reassociable, but the RHS is, we need to swap
6028 them. If neither is reassociable, there is nothing we can do, so
6029 just put them in the ops vector. If the LHS is reassociable,
6030 linearize it. If both are reassociable, then linearize the RHS
6033 if (!binlhsisreassoc
)
6035 /* If this is not an associative operation like division, give up. */
6036 if (!is_associative
)
6038 add_to_ops_vec (ops
, binrhs
);
/* Neither side reassociable: record both operands as leaves.  */
6042 if (!binrhsisreassoc
)
6045 if (try_special_add_to_ops (ops
, rhscode
, binrhs
, binrhsdef
))
6046 /* If we add ops for the rhs we expect to be able to recurse
6047 to it via the lhs during expression rewrite so swap
6051 add_to_ops_vec (ops
, binrhs
);
6053 if (!try_special_add_to_ops (ops
, rhscode
, binlhs
, binlhsdef
))
6054 add_to_ops_vec (ops
, binlhs
);
/* Only the RHS is reassociable: move it to the LHS position.  */
6060 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6062 fprintf (dump_file
, "swapping operands of ");
6063 print_gimple_stmt (dump_file
, stmt
, 0);
6066 swap_ssa_operands (stmt
,
6067 gimple_assign_rhs1_ptr (stmt
),
6068 gimple_assign_rhs2_ptr (stmt
));
6071 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6073 fprintf (dump_file
, " is now ");
6074 print_gimple_stmt (dump_file
, stmt
, 0);
/* Keep local variables in sync with the swapped statement.  */
6076 if (!binrhsisreassoc
)
6079 /* We want to make it so the lhs is always the reassociative op,
6081 std::swap (binlhs
, binrhs
);
/* Both sides reassociable: flatten via linearize_expr first.  */
6083 else if (binrhsisreassoc
)
6085 linearize_expr (stmt
);
6086 binlhs
= gimple_assign_rhs1 (stmt
);
6087 binrhs
= gimple_assign_rhs2 (stmt
);
/* Invariant at this point: the rhs is a leaf, never reassociable.  */
6090 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
6091 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
/* Recurse down the (reassociable) lhs chain, then record the rhs leaf.  */
6093 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
6094 is_associative
, set_visited
);
6096 if (!try_special_add_to_ops (ops
, rhscode
, binrhs
, binrhsdef
))
6097 add_to_ops_vec (ops
, binrhs
);
6100 /* Repropagate the negates back into subtracts, since no other pass
6101 currently does it. */
6104 repropagate_negates (void)
6109 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
6111 gimple
*user
= get_single_immediate_use (negate
);
6112 if (!user
|| !is_gimple_assign (user
))
6115 tree negateop
= gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate
));
6116 if (TREE_CODE (negateop
) == SSA_NAME
6117 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop
))
6120 /* The negate operand can be either operand of a PLUS_EXPR
6121 (it can be the LHS if the RHS is a constant for example).
6123 Force the negate operand to the RHS of the PLUS_EXPR, then
6124 transform the PLUS_EXPR into a MINUS_EXPR. */
6125 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
6127 /* If the negated operand appears on the LHS of the
6128 PLUS_EXPR, exchange the operands of the PLUS_EXPR
6129 to force the negated operand to the RHS of the PLUS_EXPR. */
6130 if (gimple_assign_rhs1 (user
) == negate
)
6132 swap_ssa_operands (user
,
6133 gimple_assign_rhs1_ptr (user
),
6134 gimple_assign_rhs2_ptr (user
));
6137 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
6138 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
6139 if (gimple_assign_rhs2 (user
) == negate
)
6141 tree rhs1
= gimple_assign_rhs1 (user
);
6142 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
6143 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
,
6148 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
6150 if (gimple_assign_rhs1 (user
) == negate
)
6155 which we transform into
6158 This pushes down the negate which we possibly can merge
6159 into some other operation, hence insert it into the
6160 plus_negates vector. */
6161 gimple
*feed
= SSA_NAME_DEF_STMT (negate
);
6162 tree b
= gimple_assign_rhs2 (user
);
6163 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
);
6164 gimple_stmt_iterator gsi2
= gsi_for_stmt (user
);
6165 tree x
= make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed
)));
6166 gimple
*g
= gimple_build_assign (x
, PLUS_EXPR
, negateop
, b
);
6167 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
6168 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, x
);
6169 user
= gsi_stmt (gsi2
);
6171 reassoc_remove_stmt (&gsi
);
6172 release_defs (feed
);
6173 plus_negates
.safe_push (gimple_assign_lhs (user
));
6177 /* Transform "x = -negateop; y = b - x" into "y = b + negateop",
6178 getting rid of one operation. */
6179 tree rhs1
= gimple_assign_rhs1 (user
);
6180 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
6181 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, negateop
);
6182 update_stmt (gsi_stmt (gsi
));
6188 /* Break up subtract operations in block BB.
6190 We do this top down because we don't know whether the subtract is
6191 part of a possible chain of reassociation except at the top.
6200 we want to break up k = t - q, but we won't until we've transformed q
6201 = b - r, which won't be broken up until we transform b = c - d.
6203 En passant, clear the GIMPLE visited flag on every statement
6204 and set UIDs within each basic block. */
6207 break_up_subtract_bb (basic_block bb
)
6209 gimple_stmt_iterator gsi
;
6211 unsigned int uid
= 1;
6213 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
6215 gimple
*stmt
= gsi_stmt (gsi
);
6216 gimple_set_visited (stmt
, false);
6217 gimple_set_uid (stmt
, uid
++);
6219 if (!is_gimple_assign (stmt
)
6220 || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt
)))
6221 || !can_reassociate_op_p (gimple_assign_lhs (stmt
)))
6224 /* Look for simple gimple subtract operations. */
6225 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
6227 if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt
))
6228 || !can_reassociate_op_p (gimple_assign_rhs2 (stmt
)))
6231 /* Check for a subtract used only in an addition. If this
6232 is the case, transform it into add of a negate for better
6233 reassociation. IE transform C = A-B into C = A + -B if C
6234 is only used in an addition. */
6235 if (should_break_up_subtract (stmt
))
6236 break_up_subtract (stmt
, &gsi
);
6238 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
6239 && can_reassociate_op_p (gimple_assign_rhs1 (stmt
)))
6240 plus_negates
.safe_push (gimple_assign_lhs (stmt
));
6242 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
6244 son
= next_dom_son (CDI_DOMINATORS
, son
))
6245 break_up_subtract_bb (son
);
6248 /* Used for repeated factor analysis. */
6249 struct repeat_factor
6251 /* An SSA name that occurs in a multiply chain. */
6254 /* Cached rank of the factor. */
6257 /* Number of occurrences of the factor in the chain. */
6258 HOST_WIDE_INT count
;
6260 /* An SSA name representing the product of this factor and
6261 all factors appearing later in the repeated factor vector. */
6266 static vec
<repeat_factor
> repeat_factor_vec
;
6268 /* Used for sorting the repeat factor vector. Sort primarily by
6269 ascending occurrence count, secondarily by descending rank. */
6272 compare_repeat_factors (const void *x1
, const void *x2
)
6274 const repeat_factor
*rf1
= (const repeat_factor
*) x1
;
6275 const repeat_factor
*rf2
= (const repeat_factor
*) x2
;
6277 if (rf1
->count
!= rf2
->count
)
6278 return rf1
->count
- rf2
->count
;
6280 return rf2
->rank
- rf1
->rank
;
6283 /* Look for repeated operands in OPS in the multiply tree rooted at
6284 STMT. Replace them with an optimal sequence of multiplies and powi
6285 builtin calls, and remove the used operands from OPS. Return an
6286 SSA name representing the value of the replacement sequence. */
6289 attempt_builtin_powi (gimple
*stmt
, vec
<operand_entry
*> *ops
)
6291 unsigned i
, j
, vec_len
;
6294 repeat_factor
*rf1
, *rf2
;
6295 repeat_factor rfnew
;
6296 tree result
= NULL_TREE
;
6297 tree target_ssa
, iter_result
;
6298 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
6299 tree powi_fndecl
= mathfn_built_in (type
, BUILT_IN_POWI
);
6300 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
6301 gimple
*mul_stmt
, *pow_stmt
;
6303 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6304 target, unless type is integral. */
6305 if (!powi_fndecl
&& !INTEGRAL_TYPE_P (type
))
6308 /* Allocate the repeated factor vector. */
6309 repeat_factor_vec
.create (10);
6311 /* Scan the OPS vector for all SSA names in the product and build
6312 up a vector of occurrence counts for each factor. */
6313 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
6315 if (TREE_CODE (oe
->op
) == SSA_NAME
)
6317 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
6319 if (rf1
->factor
== oe
->op
)
6321 rf1
->count
+= oe
->count
;
6326 if (j
>= repeat_factor_vec
.length ())
6328 rfnew
.factor
= oe
->op
;
6329 rfnew
.rank
= oe
->rank
;
6330 rfnew
.count
= oe
->count
;
6331 rfnew
.repr
= NULL_TREE
;
6332 repeat_factor_vec
.safe_push (rfnew
);
6337 /* Sort the repeated factor vector by (a) increasing occurrence count,
6338 and (b) decreasing rank. */
6339 repeat_factor_vec
.qsort (compare_repeat_factors
);
6341 /* It is generally best to combine as many base factors as possible
6342 into a product before applying __builtin_powi to the result.
6343 However, the sort order chosen for the repeated factor vector
6344 allows us to cache partial results for the product of the base
6345 factors for subsequent use. When we already have a cached partial
6346 result from a previous iteration, it is best to make use of it
6347 before looking for another __builtin_pow opportunity.
6349 As an example, consider x * x * y * y * y * z * z * z * z.
6350 We want to first compose the product x * y * z, raise it to the
6351 second power, then multiply this by y * z, and finally multiply
6352 by z. This can be done in 5 multiplies provided we cache y * z
6353 for use in both expressions:
6361 If we instead ignored the cached y * z and first multiplied by
6362 the __builtin_pow opportunity z * z, we would get the inferior:
6371 vec_len
= repeat_factor_vec
.length ();
6373 /* Repeatedly look for opportunities to create a builtin_powi call. */
6376 HOST_WIDE_INT power
;
6378 /* First look for the largest cached product of factors from
6379 preceding iterations. If found, create a builtin_powi for
6380 it if the minimum occurrence count for its factors is at
6381 least 2, or just use this cached product as our next
6382 multiplicand if the minimum occurrence count is 1. */
6383 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
6385 if (rf1
->repr
&& rf1
->count
> 0)
6395 iter_result
= rf1
->repr
;
6397 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6401 fputs ("Multiplying by cached product ", dump_file
);
6402 for (elt
= j
; elt
< vec_len
; elt
++)
6404 rf
= &repeat_factor_vec
[elt
];
6405 print_generic_expr (dump_file
, rf
->factor
);
6406 if (elt
< vec_len
- 1)
6407 fputs (" * ", dump_file
);
6409 fputs ("\n", dump_file
);
6414 if (INTEGRAL_TYPE_P (type
))
6416 gcc_assert (power
> 1);
6417 gimple_stmt_iterator gsip
= gsi
;
6419 iter_result
= powi_as_mults (&gsi
, gimple_location (stmt
),
6421 gimple_stmt_iterator gsic
= gsi
;
6422 while (gsi_stmt (gsic
) != gsi_stmt (gsip
))
6424 gimple_set_uid (gsi_stmt (gsic
), gimple_uid (stmt
));
6425 gimple_set_visited (gsi_stmt (gsic
), true);
6431 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
6433 = gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
6434 build_int_cst (integer_type_node
,
6436 gimple_call_set_lhs (pow_stmt
, iter_result
);
6437 gimple_set_location (pow_stmt
, gimple_location (stmt
));
6438 gimple_set_uid (pow_stmt
, gimple_uid (stmt
));
6439 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
6442 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6446 fputs ("Building __builtin_pow call for cached product (",
6448 for (elt
= j
; elt
< vec_len
; elt
++)
6450 rf
= &repeat_factor_vec
[elt
];
6451 print_generic_expr (dump_file
, rf
->factor
);
6452 if (elt
< vec_len
- 1)
6453 fputs (" * ", dump_file
);
6455 fprintf (dump_file
, ")^" HOST_WIDE_INT_PRINT_DEC
"\n",
6462 /* Otherwise, find the first factor in the repeated factor
6463 vector whose occurrence count is at least 2. If no such
6464 factor exists, there are no builtin_powi opportunities
6466 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
6468 if (rf1
->count
>= 2)
6477 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6481 fputs ("Building __builtin_pow call for (", dump_file
);
6482 for (elt
= j
; elt
< vec_len
; elt
++)
6484 rf
= &repeat_factor_vec
[elt
];
6485 print_generic_expr (dump_file
, rf
->factor
);
6486 if (elt
< vec_len
- 1)
6487 fputs (" * ", dump_file
);
6489 fprintf (dump_file
, ")^" HOST_WIDE_INT_PRINT_DEC
"\n", power
);
6492 reassociate_stats
.pows_created
++;
6494 /* Visit each element of the vector in reverse order (so that
6495 high-occurrence elements are visited first, and within the
6496 same occurrence count, lower-ranked elements are visited
6497 first). Form a linear product of all elements in this order
6498 whose occurrence count is at least that of element J.
6499 Record the SSA name representing the product of each element
6500 with all subsequent elements in the vector. */
6501 if (j
== vec_len
- 1)
6502 rf1
->repr
= rf1
->factor
;
6505 for (ii
= vec_len
- 2; ii
>= (int)j
; ii
--)
6509 rf1
= &repeat_factor_vec
[ii
];
6510 rf2
= &repeat_factor_vec
[ii
+ 1];
6512 /* Init the last factor's representative to be itself. */
6514 rf2
->repr
= rf2
->factor
;
6519 target_ssa
= make_temp_ssa_name (type
, NULL
, "reassocpow");
6520 mul_stmt
= gimple_build_assign (target_ssa
, MULT_EXPR
,
6522 gimple_set_location (mul_stmt
, gimple_location (stmt
));
6523 gimple_set_uid (mul_stmt
, gimple_uid (stmt
));
6524 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
6525 rf1
->repr
= target_ssa
;
6527 /* Don't reprocess the multiply we just introduced. */
6528 gimple_set_visited (mul_stmt
, true);
6532 /* Form a call to __builtin_powi for the maximum product
6533 just formed, raised to the power obtained earlier. */
6534 rf1
= &repeat_factor_vec
[j
];
6535 if (INTEGRAL_TYPE_P (type
))
6537 gcc_assert (power
> 1);
6538 gimple_stmt_iterator gsip
= gsi
;
6540 iter_result
= powi_as_mults (&gsi
, gimple_location (stmt
),
6542 gimple_stmt_iterator gsic
= gsi
;
6543 while (gsi_stmt (gsic
) != gsi_stmt (gsip
))
6545 gimple_set_uid (gsi_stmt (gsic
), gimple_uid (stmt
));
6546 gimple_set_visited (gsi_stmt (gsic
), true);
6552 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
6553 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
6554 build_int_cst (integer_type_node
,
6556 gimple_call_set_lhs (pow_stmt
, iter_result
);
6557 gimple_set_location (pow_stmt
, gimple_location (stmt
));
6558 gimple_set_uid (pow_stmt
, gimple_uid (stmt
));
6559 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
6563 /* If we previously formed at least one other builtin_powi call,
6564 form the product of this one and those others. */
6567 tree new_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
6568 mul_stmt
= gimple_build_assign (new_result
, MULT_EXPR
,
6569 result
, iter_result
);
6570 gimple_set_location (mul_stmt
, gimple_location (stmt
));
6571 gimple_set_uid (mul_stmt
, gimple_uid (stmt
));
6572 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
6573 gimple_set_visited (mul_stmt
, true);
6574 result
= new_result
;
6577 result
= iter_result
;
6579 /* Decrement the occurrence count of each element in the product
6580 by the count found above, and remove this many copies of each
6582 for (i
= j
; i
< vec_len
; i
++)
6587 rf1
= &repeat_factor_vec
[i
];
6588 rf1
->count
-= power
;
6590 FOR_EACH_VEC_ELT_REVERSE (*ops
, n
, oe
)
6592 if (oe
->op
== rf1
->factor
)
6596 ops
->ordered_remove (n
);
6612 /* At this point all elements in the repeated factor vector have a
6613 remaining occurrence count of 0 or 1, and those with a count of 1
6614 don't have cached representatives. Re-sort the ops vector and
6616 ops
->qsort (sort_by_operand_rank
);
6617 repeat_factor_vec
.release ();
6619 /* Return the final product computed herein. Note that there may
6620 still be some elements with single occurrence count left in OPS;
6621 those will be handled by the normal reassociation logic. */
6625 /* Attempt to optimize
6626 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6627 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
6630 attempt_builtin_copysign (vec
<operand_entry
*> *ops
)
6634 unsigned int length
= ops
->length ();
6635 tree cst
= ops
->last ()->op
;
6637 if (length
== 1 || TREE_CODE (cst
) != REAL_CST
)
6640 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
6642 if (TREE_CODE (oe
->op
) == SSA_NAME
6643 && has_single_use (oe
->op
))
6645 gimple
*def_stmt
= SSA_NAME_DEF_STMT (oe
->op
);
6646 if (gcall
*old_call
= dyn_cast
<gcall
*> (def_stmt
))
6649 switch (gimple_call_combined_fn (old_call
))
6652 CASE_CFN_COPYSIGN_FN
:
6653 arg0
= gimple_call_arg (old_call
, 0);
6654 arg1
= gimple_call_arg (old_call
, 1);
6655 /* The first argument of copysign must be a constant,
6656 otherwise there's nothing to do. */
6657 if (TREE_CODE (arg0
) == REAL_CST
)
6659 tree type
= TREE_TYPE (arg0
);
6660 tree mul
= const_binop (MULT_EXPR
, type
, cst
, arg0
);
6661 /* If we couldn't fold to a single constant, skip it.
6662 That happens e.g. for inexact multiplication when
6664 if (mul
== NULL_TREE
)
6666 /* Instead of adjusting OLD_CALL, let's build a new
6667 call to not leak the LHS and prevent keeping bogus
6668 debug statements. DCE will clean up the old call. */
6670 if (gimple_call_internal_p (old_call
))
6671 new_call
= gimple_build_call_internal
6672 (IFN_COPYSIGN
, 2, mul
, arg1
);
6674 new_call
= gimple_build_call
6675 (gimple_call_fndecl (old_call
), 2, mul
, arg1
);
6676 tree lhs
= make_ssa_name (type
);
6677 gimple_call_set_lhs (new_call
, lhs
);
6678 gimple_set_location (new_call
,
6679 gimple_location (old_call
));
6680 insert_stmt_after (new_call
, old_call
);
6681 /* We've used the constant, get rid of it. */
6683 bool cst1_neg
= real_isneg (TREE_REAL_CST_PTR (cst
));
6684 /* Handle the CST1 < 0 case by negating the result. */
6687 tree negrhs
= make_ssa_name (TREE_TYPE (lhs
));
6689 = gimple_build_assign (negrhs
, NEGATE_EXPR
, lhs
);
6690 insert_stmt_after (negate_stmt
, new_call
);
6695 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6697 fprintf (dump_file
, "Optimizing copysign: ");
6698 print_generic_expr (dump_file
, cst
);
6699 fprintf (dump_file
, " * COPYSIGN (");
6700 print_generic_expr (dump_file
, arg0
);
6701 fprintf (dump_file
, ", ");
6702 print_generic_expr (dump_file
, arg1
);
6703 fprintf (dump_file
, ") into %sCOPYSIGN (",
6704 cst1_neg
? "-" : "");
6705 print_generic_expr (dump_file
, mul
);
6706 fprintf (dump_file
, ", ");
6707 print_generic_expr (dump_file
, arg1
);
6708 fprintf (dump_file
, "\n");
6721 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6724 transform_stmt_to_copy (gimple_stmt_iterator
*gsi
, gimple
*stmt
, tree new_rhs
)
6728 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6730 fprintf (dump_file
, "Transforming ");
6731 print_gimple_stmt (dump_file
, stmt
, 0);
6734 rhs1
= gimple_assign_rhs1 (stmt
);
6735 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
6737 remove_visited_stmt_chain (rhs1
);
6739 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6741 fprintf (dump_file
, " into ");
6742 print_gimple_stmt (dump_file
, stmt
, 0);
6746 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6749 transform_stmt_to_multiply (gimple_stmt_iterator
*gsi
, gimple
*stmt
,
6750 tree rhs1
, tree rhs2
)
6752 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6754 fprintf (dump_file
, "Transforming ");
6755 print_gimple_stmt (dump_file
, stmt
, 0);
6758 gimple_assign_set_rhs_with_ops (gsi
, MULT_EXPR
, rhs1
, rhs2
);
6759 update_stmt (gsi_stmt (*gsi
));
6760 remove_visited_stmt_chain (rhs1
);
6762 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6764 fprintf (dump_file
, " into ");
6765 print_gimple_stmt (dump_file
, stmt
, 0);
6769 /* Rearrange ops may have more FMA when the chain may has more than 2 FMAs.
6770 Put no-mult ops and mult ops alternately at the end of the queue, which is
6771 conducive to generating more FMA and reducing the loss of FMA when breaking
6774 a * b + c * d + e generates:
6776 _4 = c_9(D) * d_10(D);
6777 _12 = .FMA (a_7(D), b_8(D), _4);
6780 Rearrange ops to -> e + a * b + c * d generates:
6782 _4 = .FMA (c_7(D), d_8(D), _3);
6783 _11 = .FMA (a_5(D), b_6(D), _4); */
6785 rank_ops_for_fma (vec
<operand_entry
*> *ops
)
6789 unsigned int ops_length
= ops
->length ();
6790 auto_vec
<operand_entry
*> ops_mult
;
6791 auto_vec
<operand_entry
*> ops_others
;
6793 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
6795 if (TREE_CODE (oe
->op
) == SSA_NAME
)
6797 gimple
*def_stmt
= SSA_NAME_DEF_STMT (oe
->op
);
6798 if (is_gimple_assign (def_stmt
)
6799 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
)
6800 ops_mult
.safe_push (oe
);
6802 ops_others
.safe_push (oe
);
6805 ops_others
.safe_push (oe
);
6807 /* 1. When ops_mult.length == 2, like the following case,
6811 we need to rearrange the ops.
6813 Putting ops that not def from mult in front can generate more FMAs.
6815 2. If all ops are defined with mult, we don't need to rearrange them. */
6816 if (ops_mult
.length () >= 2 && ops_mult
.length () != ops_length
)
6818 /* Put no-mult ops and mult ops alternately at the end of the
6819 queue, which is conducive to generating more FMA and reducing the
6820 loss of FMA when breaking the chain. */
6822 ops
->splice (ops_mult
);
6823 int j
, opindex
= ops
->length ();
6824 int others_length
= ops_others
.length ();
6825 for (j
= 0; j
< others_length
; j
++)
6827 oe
= ops_others
.pop ();
6828 ops
->quick_insert (opindex
, oe
);
6836 /* Reassociate expressions in basic block BB and its post-dominator as
6839 Bubble up return status from maybe_optimize_range_tests. */
6842 reassociate_bb (basic_block bb
)
6844 gimple_stmt_iterator gsi
;
6846 gimple
*stmt
= last_nondebug_stmt (bb
);
6847 bool cfg_cleanup_needed
= false;
6849 if (stmt
&& !gimple_visited_p (stmt
))
6850 cfg_cleanup_needed
|= maybe_optimize_range_tests (stmt
);
6852 bool do_prev
= false;
6853 for (gsi
= gsi_last_bb (bb
);
6854 !gsi_end_p (gsi
); do_prev
? gsi_prev (&gsi
) : (void) 0)
6857 stmt
= gsi_stmt (gsi
);
6859 if (is_gimple_assign (stmt
)
6860 && !stmt_could_throw_p (cfun
, stmt
))
6862 tree lhs
, rhs1
, rhs2
;
6863 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
6865 /* If this was part of an already processed statement,
6866 we don't need to touch it again. */
6867 if (gimple_visited_p (stmt
))
6869 /* This statement might have become dead because of previous
6871 if (has_zero_uses (gimple_get_lhs (stmt
)))
6873 reassoc_remove_stmt (&gsi
);
6874 release_defs (stmt
);
6875 /* We might end up removing the last stmt above which
6876 places the iterator to the end of the sequence.
6877 Reset it to the last stmt in this case and make sure
6878 we don't do gsi_prev in that case. */
6879 if (gsi_end_p (gsi
))
6881 gsi
= gsi_last_bb (bb
);
6888 /* If this is not a gimple binary expression, there is
6889 nothing for us to do with it. */
6890 if (get_gimple_rhs_class (rhs_code
) != GIMPLE_BINARY_RHS
)
6893 lhs
= gimple_assign_lhs (stmt
);
6894 rhs1
= gimple_assign_rhs1 (stmt
);
6895 rhs2
= gimple_assign_rhs2 (stmt
);
6897 /* For non-bit or min/max operations we can't associate
6898 all types. Verify that here. */
6899 if ((rhs_code
!= BIT_IOR_EXPR
6900 && rhs_code
!= BIT_AND_EXPR
6901 && rhs_code
!= BIT_XOR_EXPR
6902 && rhs_code
!= MIN_EXPR
6903 && rhs_code
!= MAX_EXPR
6904 && !can_reassociate_type_p (TREE_TYPE (lhs
)))
6905 || !can_reassociate_op_p (rhs1
)
6906 || !can_reassociate_op_p (rhs2
))
6909 if (associative_tree_code (rhs_code
))
6911 auto_vec
<operand_entry
*> ops
;
6912 tree powi_result
= NULL_TREE
;
6913 bool is_vector
= VECTOR_TYPE_P (TREE_TYPE (lhs
));
6915 /* There may be no immediate uses left by the time we
6916 get here because we may have eliminated them all. */
6917 if (TREE_CODE (lhs
) == SSA_NAME
&& has_zero_uses (lhs
))
6920 gimple_set_visited (stmt
, true);
6921 linearize_expr_tree (&ops
, stmt
, true, true);
6922 ops
.qsort (sort_by_operand_rank
);
6923 int orig_len
= ops
.length ();
6924 optimize_ops_list (rhs_code
, &ops
);
6925 if (undistribute_ops_list (rhs_code
, &ops
,
6926 loop_containing_stmt (stmt
)))
6928 ops
.qsort (sort_by_operand_rank
);
6929 optimize_ops_list (rhs_code
, &ops
);
6931 if (undistribute_bitref_for_vector (rhs_code
, &ops
,
6932 loop_containing_stmt (stmt
)))
6934 ops
.qsort (sort_by_operand_rank
);
6935 optimize_ops_list (rhs_code
, &ops
);
6937 if (rhs_code
== PLUS_EXPR
6938 && transform_add_to_multiply (&ops
))
6939 ops
.qsort (sort_by_operand_rank
);
6941 if (rhs_code
== BIT_IOR_EXPR
|| rhs_code
== BIT_AND_EXPR
)
6944 optimize_vec_cond_expr (rhs_code
, &ops
);
6946 optimize_range_tests (rhs_code
, &ops
, NULL
);
6949 if (rhs_code
== MULT_EXPR
&& !is_vector
)
6951 attempt_builtin_copysign (&ops
);
6953 if (reassoc_insert_powi_p
6954 && (flag_unsafe_math_optimizations
6955 || (INTEGRAL_TYPE_P (TREE_TYPE (lhs
)))))
6956 powi_result
= attempt_builtin_powi (stmt
, &ops
);
6959 operand_entry
*last
;
6960 bool negate_result
= false;
6961 if (ops
.length () > 1
6962 && rhs_code
== MULT_EXPR
)
6965 if ((integer_minus_onep (last
->op
)
6966 || real_minus_onep (last
->op
))
6967 && !HONOR_SNANS (TREE_TYPE (lhs
))
6968 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs
))
6969 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs
))))
6972 negate_result
= true;
6977 /* If the operand vector is now empty, all operands were
6978 consumed by the __builtin_powi optimization. */
6979 if (ops
.length () == 0)
6980 transform_stmt_to_copy (&gsi
, stmt
, powi_result
);
6981 else if (ops
.length () == 1)
6983 tree last_op
= ops
.last ()->op
;
6985 /* If the stmt that defines operand has to be inserted, insert it
6987 if (ops
.last ()->stmt_to_insert
)
6988 insert_stmt_before_use (stmt
, ops
.last ()->stmt_to_insert
);
6990 transform_stmt_to_multiply (&gsi
, stmt
, last_op
,
6993 transform_stmt_to_copy (&gsi
, stmt
, last_op
);
6997 machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
6998 int ops_num
= ops
.length ();
7000 bool has_fma
= false;
7002 /* For binary bit operations, if there are at least 3
7003 operands and the last operand in OPS is a constant,
7004 move it to the front. This helps ensure that we generate
7005 (X & Y) & C rather than (X & C) & Y. The former will
7006 often match a canonical bit test when we get to RTL. */
7007 if (ops
.length () > 2
7008 && (rhs_code
== BIT_AND_EXPR
7009 || rhs_code
== BIT_IOR_EXPR
7010 || rhs_code
== BIT_XOR_EXPR
)
7011 && TREE_CODE (ops
.last ()->op
) == INTEGER_CST
)
7012 std::swap (*ops
[0], *ops
[ops_num
- 1]);
7014 optimization_type opt_type
= bb_optimization_type (bb
);
7016 /* If the target support FMA, rank_ops_for_fma will detect if
7017 the chain has fmas and rearrange the ops if so. */
7018 if (direct_internal_fn_supported_p (IFN_FMA
,
7021 && (rhs_code
== PLUS_EXPR
|| rhs_code
== MINUS_EXPR
))
7023 has_fma
= rank_ops_for_fma (&ops
);
7026 /* Only rewrite the expression tree to parallel in the
7027 last reassoc pass to avoid useless work back-and-forth
7028 with initial linearization. */
7029 if (!reassoc_insert_powi_p
7030 && ops
.length () > 3
7031 && (width
= get_reassociation_width (ops_num
, rhs_code
,
7034 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7036 "Width = %d was chosen for reassociation\n",
7038 rewrite_expr_tree_parallel (as_a
<gassign
*> (stmt
),
7045 /* When there are three operands left, we want
7046 to make sure the ones that get the double
7047 binary op are chosen wisely. */
7048 int len
= ops
.length ();
7049 if (len
>= 3 && !has_fma
)
7050 swap_ops_for_binary_stmt (ops
, len
- 3);
7052 new_lhs
= rewrite_expr_tree (stmt
, rhs_code
, 0, ops
,
7058 /* If we combined some repeated factors into a
7059 __builtin_powi call, multiply that result by the
7060 reassociated operands. */
7063 gimple
*mul_stmt
, *lhs_stmt
= SSA_NAME_DEF_STMT (lhs
);
7064 tree type
= TREE_TYPE (lhs
);
7065 tree target_ssa
= make_temp_ssa_name (type
, NULL
,
7067 gimple_set_lhs (lhs_stmt
, target_ssa
);
7068 update_stmt (lhs_stmt
);
7071 target_ssa
= new_lhs
;
7074 mul_stmt
= gimple_build_assign (lhs
, MULT_EXPR
,
7075 powi_result
, target_ssa
);
7076 gimple_set_location (mul_stmt
, gimple_location (stmt
));
7077 gimple_set_uid (mul_stmt
, gimple_uid (stmt
));
7078 gsi_insert_after (&gsi
, mul_stmt
, GSI_NEW_STMT
);
7084 stmt
= SSA_NAME_DEF_STMT (lhs
);
7085 tree tmp
= make_ssa_name (TREE_TYPE (lhs
));
7086 gimple_set_lhs (stmt
, tmp
);
7089 gassign
*neg_stmt
= gimple_build_assign (lhs
, NEGATE_EXPR
,
7091 gimple_set_uid (neg_stmt
, gimple_uid (stmt
));
7092 gsi_insert_after (&gsi
, neg_stmt
, GSI_NEW_STMT
);
7098 for (son
= first_dom_son (CDI_POST_DOMINATORS
, bb
);
7100 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
7101 cfg_cleanup_needed
|= reassociate_bb (son
);
7103 return cfg_cleanup_needed
;
7106 /* Add jumps around shifts for range tests turned into bit tests.
7107 For each SSA_NAME VAR we have code like:
7108 VAR = ...; // final stmt of range comparison
7109 // bit test here...;
7110 OTHERVAR = ...; // final stmt of the bit test sequence
7111 RES = VAR | OTHERVAR;
7112 Turn the above into:
7119 // bit test here...;
7122 # RES = PHI<1(l1), OTHERVAR(l2)>; */
7130 FOR_EACH_VEC_ELT (reassoc_branch_fixups
, i
, var
)
7132 gimple
*def_stmt
= SSA_NAME_DEF_STMT (var
);
7135 bool ok
= single_imm_use (var
, &use
, &use_stmt
);
7137 && is_gimple_assign (use_stmt
)
7138 && gimple_assign_rhs_code (use_stmt
) == BIT_IOR_EXPR
7139 && gimple_bb (def_stmt
) == gimple_bb (use_stmt
));
7141 basic_block cond_bb
= gimple_bb (def_stmt
);
7142 basic_block then_bb
= split_block (cond_bb
, def_stmt
)->dest
;
7143 basic_block merge_bb
= split_block (then_bb
, use_stmt
)->dest
;
7145 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
7146 gimple
*g
= gimple_build_cond (NE_EXPR
, var
,
7147 build_zero_cst (TREE_TYPE (var
)),
7148 NULL_TREE
, NULL_TREE
);
7149 location_t loc
= gimple_location (use_stmt
);
7150 gimple_set_location (g
, loc
);
7151 gsi_insert_after (&gsi
, g
, GSI_NEW_STMT
);
7153 edge etrue
= make_edge (cond_bb
, merge_bb
, EDGE_TRUE_VALUE
);
7154 etrue
->probability
= profile_probability::even ();
7155 edge efalse
= find_edge (cond_bb
, then_bb
);
7156 efalse
->flags
= EDGE_FALSE_VALUE
;
7157 efalse
->probability
-= etrue
->probability
;
7158 then_bb
->count
-= etrue
->count ();
7160 tree othervar
= NULL_TREE
;
7161 if (gimple_assign_rhs1 (use_stmt
) == var
)
7162 othervar
= gimple_assign_rhs2 (use_stmt
);
7163 else if (gimple_assign_rhs2 (use_stmt
) == var
)
7164 othervar
= gimple_assign_rhs1 (use_stmt
);
7167 tree lhs
= gimple_assign_lhs (use_stmt
);
7168 gphi
*phi
= create_phi_node (lhs
, merge_bb
);
7169 add_phi_arg (phi
, build_one_cst (TREE_TYPE (lhs
)), etrue
, loc
);
7170 add_phi_arg (phi
, othervar
, single_succ_edge (then_bb
), loc
);
7171 gsi
= gsi_for_stmt (use_stmt
);
7172 gsi_remove (&gsi
, true);
7174 set_immediate_dominator (CDI_DOMINATORS
, merge_bb
, cond_bb
);
7175 set_immediate_dominator (CDI_POST_DOMINATORS
, cond_bb
, merge_bb
);
7177 reassoc_branch_fixups
.release ();
7180 void dump_ops_vector (FILE *file
, vec
<operand_entry
*> ops
);
7181 void debug_ops_vector (vec
<operand_entry
*> ops
);
7183 /* Dump the operand entry vector OPS to FILE. */
7186 dump_ops_vector (FILE *file
, vec
<operand_entry
*> ops
)
7191 FOR_EACH_VEC_ELT (ops
, i
, oe
)
7193 fprintf (file
, "Op %d -> rank: %d, tree: ", i
, oe
->rank
);
7194 print_generic_expr (file
, oe
->op
);
7195 fprintf (file
, "\n");
7199 /* Dump the operand entry vector OPS to STDERR. */
7202 debug_ops_vector (vec
<operand_entry
*> ops
)
7204 dump_ops_vector (stderr
, ops
);
7207 /* Bubble up return status from reassociate_bb. */
7212 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
7213 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun
));
7216 /* Initialize the reassociation pass. */
7223 int *bbs
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
);
7225 /* Find the loops, so that we can prevent moving calculations in
7227 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
7229 memset (&reassociate_stats
, 0, sizeof (reassociate_stats
));
7231 next_operand_entry_id
= 0;
7233 /* Reverse RPO (Reverse Post Order) will give us something where
7234 deeper loops come later. */
7235 pre_and_rev_post_order_compute (NULL
, bbs
, false);
7236 bb_rank
= XCNEWVEC (int64_t, last_basic_block_for_fn (cfun
));
7237 operand_rank
= new hash_map
<tree
, int64_t>;
7239 /* Give each default definition a distinct rank. This includes
7240 parameters and the static chain. Walk backwards over all
7241 SSA names so that we get proper rank ordering according
7242 to tree_swap_operands_p. */
7243 for (i
= num_ssa_names
- 1; i
> 0; --i
)
7245 tree name
= ssa_name (i
);
7246 if (name
&& SSA_NAME_IS_DEFAULT_DEF (name
))
7247 insert_operand_rank (name
, ++rank
);
7250 /* Set up rank for each BB */
7251 for (i
= 0; i
< n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
; i
++)
7252 bb_rank
[bbs
[i
]] = ++rank
<< 16;
7255 calculate_dominance_info (CDI_POST_DOMINATORS
);
7256 plus_negates
= vNULL
;
7259 /* Cleanup after the reassociation pass, and print stats if
7265 statistics_counter_event (cfun
, "Linearized",
7266 reassociate_stats
.linearized
);
7267 statistics_counter_event (cfun
, "Constants eliminated",
7268 reassociate_stats
.constants_eliminated
);
7269 statistics_counter_event (cfun
, "Ops eliminated",
7270 reassociate_stats
.ops_eliminated
);
7271 statistics_counter_event (cfun
, "Statements rewritten",
7272 reassociate_stats
.rewritten
);
7273 statistics_counter_event (cfun
, "Built-in pow[i] calls encountered",
7274 reassociate_stats
.pows_encountered
);
7275 statistics_counter_event (cfun
, "Built-in powi calls created",
7276 reassociate_stats
.pows_created
);
7278 delete operand_rank
;
7279 bitmap_clear (biased_names
);
7280 operand_entry_pool
.release ();
7282 plus_negates
.release ();
7283 free_dominance_info (CDI_POST_DOMINATORS
);
7284 loop_optimizer_finalize ();
7287 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
7288 insertion of __builtin_powi calls.
7290 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
7291 optimization of a gimple conditional. Otherwise returns zero. */
7294 execute_reassoc (bool insert_powi_p
, bool bias_loop_carried_phi_ranks_p
)
7296 reassoc_insert_powi_p
= insert_powi_p
;
7297 reassoc_bias_loop_carried_phi_ranks_p
= bias_loop_carried_phi_ranks_p
;
7301 bool cfg_cleanup_needed
;
7302 cfg_cleanup_needed
= do_reassoc ();
7303 repropagate_negates ();
7307 return cfg_cleanup_needed
? TODO_cleanup_cfg
: 0;
7312 const pass_data pass_data_reassoc
=
7314 GIMPLE_PASS
, /* type */
7315 "reassoc", /* name */
7316 OPTGROUP_NONE
, /* optinfo_flags */
7317 TV_TREE_REASSOC
, /* tv_id */
7318 ( PROP_cfg
| PROP_ssa
), /* properties_required */
7319 0, /* properties_provided */
7320 0, /* properties_destroyed */
7321 0, /* todo_flags_start */
7322 TODO_update_ssa_only_virtuals
, /* todo_flags_finish */
7325 class pass_reassoc
: public gimple_opt_pass
7328 pass_reassoc (gcc::context
*ctxt
)
7329 : gimple_opt_pass (pass_data_reassoc
, ctxt
), insert_powi_p (false)
7332 /* opt_pass methods: */
7333 opt_pass
* clone () final override
{ return new pass_reassoc (m_ctxt
); }
7334 void set_pass_param (unsigned int n
, bool param
) final override
7336 gcc_assert (n
== 0);
7337 insert_powi_p
= param
;
7338 bias_loop_carried_phi_ranks_p
= !param
;
7340 bool gate (function
*) final override
{ return flag_tree_reassoc
!= 0; }
7341 unsigned int execute (function
*) final override
7343 return execute_reassoc (insert_powi_p
, bias_loop_carried_phi_ranks_p
);
7347 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
7348 point 3a in the pass header comment. */
7350 bool bias_loop_carried_phi_ranks_p
;
7351 }; // class pass_reassoc
7356 make_pass_reassoc (gcc::context
*ctxt
)
7358 return new pass_reassoc (ctxt
);