/* Reassociation for trees.
   Copyright (C) 2005-2024 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "gimplify-me.h"
#include "tree-ssa-loop.h"
#include "langhooks.h"
#include "case-cfn-macros.h"
#include "tree-ssa-reassoc.h"
#include "tree-ssa-math-opts.h"
#include "gimple-range.h"
#include "internal-fn.h"

/*  This is a simple global reassociation pass.  It is, in part, based
    on the LLVM pass of the same name (They do some things more/less
    than we do, in different orders, etc).

    It consists of five steps:

    1. Breaking up subtract operations into addition + negate, where
    it would promote the reassociation of adds.

    2. Left linearization of the expression trees, so that (A+B)+(C+D)
    becomes (((A+B)+C)+D), which is easier for us to rewrite later.
    During linearization, we place the operands of the binary
    expressions into a vector of operand_entry_*

    3. Optimization of the operand lists, eliminating things like a +
    -a, a & a, etc.

    3a. Combine repeated factors with the same occurrence counts
    into a __builtin_powi call that will later be optimized into
    an optimal number of multiplies.

    4. Rewrite the expression trees we linearized and optimized so
    they are in proper rank order.

    5. Repropagate negates, as nothing else will clean it up ATM.

    A bit of theory on #4, since nobody seems to write anything down
    about why it makes sense to do it the way they do it:

    We could do this much nicer theoretically, but don't (for reasons
    explained after how to do it theoretically nice :P).

    In order to promote the most redundancy elimination, you want
    binary expressions whose operands are the same rank (or
    preferably, the same value) exposed to the redundancy eliminator,
    for possible elimination.

    So the way to do this if we really cared, is to build the new op
    tree from the leaves to the roots, merging as you go, and putting the
    new op on the end of the worklist, until you are left with one
    thing on the worklist.

    IE if you have to rewrite the following set of operands (listed with
    rank in parentheses), with opcode PLUS_EXPR:

    a (1),  b (1),  c (1),  d (2), e (2)

    We start with our merge worklist empty, and the ops list with all of
    those on it.

    You want to first merge all leaves of the same rank, as much as
    possible.

    So first build a binary op of

    mergetmp = a + b, and put "mergetmp" on the merge worklist.

    Because there is no three operand form of PLUS_EXPR, c is not going to
    be exposed to redundancy elimination as a rank 1 operand.

    So you might as well throw it on the merge worklist (you could also
    consider it to now be a rank two operand, and merge it with d and e,
    but in this case, you then have evicted e from a binary op.  So at
    least in this situation, you can't win.)

    Then build a binary op of d + e
    mergetmp2 = d + e

    and put mergetmp2 on the merge worklist.

    so merge worklist = {mergetmp, c, mergetmp2}

    Continue building binary ops of these operations until you have only
    one operation left on the worklist.

    So we have

    build binary op
    mergetmp3 = mergetmp + c

    worklist = {mergetmp2, mergetmp3}

    mergetmp4 = mergetmp2 + mergetmp3

    worklist = {mergetmp4}

    because we have one operation left, we can now just set the original
    statement equal to the result of that operation.

    This will at least expose a + b and d + e to redundancy elimination
    as binary operations.

    For extra points, you can reuse the old statements to build the
    mergetmps, since you shouldn't run out.

    So why don't we do this?

    Because it's expensive, and rarely will help.  Most trees we are
    reassociating have 3 or less ops.  If they have 2 ops, they already
    will be written into a nice single binary op.  If you have 3 ops, a
    single simple check suffices to tell you whether the first two are of the
    same rank.  If so, you know to order it

    mergetmp = op1 + op2
    newstmt = mergetmp + op3

    instead of
    mergetmp = op2 + op3
    newstmt = mergetmp + op1

    If all three are of the same rank, you can't expose them all in a
    single binary operator anyway, so the above is *still* the best you
    can do.

    Thus, this is what we do.  When we have three ops left, we check to see
    what order to put them in, and call it a day.  As a nod to vector sum
    reduction, we check if any of the ops are really a phi node that is a
    destructive update for the associating op, and keep the destructive
    update together for vector sum reduction recognition.  */
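
/* Illustrative only, not part of the pass: a minimal sketch of the "merge
   worklist" scheme described above, written against plain C++ containers
   instead of GIMPLE and operand_entry vectors.  The op type, the names it
   prints and the assumption that the input is sorted by rank are all
   hypothetical; this only mirrors the walk-through for a(1) ... e(2).  */
#if 0
#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

struct op { std::string name; int rank; };

/* Merge same-rank leaves pairwise first, push the merged temporaries on a
   worklist, then keep combining the first two worklist entries until a
   single operation remains, printing each merge.  */
static op
merge_all (std::vector<op> ops)	// ops assumed sorted by ascending rank
{
  std::vector<op> worklist;
  int n = 0;
  for (size_t i = 0; i < ops.size (); )
    if (i + 1 < ops.size () && ops[i].rank == ops[i + 1].rank)
      {
	op t = { "mergetmp" + std::to_string (++n), ops[i].rank + 1 };
	std::printf ("%s = %s + %s\n", t.name.c_str (),
		     ops[i].name.c_str (), ops[i + 1].name.c_str ());
	worklist.push_back (t);
	i += 2;
      }
    else
      worklist.push_back (ops[i++]);
  while (worklist.size () > 1)
    {
      op a = worklist[0], b = worklist[1];
      worklist.erase (worklist.begin (), worklist.begin () + 2);
      op t = { "mergetmp" + std::to_string (++n),
	       std::max (a.rank, b.rank) + 1 };
      std::printf ("%s = %s + %s\n", t.name.c_str (),
		   a.name.c_str (), b.name.c_str ());
      worklist.push_back (t);
    }
  return worklist[0];
}

int
main ()
{
  std::vector<op> ops = { {"a", 1}, {"b", 1}, {"c", 1}, {"d", 2}, {"e", 2} };
  merge_all (ops);	// prints the same merges as the walk-through above
  return 0;
}
#endif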

/* Enable insertion of __builtin_powi calls during execute_reassoc.  See
   point 3a in the pass header comment.  */
static bool reassoc_insert_powi_p;

/* Enable biasing ranks of loop accumulators.  We don't want this before
   vectorization, since it interferes with reduction chains.  */
static bool reassoc_bias_loop_carried_phi_ranks_p;

/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;


static object_allocator<operand_entry> operand_entry_pool
  ("operand entry pool");

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static unsigned int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static int64_t *bb_rank;

/* Operand->rank hashtable.  */
static hash_map<tree, int64_t> *operand_rank;

/* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
   biased.  */
static auto_bitmap biased_names;

/* Vector of SSA_NAMEs on which after reassociate_bb is done with
   all basic blocks the CFG should be adjusted - basic blocks
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;

/* Forward decls.  */
static int64_t get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);

/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

static bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple *end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}

/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)
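
/* Illustrative only, not part of the pass: a quick check of the two bounds
   stated above.  The 1 << 16 block spacing is an assumption, inferred from
   the ">> 16" comparison of bb_rank values in sort_by_operand_rank below.  */
#if 0
constexpr long long assumed_block_spacing = 1LL << 16;
constexpr long long bias = 1 << 15;	// PHI_LOOP_BIAS
static_assert (bias < assumed_block_spacing,
	       "a biased rank never reaches the next block's rank");
static_assert (bias > 64,
	       "and comfortably exceeds any realistic reassociation tree depth");
#endif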

/* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
   operands to the STMT's left-hand side.  The goal is to preserve bias in code
   where the result of a loop-carried phi feeds a chain of single-use
   statements inside the loop.  That is, we need to preserve bias along
   single-use chains originating from loop-carried phis.  Only GIMPLE_ASSIGNs
   to SSA_NAMEs are considered to be uses, because only they participate in
   rank propagation.  */
static bool
propagate_bias_p (gimple *stmt)
{
  use_operand_p use;
  imm_use_iterator use_iter;
  gimple *single_use_stmt = NULL;

  if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference)
    return false;

  FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
    {
      gimple *current_use_stmt = USE_STMT (use);

      if (is_gimple_assign (current_use_stmt)
	  && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
	{
	  if (single_use_stmt != NULL && single_use_stmt != current_use_stmt)
	    return false;
	  single_use_stmt = current_use_stmt;
	}
    }

  if (single_use_stmt == NULL)
    return false;

  if (gimple_bb (stmt)->loop_father
      != gimple_bb (single_use_stmt)->loop_father)
    return false;

  return true;
}

/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
static int64_t
phi_rank (gimple *stmt)
{
  basic_block bb = gimple_bb (stmt);
  class loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple *use_stmt;

  if (!reassoc_bias_loop_carried_phi_ranks_p)
    return bb_rank[bb->index];

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
	  if (gimple_bb (def_stmt)->loop_father == father)
	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
	}
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}

/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */
static int64_t
propagate_rank (int64_t rank, tree op, bool *maybe_biased_p)
{
  int64_t op_rank;

  op_rank = get_rank (op);

  /* Check whether op is biased after the get_rank () call, since it might have
     updated biased_names.  */
  if (TREE_CODE (op) == SSA_NAME
      && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
    {
      if (maybe_biased_p == NULL)
	return rank;
      *maybe_biased_p = true;
    }

  return MAX (rank, op_rank);
}

/* Look up the operand rank structure for expression E.  */

static inline int64_t
find_operand_rank (tree e)
{
  int64_t *slot = operand_rank->get (e);
  return slot ? *slot : -1;
}

/* Insert {E,RANK} into the operand rank hashtable.  */

static inline void
insert_operand_rank (tree e, int64_t rank)
{
  gcc_assert (rank > 0);
  gcc_assert (!operand_rank->put (e, rank));
}

/* Given an expression E, return the rank of the expression.  */

static int64_t
get_rank (tree e)
{
  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      ssa_op_iter iter;
      gimple *stmt;
      int64_t rank;
      tree op;

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
	return rank;

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  rank = phi_rank (stmt);
	  if (rank != bb_rank[gimple_bb (stmt)->index])
	    bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
	}

      else if (!is_gimple_assign (stmt))
	rank = bb_rank[gimple_bb (stmt)->index];

      else
	{
	  bool biased_p = false;
	  bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;

	  /* Otherwise, find the maximum rank for the operands.  As an
	     exception, remove the bias from loop-carried phis when propagating
	     the rank so that dependent operations are not also biased.  */
	  /* Simply walk over all SSA uses - this takes advantage of the
	     fact that non-SSA operands are is_gimple_min_invariant and
	     thus have rank 0.  */
	  rank = 0;
	  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
	    rank = propagate_rank (rank, op, maybe_biased_p);

	  rank += 1;
	  if (biased_p)
	    bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Rank for ");
	  print_generic_expr (dump_file, e);
	  fprintf (dump_file, " is %" PRId64 "\n", rank);
	}

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, rank);
      return rank;
    }

  /* Constants, globals, etc., are rank 0 */
  return 0;
}

/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 4
#define FLOAT_ONE_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */

static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    {
      /* Sort -1.0 and 1.0 constants last, while in some cases
	 const_binop can't optimize some inexact operations, multiplication
	 by -1.0 or 1.0 can be always merged with others.  */
      if (real_onep (t) || real_minus_onep (t))
	return FLOAT_ONE_CONST_TYPE;
      return FLOAT_CONST_TYPE;
    }
  else
    return OTHER_CONST_TYPE;
}
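
/* Illustrative only, not part of the pass: combined with the
   "constant_type (oea->op) - constant_type (oeb->op)" comparison in
   sort_by_operand_rank below, qsort places a larger classification later,
   so among rank-0 operands integer constants end up last, preceded by
   +/-1.0, other floats, and everything else.  A plain-int sketch of that
   ordering:  */
#if 0
#include <algorithm>
#include <vector>

enum { OTHER = 1 << 1, FLT = 1 << 2, FLT_ONE = 1 << 3, INT_CST = 1 << 4 };

int
main ()
{
  std::vector<int> kinds = { INT_CST, OTHER, FLT_ONE, FLT, INT_CST };
  // Same sense as the qsort callback: a positive difference sorts 'a' later.
  std::sort (kinds.begin (), kinds.end (),
	     [] (int a, int b) { return a - b < 0; });
  // kinds is now { OTHER, FLT, FLT_ONE, INT_CST, INT_CST }: the integer
  // constants are last, as the comment above wants.
  return 0;
}
#endif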

/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry *oea = *(const operand_entry *const *)pa;
  const operand_entry *oeb = *(const operand_entry *const *)pb;

  if (oeb->rank != oea->rank)
    return oeb->rank > oea->rank ? 1 : -1;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
	return constant_type (oea->op) - constant_type (oeb->op);
      else
	/* To make sorting result stable, we use unique IDs to determine
	   order.  */
	return oeb->id > oea->id ? 1 : -1;
    }

  if (TREE_CODE (oea->op) != SSA_NAME)
    {
      if (TREE_CODE (oeb->op) != SSA_NAME)
	return oeb->id > oea->id ? 1 : -1;
      else
	return 1;
    }
  else if (TREE_CODE (oeb->op) != SSA_NAME)
    return -1;

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
    {
      /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
	 versions of removed SSA_NAMEs, so if possible, prefer to sort
	 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.  */
      gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
      gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
      basic_block bba = gimple_bb (stmta);
      basic_block bbb = gimple_bb (stmtb);
      if (bbb != bba)
	{
	  /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
	     but the other might not.  */
	  if (!bba)
	    return 1;
	  if (!bbb)
	    return -1;
	  /* If neither is, compare bb_rank.  */
	  if (bb_rank[bbb->index] != bb_rank[bba->index])
	    return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
	}

      bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
      bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
      if (da != db)
	return da ? 1 : -1;

      return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
    }

  return oeb->id > oea->id ? 1 : -1;
}

/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  oe->stmt_to_insert = stmt_to_insert;
  ops->safe_push (oe);
}

/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
		       HOST_WIDE_INT repeat)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  oe->stmt_to_insert = NULL;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}

/* Returns true if we can associate the SSA def OP.  */

static bool
can_reassociate_op_p (tree op)
{
  if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
    return false;
  /* Make sure asm goto outputs do not participate in reassociation since
     we have no way to find an insertion place after asm goto.  */
  if (TREE_CODE (op) == SSA_NAME
      && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
      && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
    return false;
  return true;
}

/* Returns true if we can reassociate operations of TYPE.
   That is for integral or non-saturating fixed-point types, and for
   floating point type when associative-math is enabled.  */

static bool
can_reassociate_type_p (tree type)
{
  if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
      || NON_SAT_FIXED_POINT_TYPE_P (type)
      || (flag_associative_math && FLOAT_TYPE_P (type)))
    return true;
  return false;
}

/* Return true if STMT is reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    {
      tree rhs1 = gimple_assign_rhs1 (stmt);
      tree rhs2 = gimple_assign_rhs2 (stmt);
      if (!can_reassociate_op_p (rhs1)
	  || (rhs2 && !can_reassociate_op_p (rhs2)))
	return false;
      return true;
    }

  return false;
}

/* Return true if STMT is a nop-conversion.  */

static bool
gimple_nop_conversion_p (gimple *stmt)
{
  if (gassign *ass = dyn_cast <gassign *> (stmt))
    {
      if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
	  && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
				    TREE_TYPE (gimple_assign_rhs1 (ass))))
	return true;
    }
  return false;
}

/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of the negate operation.  Otherwise, return NULL.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple *stmt = SSA_NAME_DEF_STMT (name);

  /* Look through nop conversions (sign changes).  */
  if (gimple_nop_conversion_p (stmt)
      && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
    stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}

/* Return true if OP1 and OP2 have the same value if casted to either type.  */

static bool
ops_equal_values_p (tree op1, tree op2)
{
  if (op1 == op2)
    return true;

  tree orig_op1 = op1;
  if (TREE_CODE (op1) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op1);
      if (gimple_nop_conversion_p (stmt))
	{
	  op1 = gimple_assign_rhs1 (stmt);
	  if (op1 == op2)
	    return true;
	}
    }

  if (TREE_CODE (op2) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op2);
      if (gimple_nop_conversion_p (stmt))
	{
	  op2 = gimple_assign_rhs1 (stmt);
	  if (op2 == op1
	      || op2 == orig_op1)
	    return true;
	}
    }

  return false;
}


/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
			  vec<operand_entry *> *ops,
			  bool *all_done,
			  unsigned int i,
			  operand_entry *curr,
			  operand_entry *last)
{

  /* If we have two of the same op, and the opcode is & |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
	{
	case MAX_EXPR:
	case MIN_EXPR:
	case BIT_IOR_EXPR:
	case BIT_AND_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op);
	      fprintf (dump_file, " [&|minmax] ");
	      print_generic_expr (dump_file, last->op);
	      fprintf (dump_file, " -> ");
	      print_generic_stmt (dump_file, last->op);
	    }

	  ops->ordered_remove (i);
	  reassociate_stats.ops_eliminated ++;

	  return true;

	case BIT_XOR_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op);
	      fprintf (dump_file, " ^ ");
	      print_generic_expr (dump_file, last->op);
	      fprintf (dump_file, " -> nothing\n");
	    }

	  reassociate_stats.ops_eliminated += 2;

	  if (ops->length () == 2)
	    {
	      ops->truncate (0);
	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
	      *all_done = true;
	    }
	  else
	    {
	      ops->ordered_remove (i-1);
	      ops->ordered_remove (i-1);
	    }

	  return true;

	default:
	  break;
	}
    }
  return false;
}

static vec<tree> plus_negates;

/* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
   expression, look in OPS for a corresponding positive operation to cancel
   it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
			   vec<operand_entry *> *ops,
			   unsigned int currindex,
			   operand_entry *curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1 ;
       i++)
    {
      if (negateop
	  && ops_equal_values_p (oe->op, negateop))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, negateop);
	      fprintf (dump_file, " + -");
	      print_generic_expr (dump_file, oe->op);
	      fprintf (dump_file, " -> 0\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
      else if (notop
	       && ops_equal_values_p (oe->op, notop))
	{
	  tree op_type = TREE_TYPE (oe->op);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop);
	      fprintf (dump_file, " + ~");
	      print_generic_expr (dump_file, oe->op);
	      fprintf (dump_file, " -> -1\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_all_ones_cst (op_type));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
    }

  /* If CURR->OP is a negate expr without nop conversion in a plus expr:
     save it for later inspection in repropagate_negates().  */
  if (negateop != NULL_TREE
      && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
    plus_negates.safe_push (curr->op);

  return false;
}

/* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0, and return true.  Otherwise, return
   false. */

static bool
eliminate_not_pairs (enum tree_code opcode,
		     vec<operand_entry *> *ops,
		     unsigned int currindex,
		     operand_entry *curr)
{
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " & ~");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " | ~");
	      print_generic_expr (dump_file, oe->op);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " -> 0\n");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " -> -1\n");
	    }

	  if (opcode == BIT_AND_EXPR)
	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
	  else if (opcode == BIT_IOR_EXPR)
	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));

	  reassociate_stats.ops_eliminated += ops->length () - 1;
	  ops->truncate (0);
	  ops->quick_push (oe);
	  return true;
	}
    }

  return false;
}

/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */

static void
eliminate_using_constants (enum tree_code opcode,
			   vec<operand_entry *> *ops)
{
  operand_entry *oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
	{
	case BIT_AND_EXPR:
	  if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & -1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case BIT_IOR_EXPR:
	  if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | -1, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case MULT_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && !HONOR_NANS (type)
		  && !HONOR_SIGNED_ZEROS (type)
		  && real_zerop (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;
		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_onep (oelast->op)
		   || (FLOAT_TYPE_P (type)
		       && !HONOR_SNANS (type)
		       && real_onep (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case BIT_XOR_EXPR:
	case PLUS_EXPR:
	case MINUS_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
		  && fold_real_zero_addition_p (type, 0, oelast->op,
						opcode == MINUS_EXPR)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found [|^+] 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	default:
	  break;
	}
    }
}

static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
				 bool, bool);

/* Structure for tracking and counting operands.  */
struct oecount {
  unsigned int cnt;
  unsigned int id;
  enum tree_code oecode;
  tree op;
};


/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;


/* Oecount hashtable helpers.  */

struct oecount_hasher : int_hash <int, 0, 1>
{
  static inline hashval_t hash (int);
  static inline bool equal (int, int);
};

/* Hash function for oecount.  */

inline hashval_t
oecount_hasher::hash (int p)
{
  const oecount *c = &cvec[p - 42];
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}

/* Comparison function for oecount.  */

inline bool
oecount_hasher::equal (int p1, int p2)
{
  const oecount *c1 = &cvec[p1 - 42];
  const oecount *c2 = &cvec[p2 - 42];
  return c1->oecode == c2->oecode && c1->op == c2->op;
}

/* Comparison function for qsort sorting oecount elements by count.  */

static int
oecount_cmp (const void *p1, const void *p2)
{
  const oecount *c1 = (const oecount *)p1;
  const oecount *c2 = (const oecount *)p2;
  if (c1->cnt != c2->cnt)
    return c1->cnt > c2->cnt ? 1 : -1;
  else
    /* If counts are identical, use unique IDs to stabilize qsort.  */
    return c1->id > c2->id ? 1 : -1;
}

/* Return TRUE iff STMT represents a builtin call that raises OP
   to some exponent.  */

static bool
stmt_is_power_of_op (gimple *stmt, tree op)
{
  if (!is_gimple_call (stmt))
    return false;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));

    default:
      return false;
    }
}

/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */

static HOST_WIDE_INT
decrement_power (gimple *stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, SIGNED);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_CFN_POWI:
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      gcc_unreachable ();
    }
}

/* Replace SSA defined by STMT and replace all its uses with new
   SSA.  Also return the new SSA.  */

static tree
make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
{
  gimple *use_stmt;
  use_operand_p use;
  imm_use_iterator iter;
  tree new_lhs, new_debug_lhs = NULL_TREE;
  tree lhs = gimple_get_lhs (stmt);

  new_lhs = make_ssa_name (TREE_TYPE (lhs));
  gimple_set_lhs (stmt, new_lhs);

  /* Also need to update GIMPLE_DEBUGs.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      tree repl = new_lhs;
      if (is_gimple_debug (use_stmt))
	{
	  if (new_debug_lhs == NULL_TREE)
	    {
	      new_debug_lhs = build_debug_expr_decl (TREE_TYPE (lhs));
	      gdebug *def_temp
		= gimple_build_debug_bind (new_debug_lhs,
					   build2 (opcode, TREE_TYPE (lhs),
						   new_lhs, op),
					   stmt);
	      gimple_set_uid (def_temp, gimple_uid (stmt));
	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	      gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
	    }
	  repl = new_debug_lhs;
	}
      FOR_EACH_IMM_USE_ON_STMT (use, iter)
	SET_USE (use, repl);
      update_stmt (use_stmt);
    }
  return new_lhs;
}

/* Replace all SSAs defined in STMTS_TO_FIX and replace its
   uses with new SSAs.  Also do this for the stmt that defines DEF
   if *DEF is not OP.  */

static void
make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
			   vec<gimple *> &stmts_to_fix)
{
  unsigned i;
  gimple *stmt;

  if (*def != op
      && TREE_CODE (*def) == SSA_NAME
      && (stmt = SSA_NAME_DEF_STMT (*def))
      && gimple_code (stmt) != GIMPLE_NOP)
    *def = make_new_ssa_for_def (stmt, opcode, op);

  FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
    make_new_ssa_for_def (stmt, opcode, op);
}

/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
{
  tree lhs;
  gimple *use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  reassoc_remove_stmt (&gsi);
  release_defs (stmt);
}

/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  tree orig_def = *def;
  gimple *stmt = SSA_NAME_DEF_STMT (*def);
  /* PR72835 - Record the stmt chain that has to be updated such that
     we don't use the same LHS when the values computed are different.  */
  auto_vec<gimple *, 64> stmts_to_fix;

  do
    {
      tree name;

      if (opcode == MULT_EXPR)
	{
	  if (stmt_is_power_of_op (stmt, op))
	    {
	      if (decrement_power (stmt) == 1)
		{
		  if (stmts_to_fix.length () > 0)
		    stmts_to_fix.pop ();
		  propagate_op_to_single_use (op, stmt, def);
		}
	      break;
	    }
	  else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
	    {
	      if (gimple_assign_rhs1 (stmt) == op)
		{
		  tree cst = build_minus_one_cst (TREE_TYPE (op));
		  if (stmts_to_fix.length () > 0)
		    stmts_to_fix.pop ();
		  propagate_op_to_single_use (cst, stmt, def);
		  break;
		}
	      else if (integer_minus_onep (op)
		       || real_minus_onep (op))
		{
		  gimple_assign_set_rhs_code
		    (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
		  break;
		}
	    }
	}

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
	 is ours simply propagate the other operand into the stmts
	 single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
	  && (name == op
	      || gimple_assign_rhs2 (stmt) == op))
	{
	  if (name == op)
	    name = gimple_assign_rhs2 (stmt);
	  if (stmts_to_fix.length () > 0)
	    stmts_to_fix.pop ();
	  propagate_op_to_single_use (name, stmt, def);
	  break;
	}

      /* We might have a multiply of two __builtin_pow* calls, and
	 the operand might be hiding in the rightmost one.  Likewise
	 this can happen for a negate.  */
      if (opcode == MULT_EXPR
	  && gimple_assign_rhs_code (stmt) == opcode
	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
	  && has_single_use (gimple_assign_rhs2 (stmt)))
	{
	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
	  if (stmt_is_power_of_op (stmt2, op))
	    {
	      if (decrement_power (stmt2) == 1)
		propagate_op_to_single_use (op, stmt2, def);
	      else
		stmts_to_fix.safe_push (stmt2);
	      break;
	    }
	  else if (is_gimple_assign (stmt2)
		   && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
	    {
	      if (gimple_assign_rhs1 (stmt2) == op)
		{
		  tree cst = build_minus_one_cst (TREE_TYPE (op));
		  propagate_op_to_single_use (cst, stmt2, def);
		  break;
		}
	      else if (integer_minus_onep (op)
		       || real_minus_onep (op))
		{
		  stmts_to_fix.safe_push (stmt2);
		  gimple_assign_set_rhs_code
		    (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
		  break;
		}
	    }
	}

      /* Continue walking the chain.  */
      gcc_assert (name != op
		  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
      stmts_to_fix.safe_push (stmt);
    }
  while (1);

  if (stmts_to_fix.length () > 0 || *def == orig_def)
    make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
}

/* Returns true if statement S1 dominates statement S2.  Like
   stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */

static bool
reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     SSA_NAME.  Assume it lives at the beginning of function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 == bb2)
    {
      /* PHIs in the same basic block are assumed to be
	 executed all in parallel, if only one stmt is a PHI,
	 it dominates the other stmt in the same basic block.  */
      if (gimple_code (s1) == GIMPLE_PHI)
	return true;

      if (gimple_code (s2) == GIMPLE_PHI)
	return false;

      gcc_assert (gimple_uid (s1) && gimple_uid (s2));

      if (gimple_uid (s1) < gimple_uid (s2))
	return true;

      if (gimple_uid (s1) > gimple_uid (s2))
	return false;

      gimple_stmt_iterator gsi = gsi_for_stmt (s1);
      unsigned int uid = gimple_uid (s1);
      for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *s = gsi_stmt (gsi);
	  if (gimple_uid (s) != uid)
	    break;
	  if (s == s2)
	    return true;
	}

      return false;
    }

  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}

/* Insert STMT after INSERT_POINT.  */

static void
insert_stmt_after (gimple *stmt, gimple *insert_point)
{
  gimple_stmt_iterator gsi;
  basic_block bb;

  if (gimple_code (insert_point) == GIMPLE_PHI)
    bb = gimple_bb (insert_point);
  else if (!stmt_ends_bb_p (insert_point))
    {
      gsi = gsi_for_stmt (insert_point);
      gimple_set_uid (stmt, gimple_uid (insert_point));
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
      return;
    }
  else if (gimple_code (insert_point) == GIMPLE_ASM
	   && gimple_asm_nlabels (as_a <gasm *> (insert_point)) != 0)
    /* We have no idea where to insert - it depends on where the
       uses will be placed.  */
    gcc_unreachable ();
  else
    /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
       thus if it must end a basic block, it should be a call that can
       throw, or some assignment that can throw.  If it throws, the LHS
       of it will not be initialized though, so only valid places using
       the SSA_NAME should be dominated by the fallthru edge.  */
    bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
  gsi = gsi_after_labels (bb);
  if (gsi_end_p (gsi))
    {
      gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
      gimple_set_uid (stmt,
		      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
    }
  else
    gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}

/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
   the result.  Places the statement after the definition of either
   OP1 or OP2.  Returns the new statement.  */

static gimple *
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
{
  gimple *op1def = NULL, *op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gassign *sum;

  /* Create the addition statement.  */
  op = make_ssa_name (type);
  sum = gimple_build_assign (op, opcode, op1, op2);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
      if (!gsi_end_p (gsi)
	  && is_gimple_call (gsi_stmt (gsi))
	  && (gimple_call_flags (gsi_stmt (gsi)) & ECF_RETURNS_TWICE))
	{
	  /* Don't add statements before a returns_twice call at the start
	     of a function.  */
	  split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	  gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	}
      if (gsi_end_p (gsi))
	{
	  gimple_stmt_iterator gsi2
	    = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	  gimple_set_uid (sum,
			  gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
	}
      else
	gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else
    {
      gimple *insert_point;
      if ((!op1def || gimple_nop_p (op1def))
	   || (op2def && !gimple_nop_p (op2def)
	       && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
	insert_point = op2def;
      else
	insert_point = op1def;
      insert_stmt_after (sum, insert_point);
    }

  return sum;
}

/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
	second candidate bitmap noting all multiplication/division chains
	that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
	candidates with one (operand, code) operation removed from their
	multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
	multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
	processing of (operand, code) pairs continues.

  The alternate addition chains built are re-processed by the main
  reassociation algorithm which allows optimizing a * x * y + b * y * x
  to (a + b ) * x * y in one invocation of the reassociation pass.  */
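
/* Illustrative only, not part of the pass: the counting step described
   above - tallying (operand, code) pairs across the inner chains and
   picking the most frequent one - sketched with std::map over plain
   strings instead of the cvec/ctable machinery used below.  The chain
   contents are made-up examples.  */
#if 0
#include <map>
#include <string>
#include <utility>
#include <vector>

int
main ()
{
  // Inner multiplication chains of three summands: a*x*y + b*y*x + c*x.
  std::vector<std::vector<std::string>> chains
    = { { "a", "x", "y" }, { "b", "y", "x" }, { "c", "x" } };

  // Count how often each (operand, code) pair occurs; the code is always
  // '*' in this example.
  std::map<std::pair<std::string, char>, unsigned> counts;
  for (const auto &chain : chains)
    for (const auto &op : chain)
      ++counts[{ op, '*' }];

  // The most frequent pair ("x", '*') occurs in all three chains, so it is
  // the one undistributed first: a*x*y + b*y*x + c*x -> (a*y + b*y + c)*x.
  return 0;
}
#endif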

static bool
undistribute_ops_list (enum tree_code opcode,
		       vec<operand_entry *> *ops, class loop *loop)
{
  unsigned int length = ops->length ();
  operand_entry *oe1;
  unsigned i, j;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  vec<operand_entry *> *subops;
  bool changed = false;
  unsigned int next_oecount_id = 0;

  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process.  */
  auto_sbitmap candidates (length);
  bitmap_clear (candidates);
  nr_candidates = 0;
  FOR_EACH_VEC_ELT (*ops, i, oe1)
    {
      enum tree_code dcode;
      gimple *oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
	continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
	continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
	   && dcode != RDIV_EXPR)
	  || !is_reassociable_op (oe1def, dcode, loop))
	continue;

      bitmap_set_bit (candidates, i);
      nr_candidates++;
    }

  if (nr_candidates < 2)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
			  (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.  */
  cvec.create (0);

  hash_table<oecount_hasher> ctable (15);

  /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to workaround that limitation.  */
  typedef vec<operand_entry *> vec_operand_entry_t_heap;
  subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
    {
      gimple *oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
      oecode = gimple_assign_rhs_code (oedef);
      linearize_expr_tree (&subops[i], oedef,
			   associative_tree_code (oecode), false);

      FOR_EACH_VEC_ELT (subops[i], j, oe1)
	{
	  oecount c;
	  int *slot;
	  int idx;
	  c.oecode = oecode;
	  c.cnt = 1;
	  c.id = next_oecount_id++;
	  c.op = oe1->op;
	  cvec.safe_push (c);
	  idx = cvec.length () + 41;
	  slot = ctable.find_slot (idx, INSERT);
	  if (!*slot)
	    *slot = idx;
	  else
	    {
	      cvec.pop ();
	      cvec[*slot - 42].cnt++;
	    }
	}
    }

  /* Sort the counting table.  */
  cvec.qsort (oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      FOR_EACH_VEC_ELT (cvec, j, c)
	{
	  fprintf (dump_file, "  %u %s: ", c->cnt,
		   c->oecode == MULT_EXPR
		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
	  print_generic_expr (dump_file, c->op);
	  fprintf (dump_file, "\n");
	}
    }

  /* Process the (operand, code) pairs in order of most occurrence.  */
  auto_sbitmap candidates2 (length);
  while (!cvec.is_empty ())
    {
      oecount *c = &cvec.last ();
      if (c->cnt < 2)
	break;

      /* Now collect the operands in the outer chain that contain
	 the common operand in their inner chain.  */
      bitmap_clear (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
	{
	  gimple *oedef;
	  enum tree_code oecode;
	  unsigned j;
	  tree op = (*ops)[i]->op;

	  /* If we undistributed in this chain already this may be
	     a constant.  */
	  if (TREE_CODE (op) != SSA_NAME)
	    continue;

	  oedef = SSA_NAME_DEF_STMT (op);
	  oecode = gimple_assign_rhs_code (oedef);
	  if (oecode != c->oecode)
	    continue;

	  FOR_EACH_VEC_ELT (subops[i], j, oe1)
	    {
	      if (oe1->op == c->op)
		{
		  bitmap_set_bit (candidates2, i);
		  ++nr_candidates2;
		  break;
		}
	    }
	}

      if (nr_candidates2 >= 2)
	{
	  operand_entry *oe1, *oe2;
	  gimple *prod;
	  int first = bitmap_first_set_bit (candidates2);

	  /* Build the new addition chain.  */
	  oe1 = (*ops)[first];
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Building (");
	      print_generic_expr (dump_file, oe1->op);
	    }
	  zero_one_operation (&oe1->op, c->oecode, c->op);
	  EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
	    {
	      gimple *sum;
	      oe2 = (*ops)[i];
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " + ");
		  print_generic_expr (dump_file, oe2->op);
		}
	      zero_one_operation (&oe2->op, c->oecode, c->op);
	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
				       oe1->op, oe2->op, opcode);
	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
	      oe2->rank = 0;
	      oe1->op = gimple_get_lhs (sum);
	    }

	  /* Apply the multiplication/division.  */
	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
				    oe1->op, c->op, c->oecode);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
	      print_generic_expr (dump_file, c->op);
	      fprintf (dump_file, "\n");
	    }

	  /* Record it in the addition chain and disable further
	     undistribution with this op.  */
	  oe1->op = gimple_assign_lhs (prod);
	  oe1->rank = get_rank (oe1->op);
	  subops[first].release ();

	  changed = true;
	}

      cvec.pop ();
    }

  for (i = 0; i < ops->length (); ++i)
    subops[i].release ();
  free (subops);
  cvec.release ();

  return changed;
}

/* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
   first: element index for each relevant BIT_FIELD_REF.
   second: the index of vec ops* for each relevant BIT_FIELD_REF.  */
typedef std::pair<unsigned, unsigned> v_info_elem;
struct v_info {
  tree vec_type;
  auto_vec<v_info_elem, 32> vec;
};
typedef v_info *v_info_ptr;

/* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode.  */
static int
sort_by_mach_mode (const void *p_i, const void *p_j)
{
  const tree tr1 = *((const tree *) p_i);
  const tree tr2 = *((const tree *) p_j);
  unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
  unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
  if (mode1 > mode2)
    return 1;
  else if (mode1 < mode2)
    return -1;
  if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
    return -1;
  else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
    return 1;
  return 0;
}

/* Cleanup hash map for VECTOR information.  */
static void
cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
{
  for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
       it != info_map.end (); ++it)
    {
      v_info_ptr info = (*it).second;
      delete info;
      (*it).second = NULL;
    }
}

/* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
     V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
   is transformed to
     Vs = (V1 + V2 + ... + Vn)
     Vs[0] + Vs[1] + ... + Vs[k]

   The basic steps are listed below:

    1) Check the addition chain *OPS by looking those summands coming from
       VECTOR bit_field_ref on VECTOR type.  Put the information into
       v_info_map for each satisfied summand, using VECTOR SSA_NAME as key.

    2) For each key (VECTOR SSA_NAME), validate all its BIT_FIELD_REFs are
       continuous, they can cover the whole VECTOR perfectly without any holes.
       Obtain one VECTOR list which contain candidates to be transformed.

    3) Sort the VECTOR list by machine mode of VECTOR type, for each group of
       candidates with same mode, build the addition statements for them and
       generate BIT_FIELD_REFs accordingly.

   TODO:
       The current implementation requires the whole VECTORs should be fully
       covered, but it can be extended to support partial, checking adjacent
       but not fill the whole, it may need some cost model to define the
       boundary to do or not.
*/
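
/* Illustrative only, not part of the pass: the validation in step 2) above -
   every element index of a candidate vector must be seen exactly once -
   sketched with std::bitset instead of sbitmap.  The helper name and the
   index lists are made-up examples.  */
#if 0
#include <bitset>
#include <vector>

/* Return true iff IDXS covers 0 .. NELTS-1 exactly once (no holes, no
   duplicates), the same condition the sbitmap "holes" check implements.  */
static bool
covers_whole_vector (const std::vector<unsigned> &idxs, unsigned nelts)
{
  if (idxs.size () != nelts)
    return false;
  std::bitset<64> holes;	// assume small vectors for the sketch
  for (unsigned i = 0; i < nelts; i++)
    holes.set (i);
  for (unsigned idx : idxs)
    {
      if (idx >= nelts || !holes.test (idx))
	return false;		// out of range or duplicate
      holes.reset (idx);
    }
  return holes.none ();
}

int
main ()
{
  bool ok = covers_whole_vector ({ 3, 1, 0, 2 }, 4);	// true
  bool bad = covers_whole_vector ({ 0, 1, 1, 3 }, 4);	// false: hole at 2
  return ok && !bad ? 0 : 1;
}
#endif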
1922 undistribute_bitref_for_vector (enum tree_code opcode
,
1923 vec
<operand_entry
*> *ops
, struct loop
*loop
)
1925 if (ops
->length () <= 1)
1928 if (opcode
!= PLUS_EXPR
1929 && opcode
!= MULT_EXPR
1930 && opcode
!= BIT_XOR_EXPR
1931 && opcode
!= BIT_IOR_EXPR
1932 && opcode
!= BIT_AND_EXPR
)
1935 hash_map
<tree
, v_info_ptr
> v_info_map
;
1939 /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
1940 information into map. */
1941 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1943 enum tree_code dcode
;
1946 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1948 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1949 if (!is_gimple_assign (oe1def
))
1951 dcode
= gimple_assign_rhs_code (oe1def
);
1952 if (dcode
!= BIT_FIELD_REF
|| !is_reassociable_op (oe1def
, dcode
, loop
))
1955 tree rhs
= gimple_assign_rhs1 (oe1def
);
1956 tree vec
= TREE_OPERAND (rhs
, 0);
1957 tree vec_type
= TREE_TYPE (vec
);
1959 if (TREE_CODE (vec
) != SSA_NAME
|| !VECTOR_TYPE_P (vec_type
))
1962 /* Ignore it if target machine can't support this VECTOR type. */
1963 if (!VECTOR_MODE_P (TYPE_MODE (vec_type
)))
1966 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1967 if (!TYPE_VECTOR_SUBPARTS (vec_type
).is_constant ())
1970 if (VECTOR_TYPE_P (TREE_TYPE (rhs
))
1971 || !is_a
<scalar_mode
> (TYPE_MODE (TREE_TYPE (rhs
))))
1974 /* The type of BIT_FIELD_REF might not be equal to the element type of
1975 the vector. We want to use a vector type with element type the
1976 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1977 if (!useless_type_conversion_p (TREE_TYPE (rhs
), TREE_TYPE (vec_type
)))
1979 machine_mode simd_mode
;
1980 unsigned HOST_WIDE_INT size
, nunits
;
1981 unsigned HOST_WIDE_INT elem_size
1982 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs
)));
1983 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type
)).is_constant (&size
))
1985 if (size
<= elem_size
|| (size
% elem_size
) != 0)
1987 nunits
= size
/ elem_size
;
1988 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs
)),
1989 nunits
).exists (&simd_mode
))
1991 vec_type
= build_vector_type_for_mode (TREE_TYPE (rhs
), simd_mode
);
1993 /* Ignore it if target machine can't support this VECTOR type. */
1994 if (!VECTOR_MODE_P (TYPE_MODE (vec_type
)))
1997 /* Check const vector type, constrain BIT_FIELD_REF offset and
1999 if (!TYPE_VECTOR_SUBPARTS (vec_type
).is_constant ())
2002 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type
)),
2003 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec
)))))
2007 tree elem_type
= TREE_TYPE (vec_type
);
2008 unsigned HOST_WIDE_INT elem_size
= tree_to_uhwi (TYPE_SIZE (elem_type
));
2009 if (maybe_ne (bit_field_size (rhs
), elem_size
))
2013 if (!constant_multiple_p (bit_field_offset (rhs
), elem_size
, &idx
))
2016 /* Ignore it if target machine can't support this type of VECTOR
2018 optab op_tab
= optab_for_tree_code (opcode
, vec_type
, optab_vector
);
2019 if (optab_handler (op_tab
, TYPE_MODE (vec_type
)) == CODE_FOR_nothing
)
2023 v_info_ptr
&info
= v_info_map
.get_or_insert (vec
, &existed
);
2027 info
->vec_type
= vec_type
;
2029 else if (!types_compatible_p (vec_type
, info
->vec_type
))
2031 info
->vec
.safe_push (std::make_pair (idx
, i
));
2034 /* At least two VECTOR to combine. */
2035 if (v_info_map
.elements () <= 1)
2037 cleanup_vinfo_map (v_info_map
);
2041 /* Verify all VECTOR candidates by checking two conditions:
2042 1) sorted offsets are adjacent, no holes.
2043 2) can fill the whole VECTOR perfectly.
2044 And add the valid candidates to a vector for further handling. */
2045 auto_vec
<tree
> valid_vecs (v_info_map
.elements ());
2046 for (hash_map
<tree
, v_info_ptr
>::iterator it
= v_info_map
.begin ();
2047 it
!= v_info_map
.end (); ++it
)
2049 tree cand_vec
= (*it
).first
;
2050 v_info_ptr cand_info
= (*it
).second
;
2051 unsigned int num_elems
2052 = TYPE_VECTOR_SUBPARTS (cand_info
->vec_type
).to_constant ();
2053 if (cand_info
->vec
.length () != num_elems
)
2055 sbitmap holes
= sbitmap_alloc (num_elems
);
2056 bitmap_ones (holes
);
2059 FOR_EACH_VEC_ELT (cand_info
->vec
, i
, curr)
2061	  if (!bitmap_bit_p (holes, curr->first))
2067	    bitmap_clear_bit (holes, curr->first);
2069	  if (valid && bitmap_empty_p (holes))
2070	    valid_vecs.quick_push (cand_vec);
2071	  sbitmap_free (holes);
2074   /* At least two VECTOR to combine.  */
2075   if (valid_vecs.length () <= 1)
2077       cleanup_vinfo_map (v_info_map);
2081   valid_vecs.qsort (sort_by_mach_mode);
2082   /* Go through all candidates by machine mode order, query the mode_to_total
2083      to get the total number for each mode and skip the single one.  */
2084   for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2086       tree tvec = valid_vecs[i];
2087       enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2089       /* Skip modes with only a single candidate.  */
2090       if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2093       unsigned int idx, j;
2095       tree sum_vec = tvec;
2096       v_info_ptr info_ptr = *(v_info_map.get (tvec));
2098       tree vec_type = info_ptr->vec_type;
2100       /* Build the sum for all candidates with same mode.  */
2103	  sum = build_and_add_sum (vec_type, sum_vec,
2104				   valid_vecs[i + 1], opcode);
2105	  /* Update the operands only after build_and_add_sum,
2106	     so that we don't have to repeat the placement algorithm
2107	     of build_and_add_sum.  */
2109	      && !useless_type_conversion_p (vec_type, TREE_TYPE (sum_vec)))
2111	      gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2112	      tree vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2113	      tree lhs = make_ssa_name (vec_type);
2114	      gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2115	      gimple_set_uid (g, gimple_uid (sum));
2116	      gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2117	      gimple_assign_set_rhs1 (sum, lhs);
2120	  if (!useless_type_conversion_p (vec_type,
2121					  TREE_TYPE (valid_vecs[i + 1])))
2123	      gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2124	      tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2126	      tree lhs = make_ssa_name (vec_type);
2127	      gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2128	      gimple_set_uid (g, gimple_uid (sum));
2129	      gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2130	      gimple_assign_set_rhs2 (sum, lhs);
2133	  sum_vec = gimple_get_lhs (sum);
2134	  info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2135	  gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2136	  /* Update those related ops of current candidate VECTOR.  */
2137	  FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2140	      gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2141	      /* Set this then op definition will get DCEd later.  */
2142	      gimple_set_visited (def, true);
2143	      if (opcode == PLUS_EXPR
2144		  || opcode == BIT_XOR_EXPR
2145		  || opcode == BIT_IOR_EXPR)
2146		(*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2147	      else if (opcode == MULT_EXPR)
2148		(*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2151		  gcc_assert (opcode == BIT_AND_EXPR);
2153		    = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2155	      (*ops)[idx]->rank = 0;
2157	  if (dump_file && (dump_flags & TDF_DETAILS))
2159	      fprintf (dump_file, "Generating addition -> ");
2160	      print_gimple_stmt (dump_file, sum, 0);
2164       while ((i < valid_vecs.length () - 1)
2165	      && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2167       /* Referring to first valid VECTOR with this mode, generate the
2168	  BIT_FIELD_REF statements accordingly.  */
2169       info_ptr = *(v_info_map.get (tvec));
2171       tree elem_type = TREE_TYPE (vec_type);
2172       FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2175	  tree dst = make_ssa_name (elem_type);
2176	  tree pos = bitsize_int (elem->first
2177				  * tree_to_uhwi (TYPE_SIZE (elem_type)));
2178	  tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2179			     TYPE_SIZE (elem_type), pos);
2180	  gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2181	  insert_stmt_after (gs, sum);
2182	  gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2183	  /* Set this then op definition will get DCEd later.  */
2184	  gimple_set_visited (def, true);
2185	  (*ops)[idx]->op = gimple_assign_lhs (gs);
2186	  (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2187	  if (dump_file && (dump_flags & TDF_DETAILS))
2189	      fprintf (dump_file, "Generating bit_field_ref -> ");
2190	      print_gimple_stmt (dump_file, gs, 0);
2195   if (dump_file && (dump_flags & TDF_DETAILS))
2196     fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2198   cleanup_vinfo_map (v_info_map);
2203 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2204 expression, examine the other OPS to see if any of them are comparisons
2205 of the same values, which we may be able to combine or eliminate.
2206 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
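/* Editor's note: the sketch below is an illustration added to this copy of
   the file, not part of the original pass; it is kept under #if 0 so it is
   never compiled, and the helper name is hypothetical.  It demonstrates the
   equivalence the function relies on.  */
#if 0
static int
redundant_comparison_example (int a, int b)
{
  int before = (a < b) | (a == b);	/* two comparisons joined by BIT_IOR */
  int after = (a <= b);			/* the single combined comparison */
  return before == after;		/* holds for all a and b */
}
#endif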
2209 eliminate_redundant_comparison (enum tree_code opcode,
2210				vec<operand_entry *> *ops,
2211				unsigned int currindex,
2212				operand_entry *curr)
2215   enum tree_code lcode, rcode;
2216   gimple *def1, *def2;
2220   if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2223   /* Check that CURR is a comparison.  */
2224   if (TREE_CODE (curr->op) != SSA_NAME)
2226   def1 = SSA_NAME_DEF_STMT (curr->op);
2227   if (!is_gimple_assign (def1))
2229   lcode = gimple_assign_rhs_code (def1);
2230   if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2232   op1 = gimple_assign_rhs1 (def1);
2233   op2 = gimple_assign_rhs2 (def1);
2235   /* Now look for a similar comparison in the remaining OPS.  */
2236   for (i = currindex + 1; ops->iterate (i, &oe); i++)
2240       if (TREE_CODE (oe->op) != SSA_NAME)
2242       def2 = SSA_NAME_DEF_STMT (oe->op);
2243       if (!is_gimple_assign (def2))
2245       rcode = gimple_assign_rhs_code (def2);
2246       if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2249       /* If we got here, we have a match.  See if we can combine the
2251       tree type = TREE_TYPE (gimple_assign_lhs (def1));
2252       if (opcode == BIT_IOR_EXPR)
2253	 t = maybe_fold_or_comparisons (type,
2255					rcode, gimple_assign_rhs1 (def2),
2256					gimple_assign_rhs2 (def2));
2258	 t = maybe_fold_and_comparisons (type,
2260					 rcode, gimple_assign_rhs1 (def2),
2261					 gimple_assign_rhs2 (def2));
2265       /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2266	  always give us a boolean_type_node value back.  If the original
2267	  BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2268	  we need to convert.  */
2269       if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2271	   if (!fold_convertible_p (TREE_TYPE (curr->op), t))
2273	   t = fold_convert (TREE_TYPE (curr->op), t);
2276       if (TREE_CODE (t) != INTEGER_CST
2277	   && !operand_equal_p (t, curr->op, 0))
2279	   enum tree_code subcode;
2280	   tree newop1, newop2;
2281	   if (!COMPARISON_CLASS_P (t))
2283	   extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2284	   STRIP_USELESS_TYPE_CONVERSION (newop1);
2285	   STRIP_USELESS_TYPE_CONVERSION (newop2);
2286	   if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2288	   if (lcode == TREE_CODE (t)
2289	       && operand_equal_p (op1, newop1, 0)
2290	       && operand_equal_p (op2, newop2, 0))
2292	   else if ((TREE_CODE (newop1) == SSA_NAME
2293		     && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop1))
2294		    || (TREE_CODE (newop2) == SSA_NAME
2295			&& SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop2)))
2299       if (dump_file && (dump_flags & TDF_DETAILS))
2301	   fprintf (dump_file, "Equivalence: ");
2302	   print_generic_expr (dump_file, curr->op);
2303	   fprintf (dump_file, " %s ", op_symbol_code (opcode));
2304	   print_generic_expr (dump_file, oe->op);
2305	   fprintf (dump_file, " -> ");
2306	   print_generic_expr (dump_file, t);
2307	   fprintf (dump_file, "\n");
2310       /* Now we can delete oe, as it has been subsumed by the new combined
2312       ops->ordered_remove (i);
2313       reassociate_stats.ops_eliminated++;
2315       /* If t is the same as curr->op, we're done.  Otherwise we must
2316	  replace curr->op with t.  Special case is if we got a constant
2317	  back, in which case we add it to the end instead of in place of
2318	  the current entry.  */
2319       if (TREE_CODE (t) == INTEGER_CST)
2321	   ops->ordered_remove (currindex);
2322	   add_to_ops_vec (ops, t);
2324       else if (!operand_equal_p (t, curr->op, 0))
2327	   enum tree_code subcode;
2330	   gcc_assert (COMPARISON_CLASS_P (t));
2331	   extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2332	   STRIP_USELESS_TYPE_CONVERSION (newop1);
2333	   STRIP_USELESS_TYPE_CONVERSION (newop2);
2334	   gcc_checking_assert (is_gimple_val (newop1)
2335				&& is_gimple_val (newop2));
2336	   sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2337	   curr->op = gimple_get_lhs (sum);
2346 /* Transform repeated addition of same values into multiply with
     constant.  */
2349 transform_add_to_multiply (vec<operand_entry *> *ops)
2352   tree op = NULL_TREE;
2354   int i, start = -1, end = 0, count = 0;
2355   auto_vec<std::pair <int, int> > indxs;
2356   bool changed = false;
2358   if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2359       && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2360	   || !flag_unsafe_math_optimizations))
2363   /* Look for repeated operands.  */
2364   FOR_EACH_VEC_ELT (*ops, i, oe)
2372       else if (operand_equal_p (oe->op, op, 0))
2380	     indxs.safe_push (std::make_pair (start, end));
2388     indxs.safe_push (std::make_pair (start, end));
2390   for (j = indxs.length () - 1; j >= 0; --j)
2392       /* Convert repeated operand addition to multiplication.  */
2393       start = indxs[j].first;
2394       end = indxs[j].second;
2395       op = (*ops)[start]->op;
2396       count = end - start + 1;
2397       for (i = end; i >= start; --i)
2398	 ops->unordered_remove (i);
2399       tree tmp = make_ssa_name (TREE_TYPE (op));
2400       tree cst = build_int_cst (integer_type_node, count);
2402	 = gimple_build_assign (tmp, MULT_EXPR,
2403				op, fold_convert (TREE_TYPE (op), cst));
2404       gimple_set_visited (mul_stmt, true);
2405       add_to_ops_vec (ops, tmp, mul_stmt);
2413 /* Perform various identities and other optimizations on the list of
2414 operand entries, stored in OPS. The tree code for the binary
2415 operation between all the operands is OPCODE. */
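/* Editor's note: an illustrative sketch (not part of the original source),
   compiled out via #if 0; the function name is hypothetical.  One of the
   identities applied below is merging the trailing constants of the
   operand list into a single entry.  */
#if 0
static int
merge_constants_example (int a, int b)
{
  int before = a + b + 3 + 5;	/* operand list { a, b, 3, 5 } */
  int after = a + b + 8;	/* constants folded into one operand */
  return before == after;	/* holds for all a and b */
}
#endif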
2418 optimize_ops_list (enum tree_code opcode,
2419		    vec<operand_entry *> *ops)
2421   unsigned int length = ops->length ();
2424   operand_entry *oelast = NULL;
2425   bool iterate = false;
2430   oelast = ops->last ();
2432   /* If the last two are constants, pop the constants off, merge them
2433      and try the next two.  */
2434   if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2436       operand_entry *oelm1 = (*ops)[length - 2];
2438       if (oelm1->rank == 0
2439	   && is_gimple_min_invariant (oelm1->op)
2440	   && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2441					 TREE_TYPE (oelast->op)))
2443	   tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2444				      oelm1->op, oelast->op);
2446	   if (folded && is_gimple_min_invariant (folded))
2448	       if (dump_file && (dump_flags & TDF_DETAILS))
2449		 fprintf (dump_file, "Merging constants\n");
2454	       add_to_ops_vec (ops, folded);
2455	       reassociate_stats.constants_eliminated++;
2457	       optimize_ops_list (opcode, ops);
2463   eliminate_using_constants (opcode, ops);
2466   for (i = 0; ops->iterate (i, &oe);)
2470       if (eliminate_not_pairs (opcode, ops, i, oe))
2472       if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2473	   || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2474	   || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2487     optimize_ops_list (opcode, ops);
2490 /* The following functions are subroutines to optimize_range_tests and allow
2491 it to try to change a logical combination of comparisons into a range
2495 X == 2 || X == 5 || X == 3 || X == 4
2499 (unsigned) (X - 2) <= 3
2501    For more information see comments above fold_range_test in fold-const.cc,
2502 this implementation is for GIMPLE. */
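/* Editor's note: an illustrative sketch (not part of the original source),
   compiled out via #if 0; the function name is hypothetical.  It shows why
   the unsigned-subtraction form above is an exact rewrite of the chain of
   equality tests.  */
#if 0
static int
range_test_example (int x)
{
  int before = (x == 2 || x == 5 || x == 3 || x == 4);
  /* Biasing by 2 maps 2..5 to 0..3; every other value, including negative
     ones, wraps to something larger than 3 in unsigned arithmetic.  */
  int after = ((unsigned) x - 2) <= 3;
  return before == after;	/* holds for all x */
}
#endif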
2506 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2509 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2512     print_generic_expr (file, r->exp);
2513   fprintf (file, " %c[", r->in_p ? '+' : '-');
2514   print_generic_expr (file, r->low);
2516   print_generic_expr (file, r->high);
2520 /* Dump the range entry R to STDERR.  */
2523 debug_range_entry (struct range_entry *r)
2525   dump_range_entry (stderr, r, false);
2526   fputc ('\n', stderr);
2529 /* This is similar to make_range in fold-const.cc, but on top of
2530 GIMPLE instead of trees. If EXP is non-NULL, it should be
2531 an SSA_NAME and STMT argument is ignored, otherwise STMT
2532 argument should be a GIMPLE_COND. */
2535 init_range_entry (struct range_entry
*r
, tree exp
, gimple
*stmt
)
2539 bool is_bool
, strict_overflow_p
;
2543 r
->strict_overflow_p
= false;
2545 r
->high
= NULL_TREE
;
2546 if (exp
!= NULL_TREE
2547 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
2550 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2551 and see if we can refine the range. Some of the cases below may not
2552 happen, but it doesn't seem worth worrying about this. We "continue"
2553 the outer loop when we've changed something; otherwise we "break"
2554 the switch, which will "break" the while. */
2555 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
2558 strict_overflow_p
= false;
2560 if (exp
== NULL_TREE
)
2562 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
2564 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
2569 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
2574 enum tree_code code
;
2575 tree arg0
, arg1
, exp_type
;
2579 if (exp
!= NULL_TREE
)
2581 if (TREE_CODE (exp
) != SSA_NAME
2582 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
))
2585 stmt
= SSA_NAME_DEF_STMT (exp
);
2586 if (!is_gimple_assign (stmt
))
2589 code
= gimple_assign_rhs_code (stmt
);
2590 arg0
= gimple_assign_rhs1 (stmt
);
2591 arg1
= gimple_assign_rhs2 (stmt
);
2592 exp_type
= TREE_TYPE (exp
);
2596 code
= gimple_cond_code (stmt
);
2597 arg0
= gimple_cond_lhs (stmt
);
2598 arg1
= gimple_cond_rhs (stmt
);
2599 exp_type
= boolean_type_node
;
2602 if (TREE_CODE (arg0
) != SSA_NAME
2603 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0
))
2605 loc
= gimple_location (stmt
);
2609 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
2610 /* Ensure the range is either +[-,0], +[0,0],
2611 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2612 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2613 or similar expression of unconditional true or
2614 false, it should not be negated. */
2615 && ((high
&& integer_zerop (high
))
2616 || (low
&& integer_onep (low
))))
2629 if ((TYPE_PRECISION (exp_type
) == 1
2630 || TREE_CODE (exp_type
) == BOOLEAN_TYPE
)
2631 && TYPE_PRECISION (TREE_TYPE (arg0
)) > 1)
2634 else if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
2636 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
2641 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
2656 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
2658 &strict_overflow_p
);
2659 if (nexp
!= NULL_TREE
)
2662 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2675 r
->strict_overflow_p
= strict_overflow_p
;
2679 /* Comparison function for qsort. Sort entries
2680 without SSA_NAME exp first, then with SSA_NAMEs sorted
2681 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2682 by increasing ->low and if ->low is the same, by increasing
2683 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2687 range_entry_cmp (const void *a, const void *b)
2689   const struct range_entry *p = (const struct range_entry *) a;
2690   const struct range_entry *q = (const struct range_entry *) b;
2692   if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2694       if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2696	   /* Group range_entries for the same SSA_NAME together.  */
2697	   if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2699	   else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2701	   /* If ->low is different, NULL low goes first, then by
2703	   if (p->low != NULL_TREE)
2705	       if (q->low != NULL_TREE)
2707		   tree tem = fold_binary (LT_EXPR, boolean_type_node,
2709		   if (tem && integer_onep (tem))
2711		   tem = fold_binary (GT_EXPR, boolean_type_node,
2713		   if (tem && integer_onep (tem))
2719	   else if (q->low != NULL_TREE)
2721	   /* If ->high is different, NULL high goes last, before that by
2723	   if (p->high != NULL_TREE)
2725	       if (q->high != NULL_TREE)
2727		   tree tem = fold_binary (LT_EXPR, boolean_type_node,
2729		   if (tem && integer_onep (tem))
2731		   tem = fold_binary (GT_EXPR, boolean_type_node,
2733		   if (tem && integer_onep (tem))
2739	   else if (q->high != NULL_TREE)
2741	   /* If both ranges are the same, sort below by ascending idx.  */
2746   else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2749   if (p->idx < q->idx)
2753     gcc_checking_assert (p->idx > q->idx);
2758 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2759 insert needed statements BEFORE or after GSI. */
2762 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2764   enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2765   tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2766   if (TREE_CODE (ret) != SSA_NAME)
2768       gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2770	 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2772	 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2773       ret = gimple_assign_lhs (g);
2778 /* Helper routine of optimize_range_test.
2779 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2780 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2781 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2782 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2783 an array of COUNT pointers to other ranges. Return
2784 true if the range merge has been successful.
2785 If OPCODE is ERROR_MARK, this is called from within
2786 maybe_optimize_range_tests and is performing inter-bb range optimization.
2787 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2791 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2792 struct range_entry
**otherrangep
,
2793 unsigned int count
, enum tree_code opcode
,
2794 vec
<operand_entry
*> *ops
, tree exp
, gimple_seq seq
,
2795 bool in_p
, tree low
, tree high
, bool strict_overflow_p
)
2797 unsigned int idx
= range
->idx
;
2798 struct range_entry
*swap_with
= NULL
;
2799 basic_block rewrite_bb_first
= NULL
, rewrite_bb_last
= NULL
;
2800 if (opcode
== ERROR_MARK
)
2802 /* For inter-bb range test optimization, pick from the range tests
2803 the one which is tested in the earliest condition (one dominating
2804 the others), because otherwise there could be some UB (e.g. signed
2805 overflow) in following bbs that we'd expose which wasn't there in
2806 the original program. See PR104196. */
2807 basic_block orig_range_bb
= BASIC_BLOCK_FOR_FN (cfun
, (*ops
)[idx
]->id
);
2808 basic_block range_bb
= orig_range_bb
;
2809 for (unsigned int i
= 0; i
< count
; i
++)
2811 struct range_entry
*this_range
;
2813 this_range
= otherrange
+ i
;
2815 this_range
= otherrangep
[i
];
2816 operand_entry
*oe
= (*ops
)[this_range
->idx
];
2817 basic_block this_bb
= BASIC_BLOCK_FOR_FN (cfun
, oe
->id
);
2818 if (range_bb
!= this_bb
2819 && dominated_by_p (CDI_DOMINATORS
, range_bb
, this_bb
))
2821 swap_with
= this_range
;
2823 idx
= this_range
->idx
;
2826 /* If seq is non-NULL, it can contain statements that use SSA_NAMEs
2827 only defined in later blocks. In this case we can't move the
2828 merged comparison earlier, so instead check if there are any stmts
2829 that might trigger signed integer overflow in between and rewrite
2830 them. But only after we check if the optimization is possible. */
2831 if (seq
&& swap_with
)
2833 rewrite_bb_first
= range_bb
;
2834 rewrite_bb_last
= orig_range_bb
;
2839 operand_entry
*oe
= (*ops
)[idx
];
2841 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
2842 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2843 location_t loc
= gimple_location (stmt
);
2844 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2845 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2847 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2848 gimple_stmt_iterator gsi
;
2849 unsigned int i
, uid
;
2851 if (tem
== NULL_TREE
)
2854 /* If op is default def SSA_NAME, there is no place to insert the
2855 new comparison. Give up, unless we can use OP itself as the
2857 if (op
&& SSA_NAME_IS_DEFAULT_DEF (op
))
2859 if (op
== range
->exp
2860 && ((TYPE_PRECISION (optype
) == 1 && TYPE_UNSIGNED (optype
))
2861 || TREE_CODE (optype
) == BOOLEAN_TYPE
)
2863 || (TREE_CODE (tem
) == EQ_EXPR
2864 && TREE_OPERAND (tem
, 0) == op
2865 && integer_onep (TREE_OPERAND (tem
, 1))))
2866 && opcode
!= BIT_IOR_EXPR
2867 && (opcode
!= ERROR_MARK
|| oe
->rank
!= BIT_IOR_EXPR
))
2877 std::swap (range
->idx
, swap_with
->idx
);
2879 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2880 warning_at (loc
, OPT_Wstrict_overflow
,
2881 "assuming signed overflow does not occur "
2882 "when simplifying range test");
2884 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2886 struct range_entry
*r
;
2887 fprintf (dump_file
, "Optimizing range tests ");
2888 dump_range_entry (dump_file
, range
, false);
2889 for (i
= 0; i
< count
; i
++)
2896 && r
->exp
!= range
->exp
2897 && TREE_CODE (r
->exp
) == SSA_NAME
)
2899 fprintf (dump_file
, " and ");
2900 dump_range_entry (dump_file
, r
, false);
2904 fprintf (dump_file
, " and");
2905 dump_range_entry (dump_file
, r
, true);
2908 fprintf (dump_file
, "\n into ");
2909 print_generic_expr (dump_file
, tem
);
2910 fprintf (dump_file
, "\n");
2913 /* In inter-bb range optimization mode, if we have a seq, we can't
2914 move the merged comparison to the earliest bb from the comparisons
2915 being replaced, so instead rewrite stmts that could trigger signed
2916 integer overflow. */
2917 for (basic_block bb
= rewrite_bb_last
;
2918 bb
!= rewrite_bb_first
; bb
= single_pred (bb
))
2919 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
);
2920 !gsi_end_p (gsi
); gsi_next (&gsi
))
2922 gimple
*stmt
= gsi_stmt (gsi
);
2923 if (is_gimple_assign (stmt
))
2924 if (tree lhs
= gimple_assign_lhs (stmt
))
2925 if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2926 || POINTER_TYPE_P (TREE_TYPE (lhs
)))
2927 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs
)))
2929 enum tree_code code
= gimple_assign_rhs_code (stmt
);
2930 if (arith_code_with_undefined_signed_overflow (code
))
2932 gimple_stmt_iterator gsip
= gsi
;
2933 gimple_stmt_iterator gsin
= gsi
;
2936 rewrite_to_defined_overflow (&gsi
);
2937 unsigned uid
= gimple_uid (stmt
);
2938 if (gsi_end_p (gsip
))
2939 gsip
= gsi_after_labels (bb
);
2942 for (; gsi_stmt (gsip
) != gsi_stmt (gsin
);
2944 gimple_set_uid (gsi_stmt (gsip
), uid
);
2949 if (opcode
== BIT_IOR_EXPR
2950 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2951 tem
= invert_truthvalue_loc (loc
, tem
);
2953 tem
= fold_convert_loc (loc
, optype
, tem
);
2956 gsi
= gsi_for_stmt (stmt
);
2957 uid
= gimple_uid (stmt
);
2965 gcc_checking_assert (tem
== op
);
2966 /* In rare cases range->exp can be equal to lhs of stmt.
2967    In that case we have to insert after the stmt rather than before
2968 it. If stmt is a PHI, insert it at the start of the basic block. */
2969 else if (op
!= range
->exp
)
2971 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2972 tem
= force_into_ssa_name (&gsi
, tem
, true);
2975 else if (gimple_code (stmt
) != GIMPLE_PHI
)
2977 gsi_insert_seq_after (&gsi
, seq
, GSI_CONTINUE_LINKING
);
2978 tem
= force_into_ssa_name (&gsi
, tem
, false);
2982 gsi
= gsi_after_labels (gimple_bb (stmt
));
2983 if (!gsi_end_p (gsi
))
2984 uid
= gimple_uid (gsi_stmt (gsi
));
2987 gsi
= gsi_start_bb (gimple_bb (stmt
));
2989 while (!gsi_end_p (gsi
))
2991 uid
= gimple_uid (gsi_stmt (gsi
));
2995 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2996 tem
= force_into_ssa_name (&gsi
, tem
, true);
2997 if (gsi_end_p (gsi
))
2998 gsi
= gsi_last_bb (gimple_bb (stmt
));
3002 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
3003 if (gimple_uid (gsi_stmt (gsi
)))
3006 gimple_set_uid (gsi_stmt (gsi
), uid
);
3013 range
->strict_overflow_p
= false;
3015 for (i
= 0; i
< count
; i
++)
3018 range
= otherrange
+ i
;
3020 range
= otherrangep
[i
];
3021 oe
= (*ops
)[range
->idx
];
3022 /* Now change all the other range test immediate uses, so that
3023 those tests will be optimized away. */
3024 if (opcode
== ERROR_MARK
)
3027 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
3028 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
3030 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
3031 ? boolean_false_node
: boolean_true_node
);
3034 oe
->op
= error_mark_node
;
3035 range
->exp
= NULL_TREE
;
3036 range
->low
= NULL_TREE
;
3037 range
->high
= NULL_TREE
;
3042 /* Optimize X == CST1 || X == CST2
3043 if popcount (CST1 ^ CST2) == 1 into
3044 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
3045 Similarly for ranges. E.g.
3046 X != 2 && X != 3 && X != 10 && X != 11
3047 will be transformed by the previous optimization into
3048 !((X - 2U) <= 1U || (X - 10U) <= 1U)
3049 and this loop can transform that into
3050 !(((X & ~8) - 2U) <= 1U). */
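/* Editor's note: an illustrative sketch (not part of the original source),
   compiled out via #if 0; the function name is hypothetical.  With
   CST1 = 43 and CST2 = 47, CST1 ^ CST2 == 4 has exactly one bit set, so the
   two equality tests collapse into one masked compare.  */
#if 0
static int
xor_merge_example (unsigned x)
{
  int before = (x == 43 || x == 47);
  int after = (x & ~4u) == (43u & ~4u);	/* i.e. (x & ~4) == 43 */
  return before == after;		/* holds for all x */
}
#endif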
3053 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
3054 tree lowi
, tree lowj
, tree highi
, tree highj
,
3055 vec
<operand_entry
*> *ops
,
3056 struct range_entry
*rangei
,
3057 struct range_entry
*rangej
)
3059 tree lowxor
, highxor
, tem
, exp
;
3060 /* Check lowi ^ lowj == highi ^ highj and
3061 popcount (lowi ^ lowj) == 1. */
3062 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
3063 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
3065 if (!integer_pow2p (lowxor
))
3067 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
3068 if (!tree_int_cst_equal (lowxor
, highxor
))
3072 scalar_int_mode mode
= as_a
<scalar_int_mode
> (TYPE_MODE (type
));
3073 int prec
= GET_MODE_PRECISION (mode
);
3074 if (TYPE_PRECISION (type
) < prec
3075 || (wi::to_wide (TYPE_MIN_VALUE (type
))
3076 != wi::min_value (prec
, TYPE_SIGN (type
)))
3077 || (wi::to_wide (TYPE_MAX_VALUE (type
))
3078 != wi::max_value (prec
, TYPE_SIGN (type
))))
3080 type
= build_nonstandard_integer_type (prec
, TYPE_UNSIGNED (type
));
3081 exp
= fold_convert (type
, exp
);
3082 lowxor
= fold_convert (type
, lowxor
);
3083 lowi
= fold_convert (type
, lowi
);
3084 highi
= fold_convert (type
, highi
);
3086 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
3087 exp
= fold_build2 (BIT_AND_EXPR
, type
, exp
, tem
);
3088 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
3089 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
3090 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, exp
,
3091 NULL
, rangei
->in_p
, lowj
, highj
,
3092 rangei
->strict_overflow_p
3093 || rangej
->strict_overflow_p
))
3098 /* Optimize X == CST1 || X == CST2
3099 if popcount (CST2 - CST1) == 1 into
3100 ((X - CST1) & ~(CST2 - CST1)) == 0.
3101 Similarly for ranges. E.g.
3102 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
3103 || X == 75 || X == 45
3104 will be transformed by the previous optimization into
3105 (X - 43U) <= 3U || (X - 75U) <= 3U
3106 and this loop can transform that into
3107 ((X - 43U) & ~(75U - 43U)) <= 3U. */
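/* Editor's note: an illustrative sketch (not part of the original source),
   compiled out via #if 0; the function name is hypothetical.  With
   CST1 = 43 and CST2 = 75, CST2 - CST1 == 32 has exactly one bit set, so
   the two equality tests collapse into one subtract-and-mask compare.  */
#if 0
static int
diff_merge_example (unsigned x)
{
  int before = (x == 43 || x == 75);
  int after = ((x - 43) & ~32u) == 0;	/* x - 43 must be 0 or 32 */
  return before == after;		/* holds for all x */
}
#endif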
3109 optimize_range_tests_diff (enum tree_code opcode
, tree type
,
3110 tree lowi
, tree lowj
, tree highi
, tree highj
,
3111 vec
<operand_entry
*> *ops
,
3112 struct range_entry
*rangei
,
3113 struct range_entry
*rangej
)
3115 tree tem1
, tem2
, mask
;
3116 /* Check highi - lowi == highj - lowj. */
3117 tem1
= fold_binary (MINUS_EXPR
, type
, highi
, lowi
);
3118 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
3120 tem2
= fold_binary (MINUS_EXPR
, type
, highj
, lowj
);
3121 if (!tree_int_cst_equal (tem1
, tem2
))
3123 /* Check popcount (lowj - lowi) == 1. */
3124 tem1
= fold_binary (MINUS_EXPR
, type
, lowj
, lowi
);
3125 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
3127 if (!integer_pow2p (tem1
))
3130 scalar_int_mode mode
= as_a
<scalar_int_mode
> (TYPE_MODE (type
));
3131 int prec
= GET_MODE_PRECISION (mode
);
3132 if (TYPE_PRECISION (type
) < prec
3133 || (wi::to_wide (TYPE_MIN_VALUE (type
))
3134 != wi::min_value (prec
, TYPE_SIGN (type
)))
3135 || (wi::to_wide (TYPE_MAX_VALUE (type
))
3136 != wi::max_value (prec
, TYPE_SIGN (type
))))
3137 type
= build_nonstandard_integer_type (prec
, 1);
3139 type
= unsigned_type_for (type
);
3140 tem1
= fold_convert (type
, tem1
);
3141 tem2
= fold_convert (type
, tem2
);
3142 lowi
= fold_convert (type
, lowi
);
3143 mask
= fold_build1 (BIT_NOT_EXPR
, type
, tem1
);
3144 tem1
= fold_build2 (MINUS_EXPR
, type
,
3145 fold_convert (type
, rangei
->exp
), lowi
);
3146 tem1
= fold_build2 (BIT_AND_EXPR
, type
, tem1
, mask
);
3147 lowj
= build_int_cst (type
, 0);
3148 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, tem1
,
3149 NULL
, rangei
->in_p
, lowj
, tem2
,
3150 rangei
->strict_overflow_p
3151 || rangej
->strict_overflow_p
))
3156 /* It does some common checks for function optimize_range_tests_xor and
3157 optimize_range_tests_diff.
3158 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
3159 Else it calls optimize_range_tests_diff. */
3162 optimize_range_tests_1 (enum tree_code opcode
, int first
, int length
,
3163 bool optimize_xor
, vec
<operand_entry
*> *ops
,
3164 struct range_entry
*ranges
)
3167 bool any_changes
= false;
3168 for (i
= first
; i
< length
; i
++)
3170 tree lowi
, highi
, lowj
, highj
, type
, tem
;
3172 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
3174 type
= TREE_TYPE (ranges
[i
].exp
);
3175 if (!INTEGRAL_TYPE_P (type
))
3177 lowi
= ranges
[i
].low
;
3178 if (lowi
== NULL_TREE
)
3179 lowi
= TYPE_MIN_VALUE (type
);
3180 highi
= ranges
[i
].high
;
3181 if (highi
== NULL_TREE
)
3183 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
3186 if (ranges
[i
].exp
!= ranges
[j
].exp
|| ranges
[j
].in_p
)
3188 lowj
= ranges
[j
].low
;
3189 if (lowj
== NULL_TREE
)
3191 highj
= ranges
[j
].high
;
3192 if (highj
== NULL_TREE
)
3193 highj
= TYPE_MAX_VALUE (type
);
3194 /* Check lowj > highi. */
3195 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
3197 if (tem
== NULL_TREE
|| !integer_onep (tem
))
3200 changes
= optimize_range_tests_xor (opcode
, type
, lowi
, lowj
,
3202 ranges
+ i
, ranges
+ j
);
3204 changes
= optimize_range_tests_diff (opcode
, type
, lowi
, lowj
,
3206 ranges
+ i
, ranges
+ j
);
3217 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3218 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3219 EXP on success, NULL otherwise. */
3222 extract_bit_test_mask (tree exp
, int prec
, tree totallow
, tree low
, tree high
,
3223 wide_int
*mask
, tree
*totallowp
)
3225 tree tem
= int_const_binop (MINUS_EXPR
, high
, low
);
3226 if (tem
== NULL_TREE
3227 || TREE_CODE (tem
) != INTEGER_CST
3228 || TREE_OVERFLOW (tem
)
3229 || tree_int_cst_sgn (tem
) == -1
3230 || compare_tree_int (tem
, prec
) != -1)
3233 unsigned HOST_WIDE_INT max
= tree_to_uhwi (tem
) + 1;
3234 *mask
= wi::shifted_mask (0, max
, false, prec
);
3235 if (TREE_CODE (exp
) == BIT_AND_EXPR
3236 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
3238 widest_int msk
= wi::to_widest (TREE_OPERAND (exp
, 1));
3239 msk
= wi::zext (~msk
, TYPE_PRECISION (TREE_TYPE (exp
)));
3240 if (wi::popcount (msk
) == 1
3241 && wi::ltu_p (msk
, prec
- max
))
3243 *mask
|= wi::shifted_mask (msk
.to_uhwi (), max
, false, prec
);
3244 max
+= msk
.to_uhwi ();
3245 exp
= TREE_OPERAND (exp
, 0);
3246 if (integer_zerop (low
)
3247 && TREE_CODE (exp
) == PLUS_EXPR
3248 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
3250 tree ret
= TREE_OPERAND (exp
, 0);
3253 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp
, 1)),
3254 TYPE_PRECISION (TREE_TYPE (low
))));
3255 tree tbias
= wide_int_to_tree (TREE_TYPE (ret
), bias
);
3261 else if (!tree_int_cst_lt (totallow
, tbias
))
3263 bias
= wi::to_widest (tbias
);
3264 bias
-= wi::to_widest (totallow
);
3265 if (bias
>= 0 && bias
< prec
- max
)
3267 *mask
= wi::lshift (*mask
, bias
);
3275 if (!tree_int_cst_lt (totallow
, low
))
3277 tem
= int_const_binop (MINUS_EXPR
, low
, totallow
);
3278 if (tem
== NULL_TREE
3279 || TREE_CODE (tem
) != INTEGER_CST
3280 || TREE_OVERFLOW (tem
)
3281 || compare_tree_int (tem
, prec
- max
) == 1)
3284 *mask
= wi::lshift (*mask
, wi::to_widest (tem
));
3288 /* Attempt to optimize small range tests using bit test.
3290 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3291 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3292 has been by earlier optimizations optimized into:
3293 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3294 As all the 43 through 82 range is less than 64 numbers,
3295 for 64-bit word targets optimize that into:
3296 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
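/* Editor's note: an illustrative sketch (not part of the original source),
   compiled out via #if 0; the function name is hypothetical.  It shows the
   shape of the bit test on a smaller set: membership of x in { 3, 5, 7, 9 }
   becomes one range check plus one test against a word-sized mask.  */
#if 0
static int
bit_test_example (unsigned x)
{
  int before = (x == 3 || x == 5 || x == 7 || x == 9);
  /* Bias by the smallest constant; bits 0, 2, 4 and 6 of the mask stand
     for 3, 5, 7 and 9.  The && keeps the shift amount in range.  */
  int after = (x - 3 <= 6
	       && (((unsigned long long) 1 << (x - 3)) & 0x55) != 0);
  return before == after;	/* holds for all x */
}
#endif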
3299 optimize_range_tests_to_bit_test (enum tree_code opcode
, int first
, int length
,
3300 vec
<operand_entry
*> *ops
,
3301 struct range_entry
*ranges
)
3304 bool any_changes
= false;
3305 int prec
= GET_MODE_BITSIZE (word_mode
);
3306 auto_vec
<struct range_entry
*, 64> candidates
;
3308 for (i
= first
; i
< length
- 1; i
++)
3310 tree lowi
, highi
, lowj
, highj
, type
;
3312 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
3314 type
= TREE_TYPE (ranges
[i
].exp
);
3315 if (!INTEGRAL_TYPE_P (type
))
3317 lowi
= ranges
[i
].low
;
3318 if (lowi
== NULL_TREE
)
3319 lowi
= TYPE_MIN_VALUE (type
);
3320 highi
= ranges
[i
].high
;
3321 if (highi
== NULL_TREE
)
3324 tree exp
= extract_bit_test_mask (ranges
[i
].exp
, prec
, lowi
, lowi
,
3325 highi
, &mask
, &lowi
);
3326 if (exp
== NULL_TREE
)
3328 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
3329 candidates
.truncate (0);
3330 int end
= MIN (i
+ 64, length
);
3331 for (j
= i
+ 1; j
< end
; j
++)
3334 if (ranges
[j
].exp
== NULL_TREE
|| ranges
[j
].in_p
)
3336 if (ranges
[j
].exp
== exp
)
3338 else if (TREE_CODE (ranges
[j
].exp
) == BIT_AND_EXPR
)
3340 exp2
= TREE_OPERAND (ranges
[j
].exp
, 0);
3343 else if (TREE_CODE (exp2
) == PLUS_EXPR
)
3345 exp2
= TREE_OPERAND (exp2
, 0);
3355 lowj
= ranges
[j
].low
;
3356 if (lowj
== NULL_TREE
)
3358 highj
= ranges
[j
].high
;
3359 if (highj
== NULL_TREE
)
3360 highj
= TYPE_MAX_VALUE (type
);
3362 exp2
= extract_bit_test_mask (ranges
[j
].exp
, prec
, lowi
, lowj
,
3363 highj
, &mask2
, NULL
);
3367 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
3368 candidates
.safe_push (&ranges
[j
]);
3371 /* If every possible relative value of the expression is a valid shift
3372 amount, then we can merge the entry test in the bit test. In this
3373 case, if we would need otherwise 2 or more comparisons, then use
3374 the bit test; in the other cases, the threshold is 3 comparisons. */
3375 bool entry_test_needed
;
3377 if (TREE_CODE (exp
) == SSA_NAME
3378 && get_range_query (cfun
)->range_of_expr (r
, exp
)
3379 && !r
.undefined_p ()
3381 && wi::leu_p (r
.upper_bound () - r
.lower_bound (), prec
- 1))
3383 wide_int min
= r
.lower_bound ();
3384 wide_int ilowi
= wi::to_wide (lowi
);
3385 if (wi::lt_p (min
, ilowi
, TYPE_SIGN (TREE_TYPE (lowi
))))
3387 lowi
= wide_int_to_tree (TREE_TYPE (lowi
), min
);
3388 mask
= wi::lshift (mask
, ilowi
- min
);
3390 else if (wi::gt_p (min
, ilowi
, TYPE_SIGN (TREE_TYPE (lowi
))))
3392 lowi
= wide_int_to_tree (TREE_TYPE (lowi
), min
);
3393 mask
= wi::lrshift (mask
, min
- ilowi
);
3395 entry_test_needed
= false;
3398 entry_test_needed
= true;
3399 if (candidates
.length () >= (entry_test_needed
? 2 : 1))
3401 tree high
= wide_int_to_tree (TREE_TYPE (lowi
),
3402 wi::to_widest (lowi
)
3403 + prec
- 1 - wi::clz (mask
));
3404 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3406 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
3407 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN
3409 location_t loc
= gimple_location (stmt
);
3410 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
3412 /* See if it isn't cheaper to pretend the minimum value of the
3413 range is 0, if maximum value is small enough.
3414      We can avoid the subtraction of the minimum value, but the
3415 mask constant could be perhaps more expensive. */
3416 if (compare_tree_int (lowi
, 0) > 0
3417 && compare_tree_int (high
, prec
) < 0)
3420 HOST_WIDE_INT m
= tree_to_uhwi (lowi
);
3421 rtx reg
= gen_raw_REG (word_mode
, 10000);
3422 bool speed_p
= optimize_bb_for_speed_p (gimple_bb (stmt
));
3423 cost_diff
= set_src_cost (gen_rtx_PLUS (word_mode
, reg
,
3425 word_mode
, speed_p
);
3426 rtx r
= immed_wide_int_const (mask
, word_mode
);
3427 cost_diff
+= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
3428 word_mode
, speed_p
);
3429 r
= immed_wide_int_const (wi::lshift (mask
, m
), word_mode
);
3430 cost_diff
-= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
3431 word_mode
, speed_p
);
3434 mask
= wi::lshift (mask
, m
);
3435 lowi
= build_zero_cst (TREE_TYPE (lowi
));
3440 if (entry_test_needed
)
3442 tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
3444 if (tem
== NULL_TREE
|| is_gimple_val (tem
))
3449 tree etype
= unsigned_type_for (TREE_TYPE (exp
));
3450 exp
= fold_build2_loc (loc
, MINUS_EXPR
, etype
,
3451 fold_convert_loc (loc
, etype
, exp
),
3452 fold_convert_loc (loc
, etype
, lowi
));
3453 exp
= fold_convert_loc (loc
, integer_type_node
, exp
);
3454 tree word_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
3455 exp
= fold_build2_loc (loc
, LSHIFT_EXPR
, word_type
,
3456 build_int_cst (word_type
, 1), exp
);
3457 exp
= fold_build2_loc (loc
, BIT_AND_EXPR
, word_type
, exp
,
3458 wide_int_to_tree (word_type
, mask
));
3459 exp
= fold_build2_loc (loc
, EQ_EXPR
, optype
, exp
,
3460 build_zero_cst (word_type
));
3461 if (is_gimple_val (exp
))
3464 /* The shift might have undefined behavior if TEM is true,
3465 but reassociate_bb isn't prepared to have basic blocks
3466 split when it is running. So, temporarily emit a code
3467 with BIT_IOR_EXPR instead of &&, and fix it up in
3469 gimple_seq seq
= NULL
;
3472 tem
= force_gimple_operand (tem
, &seq
, true, NULL_TREE
);
3473 gcc_assert (TREE_CODE (tem
) == SSA_NAME
);
3474 gimple_set_visited (SSA_NAME_DEF_STMT (tem
), true);
3477 exp
= force_gimple_operand (exp
, &seq2
, true, NULL_TREE
);
3478 gimple_seq_add_seq_without_update (&seq
, seq2
);
3479 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
3480 gimple_set_visited (SSA_NAME_DEF_STMT (exp
), true);
3483 gimple
*g
= gimple_build_assign (make_ssa_name (optype
),
3484 BIT_IOR_EXPR
, tem
, exp
);
3485 gimple_set_location (g
, loc
);
3486 gimple_seq_add_stmt_without_update (&seq
, g
);
3487 exp
= gimple_assign_lhs (g
);
3489 tree val
= build_zero_cst (optype
);
3490 if (update_range_test (&ranges
[i
], NULL
, candidates
.address (),
3491 candidates
.length (), opcode
, ops
, exp
,
3492 seq
, false, val
, val
, strict_overflow_p
))
3496 reassoc_branch_fixups
.safe_push (tem
);
3499 gimple_seq_discard (seq
);
3505 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3506    and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
3507 Also, handle x < C && y < C && z < C where C is power of two as
3508 (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0
3509 as (x | y | z) < 0. */
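/* Editor's note: an illustrative sketch (not part of the original source),
   compiled out via #if 0; the function name is hypothetical.  It checks the
   power-of-two sub-case mentioned above: for unsigned operands and C a
   power of two, the three comparisons and the single OR-based one agree.  */
#if 0
static int
cmp_bitwise_example (unsigned x, unsigned y, unsigned z)
{
  int before = (x < 16 && y < 16 && z < 16);
  int after = (x | y | z) < 16;	/* 16 is a power of two */
  return before == after;	/* holds for all x, y and z */
}
#endif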
3512 optimize_range_tests_cmp_bitwise (enum tree_code opcode
, int first
, int length
,
3513 vec
<operand_entry
*> *ops
,
3514 struct range_entry
*ranges
)
3518 bool any_changes
= false;
3519 auto_vec
<int, 128> buckets
;
3520 auto_vec
<int, 32> chains
;
3521 auto_vec
<struct range_entry
*, 32> candidates
;
3523 for (i
= first
; i
< length
; i
++)
3527 if (ranges
[i
].exp
== NULL_TREE
3528 || TREE_CODE (ranges
[i
].exp
) != SSA_NAME
3529 || TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) <= 1
3530 || TREE_CODE (TREE_TYPE (ranges
[i
].exp
)) == BOOLEAN_TYPE
)
3533 if (ranges
[i
].low
!= NULL_TREE
3534 && ranges
[i
].high
!= NULL_TREE
3536 && tree_int_cst_equal (ranges
[i
].low
, ranges
[i
].high
))
3538 idx
= !integer_zerop (ranges
[i
].low
);
3539 if (idx
&& !integer_all_onesp (ranges
[i
].low
))
3542 else if (ranges
[i
].high
!= NULL_TREE
3543 && TREE_CODE (ranges
[i
].high
) == INTEGER_CST
3546 wide_int w
= wi::to_wide (ranges
[i
].high
);
3547 int prec
= TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
));
3548 int l
= wi::clz (w
);
3552 || w
!= wi::mask (prec
- l
, false, prec
))
3554 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3555 && ranges
[i
].low
== NULL_TREE
)
3557 && integer_zerop (ranges
[i
].low
))))
3560 else if (ranges
[i
].high
== NULL_TREE
3561 && ranges
[i
].low
!= NULL_TREE
3562 /* Perform this optimization only in the last
3563 reassoc pass, as it interferes with the reassociation
3564 itself or could also with VRP etc. which might not
3565 be able to virtually undo the optimization. */
3566 && !reassoc_insert_powi_p
3567 && !TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3568 && integer_zerop (ranges
[i
].low
))
3573 b
= TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) * 4 + idx
;
3574 if (buckets
.length () <= b
)
3575 buckets
.safe_grow_cleared (b
+ 1, true);
3576 if (chains
.length () <= (unsigned) i
)
3577 chains
.safe_grow (i
+ 1, true);
3578 chains
[i
] = buckets
[b
];
3582 FOR_EACH_VEC_ELT (buckets
, b
, i
)
3583 if (i
&& chains
[i
- 1])
3588 /* When ranges[X - 1].high + 1 is a power of two,
3589 we need to process the same bucket up to
3590 precision - 1 times, each time split the entries
3591 with the same high bound into one chain and the
3592 rest into another one to be processed later. */
3595 for (j
= chains
[i
- 1]; j
; j
= chains
[j
- 1])
3597 if (tree_int_cst_equal (ranges
[i
- 1].high
,
3598 ranges
[j
- 1].high
))
3600 chains
[this_prev
- 1] = j
;
3603 else if (other_prev
== 0)
3610 chains
[other_prev
- 1] = j
;
3614 chains
[this_prev
- 1] = 0;
3616 chains
[other_prev
- 1] = 0;
3617 if (chains
[i
- 1] == 0)
3624 for (j
= chains
[i
- 1]; j
; j
= chains
[j
- 1])
3626 gimple
*gk
= SSA_NAME_DEF_STMT (ranges
[k
- 1].exp
);
3627 gimple
*gj
= SSA_NAME_DEF_STMT (ranges
[j
- 1].exp
);
3628 if (reassoc_stmt_dominates_stmt_p (gk
, gj
))
3631 tree type1
= TREE_TYPE (ranges
[k
- 1].exp
);
3632 tree type2
= NULL_TREE
;
3633 bool strict_overflow_p
= false;
3634 candidates
.truncate (0);
3635 if (POINTER_TYPE_P (type1
) || TREE_CODE (type1
) == OFFSET_TYPE
)
3636 type1
= pointer_sized_int_node
;
3637 for (j
= i
; j
; j
= chains
[j
- 1])
3639 tree type
= TREE_TYPE (ranges
[j
- 1].exp
);
3640 strict_overflow_p
|= ranges
[j
- 1].strict_overflow_p
;
3641 if (POINTER_TYPE_P (type
) || TREE_CODE (type
) == OFFSET_TYPE
)
3642 type
= pointer_sized_int_node
;
3645 /* For the signed < 0 cases, the types should be
3646 really compatible (all signed with the same precision,
3647 instead put ranges that have different in_p from
3649 if (!useless_type_conversion_p (type1
, type
))
3651 if (ranges
[j
- 1].in_p
!= ranges
[k
- 1].in_p
)
3652 candidates
.safe_push (&ranges
[j
- 1]);
3657 || useless_type_conversion_p (type1
, type
))
3659 else if (type2
== NULL_TREE
3660 || useless_type_conversion_p (type2
, type
))
3662 if (type2
== NULL_TREE
)
3664 candidates
.safe_push (&ranges
[j
- 1]);
3667 unsigned l
= candidates
.length ();
3668 for (j
= i
; j
; j
= chains
[j
- 1])
3670 tree type
= TREE_TYPE (ranges
[j
- 1].exp
);
3673 if (POINTER_TYPE_P (type
) || TREE_CODE (type
) == OFFSET_TYPE
)
3674 type
= pointer_sized_int_node
;
3677 if (!useless_type_conversion_p (type1
, type
))
3679 if (ranges
[j
- 1].in_p
== ranges
[k
- 1].in_p
)
3680 candidates
.safe_push (&ranges
[j
- 1]);
3683 if (useless_type_conversion_p (type1
, type
))
3685 else if (type2
== NULL_TREE
3686 || useless_type_conversion_p (type2
, type
))
3688 candidates
.safe_push (&ranges
[j
- 1]);
3690 gimple_seq seq
= NULL
;
3691 tree op
= NULL_TREE
;
3693 struct range_entry
*r
;
3694 candidates
.safe_push (&ranges
[k
- 1]);
3695 FOR_EACH_VEC_ELT (candidates
, id
, r
)
3698 enum tree_code code
;
3705 || POINTER_TYPE_P (TREE_TYPE (op
))
3706 || TREE_CODE (TREE_TYPE (op
)) == OFFSET_TYPE
)
3708 code
= (b
% 4) == 3 ? BIT_NOT_EXPR
: NOP_EXPR
;
3709 tree type3
= id
>= l
? type1
: pointer_sized_int_node
;
3710 if (code
== BIT_NOT_EXPR
3711 && TREE_CODE (TREE_TYPE (op
)) == OFFSET_TYPE
)
3713 g
= gimple_build_assign (make_ssa_name (type3
),
3715 gimple_seq_add_stmt_without_update (&seq
, g
);
3716 op
= gimple_assign_lhs (g
);
3718 g
= gimple_build_assign (make_ssa_name (type3
), code
, op
);
3719 gimple_seq_add_stmt_without_update (&seq
, g
);
3720 op
= gimple_assign_lhs (g
);
3722 tree type
= TREE_TYPE (r
->exp
);
3724 if (POINTER_TYPE_P (type
)
3725 || TREE_CODE (type
) == OFFSET_TYPE
3726 || (id
>= l
&& !useless_type_conversion_p (type1
, type
)))
3728 tree type3
= id
>= l
? type1
: pointer_sized_int_node
;
3729 g
= gimple_build_assign (make_ssa_name (type3
), NOP_EXPR
, exp
);
3730 gimple_seq_add_stmt_without_update (&seq
, g
);
3731 exp
= gimple_assign_lhs (g
);
3734 code
= r
->in_p
? BIT_IOR_EXPR
: BIT_AND_EXPR
;
3736 code
= (b
% 4) == 1 ? BIT_AND_EXPR
: BIT_IOR_EXPR
;
3737 g
= gimple_build_assign (make_ssa_name (id
>= l
? type1
: type2
),
3739 gimple_seq_add_stmt_without_update (&seq
, g
);
3740 op
= gimple_assign_lhs (g
);
3742 type1
= TREE_TYPE (ranges
[k
- 1].exp
);
3743 if (POINTER_TYPE_P (type1
) || TREE_CODE (type1
) == OFFSET_TYPE
)
3746 = gimple_build_assign (make_ssa_name (type1
), NOP_EXPR
, op
);
3747 gimple_seq_add_stmt_without_update (&seq
, g
);
3748 op
= gimple_assign_lhs (g
);
3751 if (update_range_test (&ranges
[k
- 1], NULL
, candidates
.address (),
3752 candidates
.length (), opcode
, ops
, op
,
3753 seq
, ranges
[k
- 1].in_p
, ranges
[k
- 1].low
,
3754 ranges
[k
- 1].high
, strict_overflow_p
))
3757 gimple_seq_discard (seq
);
3758 if ((b
% 4) == 2 && buckets
[b
] != i
)
3759 /* There is more work to do for this bucket. */
3766 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3767 a >= 0 && a < b into (unsigned) a < (unsigned) b
3768 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
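/* Editor's note: an illustrative sketch (not part of the original source),
   compiled out via #if 0; the function name is hypothetical.  It shows that
   the unsigned rewrite matches the signed test whenever b is known to be
   non-negative, which is exactly the precondition stated above.  */
#if 0
static int
var_bound_example (int a, int b)
{
  if (b < 0)
    return 1;			/* outside the b >= 0 precondition */
  int before = (a >= 0 && a < b);
  int after = (unsigned) a < (unsigned) b;
  return before == after;	/* holds for every a once b >= 0 */
}
#endif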
3771 optimize_range_tests_var_bound (enum tree_code opcode
, int first
, int length
,
3772 vec
<operand_entry
*> *ops
,
3773 struct range_entry
*ranges
,
3774 basic_block first_bb
)
3777 bool any_changes
= false;
3778 hash_map
<tree
, int> *map
= NULL
;
3780 for (i
= first
; i
< length
; i
++)
3782 if (ranges
[i
].exp
== NULL_TREE
3783 || TREE_CODE (ranges
[i
].exp
) != SSA_NAME
3787 tree type
= TREE_TYPE (ranges
[i
].exp
);
3788 if (!INTEGRAL_TYPE_P (type
)
3789 || TYPE_UNSIGNED (type
)
3790 || ranges
[i
].low
== NULL_TREE
3791 || !integer_zerop (ranges
[i
].low
)
3792 || ranges
[i
].high
!= NULL_TREE
)
3794 /* EXP >= 0 here. */
3796 map
= new hash_map
<tree
, int>;
3797 map
->put (ranges
[i
].exp
, i
);
3803 for (i
= 0; i
< length
; i
++)
3805 bool in_p
= ranges
[i
].in_p
;
3806 if (ranges
[i
].low
== NULL_TREE
3807 || ranges
[i
].high
== NULL_TREE
)
3809 if (!integer_zerop (ranges
[i
].low
)
3810 || !integer_zerop (ranges
[i
].high
))
3813 && TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) == 1
3814 && TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3815 && integer_onep (ranges
[i
].low
)
3816 && integer_onep (ranges
[i
].high
))
3827 if (TREE_CODE (ranges
[i
].exp
) != SSA_NAME
)
3829 stmt
= SSA_NAME_DEF_STMT (ranges
[i
].exp
);
3830 if (!is_gimple_assign (stmt
))
3832 ccode
= gimple_assign_rhs_code (stmt
);
3833 rhs1
= gimple_assign_rhs1 (stmt
);
3834 rhs2
= gimple_assign_rhs2 (stmt
);
3838 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3839 stmt
= last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
3840 if (gimple_code (stmt
) != GIMPLE_COND
)
3842 ccode
= gimple_cond_code (stmt
);
3843 rhs1
= gimple_cond_lhs (stmt
);
3844 rhs2
= gimple_cond_rhs (stmt
);
3847 if (TREE_CODE (rhs1
) != SSA_NAME
3848 || rhs2
== NULL_TREE
3849 || TREE_CODE (rhs2
) != SSA_NAME
)
3863 ccode
= invert_tree_comparison (ccode
, false);
3868 std::swap (rhs1
, rhs2
);
3869 ccode
= swap_tree_comparison (ccode
);
3878 int *idx
= map
->get (rhs1
);
3882 /* maybe_optimize_range_tests allows statements without side-effects
3883 in the basic blocks as long as they are consumed in the same bb.
3884 Make sure rhs2's def stmt is not among them, otherwise we can't
3885 use safely get_nonzero_bits on it. E.g. in:
3886 # RANGE [-83, 1] NONZERO 173
3887 # k_32 = PHI <k_47(13), k_12(9)>
3890 goto <bb 5>; [26.46%]
3892 goto <bb 9>; [73.54%]
3894 <bb 5> [local count: 140323371]:
3895 # RANGE [0, 1] NONZERO 1
3897 # RANGE [0, 4] NONZERO 4
3899 # RANGE [0, 4] NONZERO 4
3900 iftmp.0_44 = (char) _21;
3901 if (k_32 < iftmp.0_44)
3902 goto <bb 6>; [84.48%]
3904 goto <bb 9>; [15.52%]
3905 the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
3906 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3907 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3908 those stmts even for negative k_32 and the value ranges would be no
3909 longer guaranteed and so the optimization would be invalid. */
3910 while (opcode
== ERROR_MARK
)
3912 gimple
*g
= SSA_NAME_DEF_STMT (rhs2
);
3913 basic_block bb2
= gimple_bb (g
);
3916 && dominated_by_p (CDI_DOMINATORS
, bb2
, first_bb
))
3918 /* As an exception, handle a few common cases. */
3919 if (gimple_assign_cast_p (g
)
3920 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g
))))
3922 tree op0
= gimple_assign_rhs1 (g
);
3923 if (TYPE_UNSIGNED (TREE_TYPE (op0
))
3924 && (TYPE_PRECISION (TREE_TYPE (rhs2
))
3925 > TYPE_PRECISION (TREE_TYPE (op0
))))
3926 /* Zero-extension is always ok. */
3928 else if (TYPE_PRECISION (TREE_TYPE (rhs2
))
3929 == TYPE_PRECISION (TREE_TYPE (op0
))
3930 && TREE_CODE (op0
) == SSA_NAME
)
3932 /* Cast from signed to unsigned or vice versa. Retry
3933 with the op0 as new rhs2. */
3938 else if (is_gimple_assign (g
)
3939 && gimple_assign_rhs_code (g
) == BIT_AND_EXPR
3940 && TREE_CODE (gimple_assign_rhs2 (g
)) == INTEGER_CST
3941 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g
))))
3942 /* Masking with INTEGER_CST with MSB clear is always ok
3949 if (rhs2
== NULL_TREE
)
3952 wide_int nz
= get_nonzero_bits (rhs2
);
3956 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3957 and RHS2 is known to be RHS2 >= 0. */
3958 tree utype
= unsigned_type_for (TREE_TYPE (rhs1
));
3960 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
3961 if ((ranges
[*idx
].strict_overflow_p
3962 || ranges
[i
].strict_overflow_p
)
3963 && issue_strict_overflow_warning (wc
))
3964 warning_at (gimple_location (stmt
), OPT_Wstrict_overflow
,
3965 "assuming signed overflow does not occur "
3966 "when simplifying range test");
3968 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3970 struct range_entry
*r
= &ranges
[*idx
];
3971 fprintf (dump_file
, "Optimizing range test ");
3972 print_generic_expr (dump_file
, r
->exp
);
3973 fprintf (dump_file
, " +[");
3974 print_generic_expr (dump_file
, r
->low
);
3975 fprintf (dump_file
, ", ");
3976 print_generic_expr (dump_file
, r
->high
);
3977 fprintf (dump_file
, "] and comparison ");
3978 print_generic_expr (dump_file
, rhs1
);
3979 fprintf (dump_file
, " %s ", op_symbol_code (ccode
));
3980 print_generic_expr (dump_file
, rhs2
);
3981 fprintf (dump_file
, "\n into (");
3982 print_generic_expr (dump_file
, utype
);
3983 fprintf (dump_file
, ") ");
3984 print_generic_expr (dump_file
, rhs1
);
3985 fprintf (dump_file
, " %s (", op_symbol_code (ccode
));
3986 print_generic_expr (dump_file
, utype
);
3987 fprintf (dump_file
, ") ");
3988 print_generic_expr (dump_file
, rhs2
);
3989 fprintf (dump_file
, "\n");
3992 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3994 if (opcode
== BIT_IOR_EXPR
3995 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
3998 ccode
= invert_tree_comparison (ccode
, false);
4001 unsigned int uid
= gimple_uid (stmt
);
4002 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4003 gimple
*g
= gimple_build_assign (make_ssa_name (utype
), NOP_EXPR
, rhs1
);
4004 gimple_set_uid (g
, uid
);
4005 rhs1
= gimple_assign_lhs (g
);
4006 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4007 if (!useless_type_conversion_p (utype
, TREE_TYPE (rhs2
)))
4009 g
= gimple_build_assign (make_ssa_name (utype
), NOP_EXPR
, rhs2
);
4010 gimple_set_uid (g
, uid
);
4011 rhs2
= gimple_assign_lhs (g
);
4012 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4014 if (tree_swap_operands_p (rhs1
, rhs2
))
4016 std::swap (rhs1
, rhs2
);
4017 ccode
= swap_tree_comparison (ccode
);
4019 if (gimple_code (stmt
) == GIMPLE_COND
)
4021 gcond
*c
= as_a
<gcond
*> (stmt
);
4022 gimple_cond_set_code (c
, ccode
);
4023 gimple_cond_set_lhs (c
, rhs1
);
4024 gimple_cond_set_rhs (c
, rhs2
);
4029 tree ctype
= oe
->op
? TREE_TYPE (oe
->op
) : boolean_type_node
;
4030 if (!INTEGRAL_TYPE_P (ctype
)
4031 || (TREE_CODE (ctype
) != BOOLEAN_TYPE
4032 && TYPE_PRECISION (ctype
) != 1))
4033 ctype
= boolean_type_node
;
4034 g
= gimple_build_assign (make_ssa_name (ctype
), ccode
, rhs1
, rhs2
);
4035 gimple_set_uid (g
, uid
);
4036 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4037 if (oe
->op
&& ctype
!= TREE_TYPE (oe
->op
))
4039 g
= gimple_build_assign (make_ssa_name (TREE_TYPE (oe
->op
)),
4040 NOP_EXPR
, gimple_assign_lhs (g
));
4041 gimple_set_uid (g
, uid
);
4042 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4044 ranges
[i
].exp
= gimple_assign_lhs (g
);
4045 oe
->op
= ranges
[i
].exp
;
4046 ranges
[i
].low
= build_zero_cst (TREE_TYPE (ranges
[i
].exp
));
4047 ranges
[i
].high
= ranges
[i
].low
;
4049 ranges
[i
].strict_overflow_p
= false;
4050 oe
= (*ops
)[ranges
[*idx
].idx
];
4051 /* Now change all the other range test immediate uses, so that
4052 those tests will be optimized away. */
4053 if (opcode
== ERROR_MARK
)
4056 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
4057 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
4059 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
4060 ? boolean_false_node
: boolean_true_node
);
4063 oe
->op
= error_mark_node
;
4064 ranges
[*idx
].exp
= NULL_TREE
;
4065 ranges
[*idx
].low
= NULL_TREE
;
4066 ranges
[*idx
].high
= NULL_TREE
;
4074 /* Optimize range tests, similarly to how fold_range_test optimizes
4075 it on trees. The tree code for the binary
4076 operation between all the operands is OPCODE.
4077 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
4078 maybe_optimize_range_tests for inter-bb range optimization.
4079 In that case if oe->op is NULL, oe->id is bb->index whose
4080 GIMPLE_COND is && or ||ed into the test, and oe->rank says
4082 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
4085 optimize_range_tests (enum tree_code opcode
,
4086 vec
<operand_entry
*> *ops
, basic_block first_bb
)
4088 unsigned int length
= ops
->length (), i
, j
, first
;
4090 struct range_entry
*ranges
;
4091 bool any_changes
= false;
4096 ranges
= XNEWVEC (struct range_entry
, length
);
4097 for (i
= 0; i
< length
; i
++)
4101 init_range_entry (ranges
+ i
, oe
->op
,
4104 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
)));
4105 /* For | invert it now, we will invert it again before emitting
4106 the optimized expression. */
4107 if (opcode
== BIT_IOR_EXPR
4108 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
4109 ranges
[i
].in_p
= !ranges
[i
].in_p
;
4112 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
4113 for (i
= 0; i
< length
; i
++)
4114 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
4117 /* Try to merge ranges. */
4118 for (first
= i
; i
< length
; i
++)
4120 tree low
= ranges
[i
].low
;
4121 tree high
= ranges
[i
].high
;
4122 int in_p
= ranges
[i
].in_p
;
4123 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
4124 int update_fail_count
= 0;
4126 for (j
= i
+ 1; j
< length
; j
++)
4128 if (ranges
[i
].exp
!= ranges
[j
].exp
)
4130 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
4131 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
4133 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
4139 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, NULL
, j
- i
- 1,
4140 opcode
, ops
, ranges
[i
].exp
, NULL
, in_p
,
4141 low
, high
, strict_overflow_p
))
4146 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
4147 while update_range_test would fail. */
4148 else if (update_fail_count
== 64)
4151 ++update_fail_count
;
4154 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
4157 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
4158 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
4160 if (lshift_cheap_p (optimize_function_for_speed_p (cfun
)))
4161 any_changes
|= optimize_range_tests_to_bit_test (opcode
, first
, length
,
4163 any_changes
|= optimize_range_tests_var_bound (opcode
, first
, length
, ops
,
4165 any_changes
|= optimize_range_tests_cmp_bitwise (opcode
, first
, length
,
4168 if (any_changes
&& opcode
!= ERROR_MARK
)
4171 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
4173 if (oe
->op
== error_mark_node
)
4182 XDELETEVEC (ranges
);
4186 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4187 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
4188 otherwise the comparison code. TYPE is a return value that is set
4189 to type of comparison. */
4192 ovce_extract_ops (tree var
, gassign
**rets
, bool *reti
, tree
*type
,
4193 tree
*lhs
, tree
*rhs
, gassign
**vcond
)
4195 if (TREE_CODE (var
) != SSA_NAME
)
4198 gassign
*stmt
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (var
));
4204 /* ??? If we start creating more COND_EXPR, we could perform
4205 this same optimization with them. For now, simplify. */
4206 if (gimple_assign_rhs_code (stmt
) != VEC_COND_EXPR
)
4209 tree cond
= gimple_assign_rhs1 (stmt
);
4210 tree_code cmp
= TREE_CODE (cond
);
4211 if (cmp
!= SSA_NAME
)
4214 gassign
*assign
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (cond
));
4216 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign
)) != tcc_comparison
)
4219 cmp
= gimple_assign_rhs_code (assign
);
4221 *lhs
= gimple_assign_rhs1 (assign
);
4223 *rhs
= gimple_assign_rhs2 (assign
);
4225 /* ??? For now, allow only canonical true and false result vectors.
4226 We could expand this to other constants should the need arise,
4227 but at the moment we don't create them. */
4228 tree t
= gimple_assign_rhs2 (stmt
);
4229 tree f
= gimple_assign_rhs3 (stmt
);
4231 if (integer_all_onesp (t
))
4233 else if (integer_all_onesp (f
))
4235 cmp
= invert_tree_comparison (cmp
, false);
4240 if (!integer_zerop (f
))
4249 *type
= TREE_TYPE (cond
);
4253 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4254 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
4257 optimize_vec_cond_expr (tree_code opcode
, vec
<operand_entry
*> *ops
)
4259 unsigned int length
= ops
->length (), i
, j
;
4260 bool any_changes
= false;
4265 for (i
= 0; i
< length
; ++i
)
4267 tree elt0
= (*ops
)[i
]->op
;
4269 gassign
*stmt0
, *vcond0
;
4271 tree type
, lhs0
, rhs0
;
4272 tree_code cmp0
= ovce_extract_ops (elt0
, &stmt0
, &invert
, &type
, &lhs0
,
4274 if (cmp0
== ERROR_MARK
)
4277 for (j
= i
+ 1; j
< length
; ++j
)
4279 tree
&elt1
= (*ops
)[j
]->op
;
4281 gassign
*stmt1
, *vcond1
;
4283 tree_code cmp1
= ovce_extract_ops (elt1
, &stmt1
, NULL
, NULL
, &lhs1
,
4285 if (cmp1
== ERROR_MARK
)
4289 if (opcode
== BIT_AND_EXPR
)
4290 comb
= maybe_fold_and_comparisons (type
, cmp0
, lhs0
, rhs0
,
4292 else if (opcode
== BIT_IOR_EXPR
)
4293 comb
= maybe_fold_or_comparisons (type
, cmp0
, lhs0
, rhs0
,
4301 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4303 fprintf (dump_file
, "Transforming ");
4304 print_generic_expr (dump_file
, gimple_assign_lhs (stmt0
));
4305 fprintf (dump_file
, " %c ", opcode
== BIT_AND_EXPR
? '&' : '|');
4306 print_generic_expr (dump_file
, gimple_assign_lhs (stmt1
));
4307 fprintf (dump_file
, " into ");
4308 print_generic_expr (dump_file
, comb
);
4309 fputc ('\n', dump_file
);
4312 gimple_stmt_iterator gsi
= gsi_for_stmt (vcond0
);
4313 tree exp
= force_gimple_operand_gsi (&gsi
, comb
, true, NULL_TREE
,
4314 true, GSI_SAME_STMT
);
4316 swap_ssa_operands (vcond0
, gimple_assign_rhs2_ptr (vcond0
),
4317 gimple_assign_rhs3_ptr (vcond0
));
4318 gimple_assign_set_rhs1 (vcond0
, exp
);
4319 update_stmt (vcond0
);
4321 elt1
= error_mark_node
;
4330 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
4332 if (oe
->op
== error_mark_node
)
/* Return true if STMT is a cast like:
   # _345 = PHI <_123(N), 1(...), 1(...)>
   where _234 has bool type, _123 has single use and
   bb N has a single successor M.  This is commonly used in
   the last block of a range test.

   Also return true if STMT is tcc_compare like:
   # _345 = PHI <_234(N), 1(...), 1(...)>
   where _234 has bool type, single use and
   bb N has a single successor M.  This is commonly used in
   the last block of a range test.  */

final_range_test_p (gimple *stmt)

  basic_block bb, rhs_bb, lhs_bb;
  use_operand_p use_p;

  if (!gimple_assign_cast_p (stmt)
      && (!is_gimple_assign (stmt)
	  || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
	      != tcc_comparison)))

  bb = gimple_bb (stmt);
  if (!single_succ_p (bb))
  e = single_succ_edge (bb);
  if (e->flags & EDGE_COMPLEX)

  lhs = gimple_assign_lhs (stmt);
  rhs = gimple_assign_rhs1 (stmt);
  if (gimple_assign_cast_p (stmt)
      && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	  || TREE_CODE (rhs) != SSA_NAME
	  || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))

  if (!gimple_assign_cast_p (stmt)
      && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))

  /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
  if (!single_imm_use (lhs, &use_p, &use_stmt))

  if (gimple_code (use_stmt) != GIMPLE_PHI
      || gimple_bb (use_stmt) != e->dest)

  /* And that the rhs is defined in the same loop.  */
  if (gimple_assign_cast_p (stmt))

      if (TREE_CODE (rhs) != SSA_NAME
	  || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
	  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))

      if (TREE_CODE (lhs) != SSA_NAME
	  || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
	  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4427 /* Return true if BB is suitable basic block for inter-bb range test
4428 optimization. If BACKWARD is true, BB should be the only predecessor
4429 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
4430 or compared with to find a common basic block to which all conditions
4431 branch to if true resp. false. If BACKWARD is false, TEST_BB should
4432 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4433 TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
4434 block points to an empty block that falls through into *OTHER_BB and
4435 the phi args match that path. */
4438 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
4439 bool *test_swapped_p
, bool backward
)
4441 edge_iterator ei
, ei2
;
4445 bool other_edge_seen
= false;
4450 /* Check last stmt first. */
4451 stmt
= last_nondebug_stmt (bb
);
4453 || (gimple_code (stmt
) != GIMPLE_COND
4454 && (backward
|| !final_range_test_p (stmt
)))
4455 || gimple_visited_p (stmt
)
4456 || stmt_could_throw_p (cfun
, stmt
)
4459 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
4462 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4463 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4464 to *OTHER_BB (if not set yet, try to find it out). */
4465 if (EDGE_COUNT (bb
->succs
) != 2)
4467 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
4469 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
4471 if (e
->dest
== test_bb
)
4480 if (*other_bb
== NULL
)
4482 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
4483 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
4485 else if (e
->dest
== e2
->dest
)
4486 *other_bb
= e
->dest
;
4487 if (*other_bb
== NULL
)
4490 if (e
->dest
== *other_bb
)
4491 other_edge_seen
= true;
4495 if (*other_bb
== NULL
|| !other_edge_seen
)
4498 else if (single_succ (bb
) != *other_bb
)
4501 /* Now check all PHIs of *OTHER_BB. */
4502 e
= find_edge (bb
, *other_bb
);
4503 e2
= find_edge (test_bb
, *other_bb
);
4505 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4507 gphi
*phi
= gsi
.phi ();
4508 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4509 corresponding to BB and TEST_BB predecessor must be the same. */
4510 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
4511 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
4513 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4514 one of the PHIs should have the lhs of the last stmt in
4515 that block as PHI arg and that PHI should have 0 or 1
4516 corresponding to it in all other range test basic blocks
4520 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
4521 == gimple_assign_lhs (stmt
)
4522 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
4523 || integer_onep (gimple_phi_arg_def (phi
,
4529 gimple
*test_last
= last_nondebug_stmt (test_bb
);
4530 if (gimple_code (test_last
) == GIMPLE_COND
)
4532 if (backward
? e2
->src
!= test_bb
: e
->src
!= bb
)
4535 /* For last_bb, handle also:
4537 goto <bb 6>; [34.00%]
4539 goto <bb 7>; [66.00%]
4541 <bb 6> [local count: 79512730]:
4543 <bb 7> [local count: 1073741824]:
4544 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4545 where bb 7 is *OTHER_BB, but the PHI values from the
4546 earlier bbs match the path through the empty bb
4550 e3
= EDGE_SUCC (test_bb
,
4551 e2
== EDGE_SUCC (test_bb
, 0) ? 1 : 0);
4554 e
== EDGE_SUCC (bb
, 0) ? 1 : 0);
4555 if (empty_block_p (e3
->dest
)
4556 && single_succ_p (e3
->dest
)
4557 && single_succ (e3
->dest
) == *other_bb
4558 && single_pred_p (e3
->dest
)
4559 && single_succ_edge (e3
->dest
)->flags
== EDGE_FALLTHRU
)
4562 e2
= single_succ_edge (e3
->dest
);
4564 e
= single_succ_edge (e3
->dest
);
4566 *test_swapped_p
= true;
4570 else if (gimple_phi_arg_def (phi
, e2
->dest_idx
)
4571 == gimple_assign_lhs (test_last
)
4572 && (integer_zerop (gimple_phi_arg_def (phi
,
4574 || integer_onep (gimple_phi_arg_def (phi
,
/* Return true if BB doesn't have side-effects that would disallow
   range test optimization, all SSA_NAMEs set in the bb are consumed
   in the bb and there are no PHIs.  */

no_side_effect_bb (basic_block bb)

  gimple_stmt_iterator gsi;

  if (!gimple_seq_empty_p (phi_nodes (bb)))
  last = last_nondebug_stmt (bb);
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))

      gimple *stmt = gsi_stmt (gsi);

      imm_use_iterator imm_iter;
      use_operand_p use_p;

      if (is_gimple_debug (stmt))
      if (gimple_has_side_effects (stmt))
      if (!is_gimple_assign (stmt))
      lhs = gimple_assign_lhs (stmt);
      if (TREE_CODE (lhs) != SSA_NAME)
      if (gimple_assign_rhs_could_trap_p (stmt))
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)

	  gimple *use_stmt = USE_STMT (use_p);
	  if (is_gimple_debug (use_stmt))
	  if (gimple_bb (use_stmt) != bb)
/* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
   return true and fill in *OPS recursively.  */
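/* As an illustration (hypothetical SSA names): for

     _5 = _1 & _2;
     _6 = _3 & _4;
     _7 = _5 & _6;

   calling get_ops on _7 with CODE == BIT_AND_EXPR collects the leaf
   operands _1, _2, _3 and _4 into *OPS, provided each intermediate
   result has a single use.  */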
get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,

  gimple *stmt = SSA_NAME_DEF_STMT (var);

  if (!is_reassociable_op (stmt, code, loop))

  rhs[0] = gimple_assign_rhs1 (stmt);
  rhs[1] = gimple_assign_rhs2 (stmt);
  gimple_set_visited (stmt, true);
  for (i = 0; i < 2; i++)
    if (TREE_CODE (rhs[i]) == SSA_NAME
	&& !get_ops (rhs[i], code, ops, loop)
	&& has_single_use (rhs[i]))

	operand_entry *oe = operand_entry_pool.allocate ();

	oe->stmt_to_insert = NULL;
	ops->safe_push (oe);
/* Find the ops that were added by get_ops starting from VAR, see if
   they were changed during update_range_test and if yes, create new
   stmts.  */

update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops,
	    unsigned int *pidx, class loop *loop)

  gimple *stmt = SSA_NAME_DEF_STMT (var);

  if (!is_reassociable_op (stmt, code, loop))

  rhs[0] = gimple_assign_rhs1 (stmt);
  rhs[1] = gimple_assign_rhs2 (stmt);
  for (i = 0; i < 2; i++)
    if (TREE_CODE (rhs[i]) == SSA_NAME)

	rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
	if (rhs[2 + i] == NULL_TREE)
	    if (has_single_use (rhs[i]))
	      rhs[2 + i] = ops[(*pidx)++]->op;
	      rhs[2 + i] = rhs[i];

  if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
      && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))

      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      var = make_ssa_name (TREE_TYPE (var));
      gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
      gimple_set_uid (g, gimple_uid (stmt));
      gimple_set_visited (g, true);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
      if (fold_stmt_inplace (&gsi2))
4712 /* Structure to track the initial value passed to get_ops and
4713 the range in the ops vector for each basic block. */
4715 struct inter_bb_range_test_entry
4718 unsigned int first_idx
, last_idx
;
4721 /* Inter-bb range test optimization.
4723 Returns TRUE if a gimple conditional is optimized to a true/false,
4724 otherwise return FALSE.
4726 This indicates to the caller that it should run a CFG cleanup pass
4727 once reassociation is completed. */
4730 maybe_optimize_range_tests (gimple
*stmt
)
4732 basic_block first_bb
= gimple_bb (stmt
);
4733 basic_block last_bb
= first_bb
;
4734 basic_block other_bb
= NULL
;
4738 auto_vec
<operand_entry
*> ops
;
4739 auto_vec
<inter_bb_range_test_entry
> bbinfo
;
4740 bool any_changes
= false;
4741 bool cfg_cleanup_needed
= false;
4743 /* Consider only basic blocks that end with GIMPLE_COND or
4744 a cast statement satisfying final_range_test_p. All
4745 but the last bb in the first_bb .. last_bb range
4746 should end with GIMPLE_COND. */
4747 if (gimple_code (stmt
) == GIMPLE_COND
)
4749 if (EDGE_COUNT (first_bb
->succs
) != 2)
4750 return cfg_cleanup_needed
;
4752 else if (final_range_test_p (stmt
))
4753 other_bb
= single_succ (first_bb
);
4755 return cfg_cleanup_needed
;
4757 if (stmt_could_throw_p (cfun
, stmt
))
4758 return cfg_cleanup_needed
;
4760 /* As relative ordering of post-dominator sons isn't fixed,
4761 maybe_optimize_range_tests can be called first on any
4762 bb in the range we want to optimize. So, start searching
4763 backwards, if first_bb can be set to a predecessor. */
4764 while (single_pred_p (first_bb
))
4766 basic_block pred_bb
= single_pred (first_bb
);
4767 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, NULL
, true))
4769 if (!no_side_effect_bb (first_bb
))
4773 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4774 Before starting forward search in last_bb successors, find
4775 out the other_bb. */
4776 if (first_bb
== last_bb
)
4779 /* As non-GIMPLE_COND last stmt always terminates the range,
4780 if forward search didn't discover anything, just give up. */
4781 if (gimple_code (stmt
) != GIMPLE_COND
)
4782 return cfg_cleanup_needed
;
4783 /* Look at both successors. Either it ends with a GIMPLE_COND
4784 and satisfies suitable_cond_bb, or ends with a cast and
4785 other_bb is that cast's successor. */
4786 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
4787 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
4788 || e
->dest
== first_bb
)
4789 return cfg_cleanup_needed
;
4790 else if (single_pred_p (e
->dest
))
4792 stmt
= last_nondebug_stmt (e
->dest
);
4794 && gimple_code (stmt
) == GIMPLE_COND
4795 && EDGE_COUNT (e
->dest
->succs
) == 2)
4797 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
,
4804 && final_range_test_p (stmt
)
4805 && find_edge (first_bb
, single_succ (e
->dest
)))
4807 other_bb
= single_succ (e
->dest
);
4808 if (other_bb
== first_bb
)
4812 if (other_bb
== NULL
)
4813 return cfg_cleanup_needed
;
4815 /* Now do the forward search, moving last_bb to successor bbs
4816 that aren't other_bb. */
4817 while (EDGE_COUNT (last_bb
->succs
) == 2)
4819 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
4820 if (e
->dest
!= other_bb
)
4824 if (!single_pred_p (e
->dest
))
4826 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, NULL
, false))
4828 if (!no_side_effect_bb (e
->dest
))
4832 if (first_bb
== last_bb
)
4833 return cfg_cleanup_needed
;
4834 /* Here basic blocks first_bb through last_bb's predecessor
4835 end with GIMPLE_COND, all of them have one of the edges to
4836 other_bb and another to another block in the range,
4837 all blocks except first_bb don't have side-effects and
4838 last_bb ends with either GIMPLE_COND, or cast satisfying
4839 final_range_test_p. */
4840 for (bb
= last_bb
; ; bb
= single_pred (bb
))
4842 enum tree_code code
;
4844 inter_bb_range_test_entry bb_ent
;
4846 bb_ent
.op
= NULL_TREE
;
4847 bb_ent
.first_idx
= ops
.length ();
4848 bb_ent
.last_idx
= bb_ent
.first_idx
;
4849 e
= find_edge (bb
, other_bb
);
4850 stmt
= last_nondebug_stmt (bb
);
4851 gimple_set_visited (stmt
, true);
4852 if (gimple_code (stmt
) != GIMPLE_COND
)
4854 use_operand_p use_p
;
4859 lhs
= gimple_assign_lhs (stmt
);
4860 rhs
= gimple_assign_rhs1 (stmt
);
4861 gcc_assert (bb
== last_bb
);
4870 # _345 = PHI <_123(N), 1(...), 1(...)>
4872 or 0 instead of 1. If it is 0, the _234
4873 range test is anded together with all the
4874 other range tests, if it is 1, it is ored with
4876 single_imm_use (lhs
, &use_p
, &phi
);
4877 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
4878 e2
= find_edge (first_bb
, other_bb
);
4880 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
4881 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
4882 code
= BIT_AND_EXPR
;
4885 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
4886 code
= BIT_IOR_EXPR
;
4889 /* If _234 SSA_NAME_DEF_STMT is
4891 (or &, corresponding to 1/0 in the phi arguments,
4892 push into ops the individual range test arguments
4893 of the bitwise or resp. and, recursively. */
4894 if (TREE_CODE (rhs
) == SSA_NAME
4895 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4897 && !get_ops (rhs
, code
, &ops
,
4898 loop_containing_stmt (stmt
))
4899 && has_single_use (rhs
))
4901 /* Otherwise, push the _234 range test itself. */
4902 operand_entry
*oe
= operand_entry_pool
.allocate ();
4908 oe
->stmt_to_insert
= NULL
;
4913 else if (is_gimple_assign (stmt
)
4914 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4916 && !get_ops (lhs
, code
, &ops
,
4917 loop_containing_stmt (stmt
))
4918 && has_single_use (lhs
))
4920 operand_entry
*oe
= operand_entry_pool
.allocate ();
4931 bb_ent
.last_idx
= ops
.length ();
4934 bbinfo
.safe_push (bb_ent
);
4935 for (unsigned int i
= bb_ent
.first_idx
; i
< bb_ent
.last_idx
; ++i
)
4936 ops
[i
]->id
= bb
->index
;
4939 else if (bb
== last_bb
)
4941 /* For last_bb, handle also:
4943 goto <bb 6>; [34.00%]
4945 goto <bb 7>; [66.00%]
4947 <bb 6> [local count: 79512730]:
4949 <bb 7> [local count: 1073741824]:
4950 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4951 where bb 7 is OTHER_BB, but the PHI values from the
4952 earlier bbs match the path through the empty bb
4954 bool test_swapped_p
= false;
4955 bool ok
= suitable_cond_bb (single_pred (last_bb
), last_bb
,
4956 &other_bb
, &test_swapped_p
, true);
4959 e
= EDGE_SUCC (bb
, e
== EDGE_SUCC (bb
, 0) ? 1 : 0);
4961 /* Otherwise stmt is GIMPLE_COND. */
4962 code
= gimple_cond_code (stmt
);
4963 lhs
= gimple_cond_lhs (stmt
);
4964 rhs
= gimple_cond_rhs (stmt
);
4965 if (TREE_CODE (lhs
) == SSA_NAME
4966 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
4967 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
4968 || rhs
!= boolean_false_node
4969 /* Either push into ops the individual bitwise
4970 or resp. and operands, depending on which
4971 edge is other_bb. */
4972 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
4973 ^ (code
== EQ_EXPR
))
4974 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
4975 loop_containing_stmt (stmt
))))
4977 /* Or push the GIMPLE_COND stmt itself. */
4978 operand_entry
*oe
= operand_entry_pool
.allocate ();
4981 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
4982 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
4983 /* oe->op = NULL signs that there is no SSA_NAME
4984 for the range test, and oe->id instead is the
4985 basic block number, at which's end the GIMPLE_COND
4989 oe
->stmt_to_insert
= NULL
;
4994 else if (ops
.length () > bb_ent
.first_idx
)
4997 bb_ent
.last_idx
= ops
.length ();
4999 bbinfo
.safe_push (bb_ent
);
5000 for (unsigned int i
= bb_ent
.first_idx
; i
< bb_ent
.last_idx
; ++i
)
5001 ops
[i
]->id
= bb
->index
;
5005 if (ops
.length () > 1)
5006 any_changes
= optimize_range_tests (ERROR_MARK
, &ops
, first_bb
);
5009 unsigned int idx
, max_idx
= 0;
5010 /* update_ops relies on has_single_use predicates returning the
5011 same values as it did during get_ops earlier. Additionally it
5012 never removes statements, only adds new ones and it should walk
5013 from the single imm use and check the predicate already before
5014 making those changes.
5015 On the other side, the handling of GIMPLE_COND directly can turn
5016 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
5017 it needs to be done in a separate loop afterwards. */
5018 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
5020 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
5021 && bbinfo
[idx
].op
!= NULL_TREE
)
5026 stmt
= last_nondebug_stmt (bb
);
5027 new_op
= update_ops (bbinfo
[idx
].op
,
5029 ops
[bbinfo
[idx
].first_idx
]->rank
,
5030 ops
, &bbinfo
[idx
].first_idx
,
5031 loop_containing_stmt (stmt
));
5032 if (new_op
== NULL_TREE
)
5034 gcc_assert (bb
== last_bb
);
5035 new_op
= ops
[bbinfo
[idx
].first_idx
++]->op
;
5037 if (bbinfo
[idx
].op
!= new_op
)
5039 imm_use_iterator iter
;
5040 use_operand_p use_p
;
5041 gimple
*use_stmt
, *cast_or_tcc_cmp_stmt
= NULL
;
5043 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, bbinfo
[idx
].op
)
5044 if (is_gimple_debug (use_stmt
))
5046 else if (gimple_code (use_stmt
) == GIMPLE_COND
5047 || gimple_code (use_stmt
) == GIMPLE_PHI
)
5048 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
5049 SET_USE (use_p
, new_op
);
5050 else if ((is_gimple_assign (use_stmt
)
5052 (gimple_assign_rhs_code (use_stmt
))
5053 == tcc_comparison
)))
5054 cast_or_tcc_cmp_stmt
= use_stmt
;
5055 else if (gimple_assign_cast_p (use_stmt
))
5056 cast_or_tcc_cmp_stmt
= use_stmt
;
5060 if (cast_or_tcc_cmp_stmt
)
5062 gcc_assert (bb
== last_bb
);
5063 tree lhs
= gimple_assign_lhs (cast_or_tcc_cmp_stmt
);
5064 tree new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
5065 enum tree_code rhs_code
5066 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt
)
5067 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt
)
5070 if (is_gimple_min_invariant (new_op
))
5072 new_op
= fold_convert (TREE_TYPE (lhs
), new_op
);
5073 g
= gimple_build_assign (new_lhs
, new_op
);
5076 g
= gimple_build_assign (new_lhs
, rhs_code
, new_op
);
5077 gimple_stmt_iterator gsi
5078 = gsi_for_stmt (cast_or_tcc_cmp_stmt
);
5079 gimple_set_uid (g
, gimple_uid (cast_or_tcc_cmp_stmt
));
5080 gimple_set_visited (g
, true);
5081 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
5082 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
5083 if (is_gimple_debug (use_stmt
))
5085 else if (gimple_code (use_stmt
) == GIMPLE_COND
5086 || gimple_code (use_stmt
) == GIMPLE_PHI
)
5087 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
5088 SET_USE (use_p
, new_lhs
);
5097 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
5099 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
5100 && bbinfo
[idx
].op
== NULL_TREE
5101 && ops
[bbinfo
[idx
].first_idx
]->op
!= NULL_TREE
)
5103 gcond
*cond_stmt
= as_a
<gcond
*> (*gsi_last_bb (bb
));
5108 /* If we collapse the conditional to a true/false
5109 condition, then bubble that knowledge up to our caller. */
5110 if (integer_zerop (ops
[bbinfo
[idx
].first_idx
]->op
))
5112 gimple_cond_make_false (cond_stmt
);
5113 cfg_cleanup_needed
= true;
5115 else if (integer_onep (ops
[bbinfo
[idx
].first_idx
]->op
))
5117 gimple_cond_make_true (cond_stmt
);
5118 cfg_cleanup_needed
= true;
5122 gimple_cond_set_code (cond_stmt
, NE_EXPR
);
5123 gimple_cond_set_lhs (cond_stmt
,
5124 ops
[bbinfo
[idx
].first_idx
]->op
);
5125 gimple_cond_set_rhs (cond_stmt
, boolean_false_node
);
5127 update_stmt (cond_stmt
);
5133 /* The above changes could result in basic blocks after the first
5134 modified one, up to and including last_bb, to be executed even if
5135 they would not be in the original program. If the value ranges of
5136 assignment lhs' in those bbs were dependent on the conditions
5137 guarding those basic blocks which now can change, the VRs might
5138 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
5139 are only used within the same bb, it should be not a big deal if
5140 we just reset all the VRs in those bbs. See PR68671. */
5141 for (bb
= last_bb
, idx
= 0; idx
< max_idx
; bb
= single_pred (bb
), idx
++)
5142 reset_flow_sensitive_info_in_bb (bb
);
5144 return cfg_cleanup_needed
;
/* Remove def stmt of VAR if VAR has zero uses and recurse
   on rhs1 operand if so.  */

remove_visited_stmt_chain (tree var)

  gimple_stmt_iterator gsi;

      if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
      stmt = SSA_NAME_DEF_STMT (var);
      if (is_gimple_assign (stmt) && gimple_visited_p (stmt))

	  var = gimple_assign_rhs1 (stmt);
	  gsi = gsi_for_stmt (stmt);
	  reassoc_remove_stmt (&gsi);
	  release_defs (stmt);
/* This function checks three consecutive operands in
   passed operands vector OPS starting from OPINDEX and
   swaps two operands if it is profitable for binary operation
   consuming OPINDEX + 1 and OPINDEX + 2 operands.

   We pair ops with the same rank if possible.  */
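/* As an illustration (hypothetical rank values): with ranks {3, 1, 3}
   at OPINDEX, OPINDEX + 1 and OPINDEX + 2, the first two entries are
   swapped so that the two rank-3 operands end up at OPINDEX + 1 and
   OPINDEX + 2, where they feed the same binary statement.  */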
swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
			  unsigned int opindex)

  operand_entry *oe1, *oe2, *oe3;

  oe2 = ops[opindex + 1];
  oe3 = ops[opindex + 2];
  if (oe1->rank == oe2->rank && oe2->rank != oe3->rank)
    std::swap (*oe1, *oe3);
  else if (oe1->rank == oe3->rank && oe2->rank != oe3->rank)
    std::swap (*oe1, *oe2);
/* If definition of RHS1 or RHS2 dominates STMT, return the later of those
   two definitions, otherwise return STMT.  Sets INSERT_BEFORE to indicate
   whether RHS1 op RHS2 can be inserted before or needs to be inserted
   after the returned stmt.  */
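/* As an illustration: if STMT dominates the definition of RHS1 (so RHS1
   only becomes available later in the instruction stream), the insertion
   point is moved down to that defining statement and INSERT_BEFORE is
   cleared, because the new RHS1 op RHS2 statement must go after it.  */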
static inline gimple *
find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before)

  insert_before = true;
  if (TREE_CODE (rhs1) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))

      stmt = SSA_NAME_DEF_STMT (rhs1);
      insert_before = false;

  if (TREE_CODE (rhs2) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))

      stmt = SSA_NAME_DEF_STMT (rhs2);
      insert_before = false;
/* If the stmt that defines operand has to be inserted, insert it
   before the use.  */

insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)

  gcc_assert (is_gimple_assign (stmt_to_insert));
  tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
  tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);

  gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before);
  gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
  gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));

  /* If the insert point is not stmt, then insert_point would be
     the point where operand rhs1 or rhs2 is defined.  In this case,
     stmt_to_insert has to be inserted afterwards.  This would
     only happen when the stmt insertion point is flexible.  */
    gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
    insert_stmt_after (stmt_to_insert, insert_point);
5244 /* Recursively rewrite our linearized statements so that the operators
5245 match those in OPS[OPINDEX], putting the computation in rank
5246 order. Return new lhs.
5247 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5248 the current stmt and during recursive invocations.
5249 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5250 recursive invocations. */
5253 rewrite_expr_tree (gimple
*stmt
, enum tree_code rhs_code
, unsigned int opindex
,
5254 const vec
<operand_entry
*> &ops
, bool changed
,
5257 tree rhs1
= gimple_assign_rhs1 (stmt
);
5258 tree rhs2
= gimple_assign_rhs2 (stmt
);
5259 tree lhs
= gimple_assign_lhs (stmt
);
5262 /* The final recursion case for this function is that you have
5263 exactly two operations left.
5264 If we had exactly one op in the entire list to start with, we
5265 would have never called this function, and the tail recursion
5266 rewrites them one at a time. */
5267 if (opindex
+ 2 == ops
.length ())
5269 operand_entry
*oe1
, *oe2
;
5272 oe2
= ops
[opindex
+ 1];
5274 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
5276 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
5277 unsigned int uid
= gimple_uid (stmt
);
5279 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5281 fprintf (dump_file
, "Transforming ");
5282 print_gimple_stmt (dump_file
, stmt
, 0);
5285 /* If the stmt that defines operand has to be inserted, insert it
5287 if (oe1
->stmt_to_insert
)
5288 insert_stmt_before_use (stmt
, oe1
->stmt_to_insert
);
5289 if (oe2
->stmt_to_insert
)
5290 insert_stmt_before_use (stmt
, oe2
->stmt_to_insert
);
5291 /* Even when changed is false, reassociation could have e.g. removed
5292 some redundant operations, so unless we are just swapping the
5293 arguments or unless there is no change at all (then we just
5294 return lhs), force creation of a new SSA_NAME. */
5295 if (changed
|| ((rhs1
!= oe2
->op
|| rhs2
!= oe1
->op
) && opindex
))
5298 gimple
*insert_point
5299 = find_insert_point (stmt
, oe1
->op
, oe2
->op
, insert_before
);
5300 lhs
= make_ssa_name (TREE_TYPE (lhs
));
5302 = gimple_build_assign (lhs
, rhs_code
,
5304 gimple_set_uid (stmt
, uid
);
5305 gimple_set_visited (stmt
, true);
5307 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5309 insert_stmt_after (stmt
, insert_point
);
5314 gcc_checking_assert (find_insert_point (stmt
, oe1
->op
, oe2
->op
,
5317 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
5318 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
5322 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
5323 remove_visited_stmt_chain (rhs1
);
5325 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5327 fprintf (dump_file
, " into ");
5328 print_gimple_stmt (dump_file
, stmt
, 0);
5334 /* If we hit here, we should have 3 or more ops left. */
5335 gcc_assert (opindex
+ 2 < ops
.length ());
5337 /* Rewrite the next operator. */
5340 /* If the stmt that defines operand has to be inserted, insert it
5342 if (oe
->stmt_to_insert
)
5343 insert_stmt_before_use (stmt
, oe
->stmt_to_insert
);
5345 /* Recurse on the LHS of the binary operator, which is guaranteed to
5346 be the non-leaf side. */
5348 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), rhs_code
, opindex
+ 1, ops
,
5349 changed
|| oe
->op
!= rhs2
|| next_changed
,
5352 if (oe
->op
!= rhs2
|| new_rhs1
!= rhs1
)
5354 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5356 fprintf (dump_file
, "Transforming ");
5357 print_gimple_stmt (dump_file
, stmt
, 0);
5360 /* If changed is false, this is either opindex == 0
5361 or all outer rhs2's were equal to corresponding oe->op,
5362 and powi_result is NULL.
5363 That means lhs is equivalent before and after reassociation.
5364 Otherwise ensure the old lhs SSA_NAME is not reused and
5365 create a new stmt as well, so that any debug stmts will be
5366 properly adjusted. */
5369 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
5370 unsigned int uid
= gimple_uid (stmt
);
5372 gimple
*insert_point
= find_insert_point (stmt
, new_rhs1
, oe
->op
,
5375 lhs
= make_ssa_name (TREE_TYPE (lhs
));
5376 stmt
= gimple_build_assign (lhs
, rhs_code
,
5378 gimple_set_uid (stmt
, uid
);
5379 gimple_set_visited (stmt
, true);
5381 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5383 insert_stmt_after (stmt
, insert_point
);
5388 gcc_checking_assert (find_insert_point (stmt
, new_rhs1
, oe
->op
,
5391 gimple_assign_set_rhs1 (stmt
, new_rhs1
);
5392 gimple_assign_set_rhs2 (stmt
, oe
->op
);
5396 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5398 fprintf (dump_file
, " into ");
5399 print_gimple_stmt (dump_file
, stmt
, 0);
/* Find out how many cycles we need to compute statements chain.
   OPS_NUM holds number of statements in a chain.  CPU_WIDTH is a
   maximum number of independent statements we may execute per cycle.  */
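/* A worked example (illustrative numbers only): with OPS_NUM = 10 and
   CPU_WIDTH = 2, the first phase needs 10 / (2 * 2) = 2 cycles and
   leaves 10 - 2 * 2 = 6 operands; 6 is not a power of two, so the
   second phase adds floor_log2 (6) + 1 = 3 more cycles, 5 in total.  */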
get_required_cycles (int ops_num, int cpu_width)

  /* While we have more than 2 * cpu_width operands
     we may reduce number of operands by cpu_width
     per cycle.  */
  res = ops_num / (2 * cpu_width);

  /* Remained operands count may be reduced twice per cycle
     until we have only one operand.  */
  rest = (unsigned) (ops_num - res * cpu_width);
  elog = exact_log2 (rest);
    res += floor_log2 (rest) + 1;
/* Given that the target fully pipelines FMA instructions, return the latency
   of MULT_EXPRs that can't be hidden by the FMAs.  WIDTH is the number of
   pipes.  */

get_mult_latency_consider_fma (int ops_num, int mult_num, int width)

  gcc_checking_assert (mult_num && mult_num <= ops_num);

  /* For each partition, if mult_num == ops_num, there's latency(MULT)*2.
       _2 = .FMA (C, D, _1);
     Otherwise there's latency(MULT)*1 in the first FMA.  */
  return CEIL (ops_num, width) == CEIL (mult_num, width) ? 2 : 1;
5454 /* Returns an optimal number of registers to use for computation of
5457 LHS is the result ssa name of OPS. MULT_NUM is number of sub-expressions
5458 that are MULT_EXPRs, when OPS are PLUS_EXPRs or MINUS_EXPRs. */
5461 get_reassociation_width (vec
<operand_entry
*> *ops
, int mult_num
, tree lhs
,
5462 enum tree_code opc
, machine_mode mode
)
5464 int param_width
= param_tree_reassoc_width
;
5468 int ops_num
= ops
->length ();
5470 if (param_width
> 0)
5471 width
= param_width
;
5473 width
= targetm
.sched
.reassociation_width (opc
, mode
);
5478 /* Get the minimal time required for sequence computation. */
5479 cycles_best
= get_required_cycles (ops_num
, width
);
5481 /* Check if we may use less width and still compute sequence for
5482 the same time. It will allow us to reduce registers usage.
5483 get_required_cycles is monotonically increasing with lower width
5484 so we can perform a binary search for the minimal width that still
5485 results in the optimal cycle count. */
5488 /* If the target fully pipelines FMA instruction, the multiply part can start
5489 already if its operands are ready. Assuming symmetric pipes are used for
5490 FMUL/FADD/FMA, then for a sequence of FMA like:
5492 _8 = .FMA (_2, _3, _1);
5493 _9 = .FMA (_5, _4, _8);
5494 _10 = .FMA (_7, _6, _9);
5496 , if width=1, the latency is latency(MULT) + latency(ADD)*3.
5500 _9 = .FMA (_2, _3, _1);
5501 _10 = .FMA (_6, _7, _8);
5504 , it is latency(MULT)*2 + latency(ADD)*2. Assuming latency(MULT) >=
5505 latency(ADD), the first variant is preferred.
5507 Find out if we can get a smaller width considering FMA. */
5508 if (width
> 1 && mult_num
&& param_fully_pipelined_fma
)
5510 /* When param_fully_pipelined_fma is set, assume FMUL and FMA use the
5511 same units that can also do FADD. For other scenarios, such as when
5512 FMUL and FADD are using separated units, the following code may not
5514 int width_mult
= targetm
.sched
.reassociation_width (MULT_EXPR
, mode
);
5515 gcc_checking_assert (width_mult
<= width
);
5517 /* Latency of MULT_EXPRs. */
5519 = get_mult_latency_consider_fma (ops_num
, mult_num
, width_mult
);
5521 /* Quick search might not apply. So start from 1. */
5522 for (int i
= 1; i
< width_mult
; i
++)
5525 = get_mult_latency_consider_fma (ops_num
, mult_num
, i
);
5526 int lat_add_new
= get_required_cycles (ops_num
, i
);
5528 /* Assume latency(MULT) >= latency(ADD). */
5529 if (lat_mul
- lat_mul_new
>= lat_add_new
- cycles_best
)
5538 while (width
> width_min
)
5540 int width_mid
= (width
+ width_min
) / 2;
5542 if (get_required_cycles (ops_num
, width_mid
) == cycles_best
)
5544 else if (width_min
< width_mid
)
5545 width_min
= width_mid
;
5551 /* If there's loop dependent FMA result, return width=2 to avoid it. This is
5552 better than skipping these FMA candidates in widening_mul. */
5554 && maybe_le (tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (lhs
))),
5555 param_avoid_fma_max_bits
))
5557 /* Look for cross backedge dependency:
5558 1. LHS is a phi argument in the same basic block it is defined.
5559 2. And the result of the phi node is used in OPS. */
5560 basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (lhs
));
5562 use_operand_p use_p
;
5563 imm_use_iterator iter
;
5564 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lhs
)
5565 if (gphi
*phi
= dyn_cast
<gphi
*> (USE_STMT (use_p
)))
5567 if (gimple_phi_arg_edge (phi
, phi_arg_index_from_use (use_p
))->src
5570 tree phi_result
= gimple_phi_result (phi
);
5573 FOR_EACH_VEC_ELT (*ops
, j
, oe
)
5575 if (TREE_CODE (oe
->op
) != SSA_NAME
)
5578 /* Result of phi is operand of PLUS_EXPR. */
5579 if (oe
->op
== phi_result
)
5582 /* Check is result of phi is operand of MULT_EXPR. */
5583 gimple
*def_stmt
= SSA_NAME_DEF_STMT (oe
->op
);
5584 if (is_gimple_assign (def_stmt
)
5585 && gimple_assign_rhs_code (def_stmt
) == NEGATE_EXPR
)
5587 tree rhs
= gimple_assign_rhs1 (def_stmt
);
5588 if (TREE_CODE (rhs
) == SSA_NAME
)
5590 if (rhs
== phi_result
)
5592 def_stmt
= SSA_NAME_DEF_STMT (rhs
);
5595 if (is_gimple_assign (def_stmt
)
5596 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
)
5598 if (gimple_assign_rhs1 (def_stmt
) == phi_result
5599 || gimple_assign_rhs2 (def_stmt
) == phi_result
)
5609 #define SPECIAL_BIASED_END_STMT 0 /* It is the end stmt of all ops. */
5610 #define BIASED_END_STMT 1 /* It is the end stmt of normal or biased ops. */
5611 #define NORMAL_END_STMT 2 /* It is the end stmt of normal ops. */
5613 /* Rewrite statements with dependency chain with regard the chance to generate
5615 For the chain with FMA: Try to keep fma opportunity as much as possible.
5616 For the chain without FMA: Putting the computation in rank order and trying
5617 to allow operations to be executed in parallel.
5619 e + f + a * b + c * d;
5625 This reassociation approach preserves the chance of fma generation as much
5628 Another thing is to avoid adding loop-carried ops to long chains, otherwise
5629 the whole chain will have dependencies across the loop iteration. Just keep
5630 loop-carried ops in a separate chain.
5632 x_1 = phi (x_0, x_2)
5633 y_1 = phi (y_0, y_2)
5635 a + b + c + d + e + x1 + y1
5645 rewrite_expr_tree_parallel (gassign
*stmt
, int width
, bool has_fma
,
5646 const vec
<operand_entry
*> &ops
)
5648 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
5649 int op_num
= ops
.length ();
5650 int op_normal_num
= op_num
;
5651 gcc_assert (op_num
> 0);
5652 int stmt_num
= op_num
- 1;
5653 gimple
**stmts
= XALLOCAVEC (gimple
*, stmt_num
);
5655 tree tmp_op
[2], op1
;
5657 gimple
*stmt1
= NULL
;
5658 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
5659 int last_rhs1_stmt_index
= 0, last_rhs2_stmt_index
= 0;
5660 int width_active
= 0, width_count
= 0;
5661 bool has_biased
= false, ops_changed
= false;
5662 auto_vec
<operand_entry
*> ops_normal
;
5663 auto_vec
<operand_entry
*> ops_biased
;
5664 vec
<operand_entry
*> *ops1
;
5666 /* We start expression rewriting from the top statements.
5667 So, in this loop we create a full list of statements
5668 we will work with. */
5669 stmts
[stmt_num
- 1] = stmt
;
5670 for (i
= stmt_num
- 2; i
>= 0; i
--)
5671 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
5673 /* Avoid adding loop-carried ops to long chains, first filter out the
5674 loop-carried. But we need to make sure that the length of the remainder
5675 is not less than 4, which is the smallest ops length we can break the
5677 FOR_EACH_VEC_ELT (ops
, i
, oe
)
5679 if (TREE_CODE (oe
->op
) == SSA_NAME
5680 && bitmap_bit_p (biased_names
, SSA_NAME_VERSION (oe
->op
))
5681 && op_normal_num
> 4)
5683 ops_biased
.safe_push (oe
);
5688 ops_normal
.safe_push (oe
);
5691 /* Width should not be larger than ops length / 2, since we can not create
5692 more parallel dependency chains that exceeds such value. */
5693 int width_normal
= op_normal_num
/ 2;
5694 int width_biased
= (op_num
- op_normal_num
) / 2;
5695 width_normal
= width
<= width_normal
? width
: width_normal
;
5696 width_biased
= width
<= width_biased
? width
: width_biased
;
5699 width_count
= width_active
= width_normal
;
5701 /* Build parallel dependency chain according to width. */
5702 for (i
= 0; i
< stmt_num
; i
++)
5704 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5706 fprintf (dump_file
, "Transforming ");
5707 print_gimple_stmt (dump_file
, stmts
[i
], 0);
5710 /* When the work of normal ops is over, but the loop is not over,
5711 continue to do biased ops. */
5712 if (width_count
== 0 && ops1
== &ops_normal
)
5715 width_count
= width_active
= width_biased
;
5719 /* Swap the operands if no FMA in the chain. */
5720 if (ops1
->length () > 2 && !has_fma
)
5721 swap_ops_for_binary_stmt (*ops1
, ops1
->length () - 3);
5723 if (i
< width_active
5724 || (ops_changed
&& i
<= (last_rhs1_stmt_index
+ width_active
)))
5726 for (j
= 0; j
< 2; j
++)
5730 /* If the stmt that defines operand has to be inserted, insert it
5732 stmt1
= oe
->stmt_to_insert
;
5734 insert_stmt_before_use (stmts
[i
], stmt1
);
5737 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
),
5741 gimple_set_visited (stmts
[i
], true);
5746 /* We keep original statement only for the last one. All others are
5748 if (!ops1
->length ())
5750 /* For biased length equal to 2. */
5751 if (width_count
== BIASED_END_STMT
&& !last_rhs2_stmt_index
)
5752 last_rhs2_stmt_index
= i
- 1;
5754 /* When width_count == 2 and there is no biased, just finish. */
5755 if (width_count
== NORMAL_END_STMT
&& !has_biased
)
5757 last_rhs1_stmt_index
= i
- 1;
5758 last_rhs2_stmt_index
= i
- 2;
5760 if (last_rhs1_stmt_index
&& (last_rhs2_stmt_index
|| !has_biased
))
5762 /* We keep original statement only for the last one. All
5763 others are recreated. */
5764 gimple_assign_set_rhs1 (stmts
[i
], gimple_assign_lhs
5765 (stmts
[last_rhs1_stmt_index
]));
5766 gimple_assign_set_rhs2 (stmts
[i
], gimple_assign_lhs
5767 (stmts
[last_rhs2_stmt_index
]));
5768 update_stmt (stmts
[i
]);
5773 build_and_add_sum (TREE_TYPE (last_rhs1
),
5774 gimple_assign_lhs (stmts
[i
-width_count
]),
5776 (stmts
[i
-width_count
+1]),
5778 gimple_set_visited (stmts
[i
], true);
5781 /* It is the end of normal or biased ops.
5782 last_rhs1_stmt_index used to record the last stmt index
5783 for normal ops. last_rhs2_stmt_index used to record the
5784 last stmt index for biased ops. */
5785 if (width_count
== BIASED_END_STMT
)
5787 gcc_assert (has_biased
);
5788 if (ops_biased
.length ())
5789 last_rhs1_stmt_index
= i
;
5791 last_rhs2_stmt_index
= i
;
5798 /* Attach the rest ops to the parallel dependency chain. */
5801 stmt1
= oe
->stmt_to_insert
;
5803 insert_stmt_before_use (stmts
[i
], stmt1
);
5806 /* For only one biased ops. */
5807 if (width_count
== SPECIAL_BIASED_END_STMT
)
5809 /* We keep original statement only for the last one. All
5810 others are recreated. */
5811 gcc_assert (has_biased
);
5812 gimple_assign_set_rhs1 (stmts
[i
], gimple_assign_lhs
5813 (stmts
[last_rhs1_stmt_index
]));
5814 gimple_assign_set_rhs2 (stmts
[i
], op1
);
5815 update_stmt (stmts
[i
]);
5819 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
),
5821 (stmts
[i
-width_active
]),
5824 gimple_set_visited (stmts
[i
], true);
5829 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5831 fprintf (dump_file
, " into ");
5832 print_gimple_stmt (dump_file
, stmts
[i
], 0);
5836 remove_visited_stmt_chain (last_rhs1
);
/* Transform STMT, which is really (A + B) + (C + D) into the left
   linear form, ((A+B)+C)+D.
   Recurse on D if necessary.  */
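/* As an illustration (hypothetical SSA names), a tree such as

     t1_1 = A + B;
     t2_2 = C + D;
     x_3 = t1_1 + t2_2;

   is rewritten so that x_3 sums a left-linear chain: one operand of
   t2_2 is folded into a fresh statement built on top of t1_1, and x_3
   then adds the remaining operand, giving the ((A+B)+op)+op shape.  */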
linearize_expr (gimple *stmt)

  gimple_stmt_iterator gsi;
  gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
  gimple *oldbinrhs = binrhs;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  gimple *newbinrhs = NULL;
  class loop *loop = loop_containing_stmt (stmt);
  tree lhs = gimple_assign_lhs (stmt);

  gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
	      && is_reassociable_op (binrhs, rhscode, loop));

  gsi = gsi_for_stmt (stmt);

  gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
  binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
				gimple_assign_rhs_code (binrhs),
				gimple_assign_lhs (binlhs),
				gimple_assign_rhs2 (binrhs));
  gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
  gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
  gimple_set_uid (binrhs, gimple_uid (stmt));

  if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
    newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));

  if (dump_file && (dump_flags & TDF_DETAILS))

      fprintf (dump_file, "Linearized: ");
      print_gimple_stmt (dump_file, stmt, 0);

  reassociate_stats.linearized++;

  gsi = gsi_for_stmt (oldbinrhs);
  reassoc_remove_stmt (&gsi);
  release_defs (oldbinrhs);

  gimple_set_visited (stmt, true);
  gimple_set_visited (binlhs, true);
  gimple_set_visited (binrhs, true);

  /* Tail recurse on the new rhs if it still needs reassociation.  */
  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
    /* ??? This should probably be linearize_expr (newbinrhs) but I don't
       want to change the algorithm while converting to tuples.  */
    linearize_expr (stmt);
/* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
   it.  Otherwise, return NULL.  */

get_single_immediate_use (tree lhs)

  use_operand_p immuse;

  if (TREE_CODE (lhs) == SSA_NAME
      && single_imm_use (lhs, &immuse, &immusestmt)
      && is_gimple_assign (immusestmt))
/* Recursively negate the value of TONEGATE, and return the SSA_NAME
   representing the negated value.  Insertions of any necessary
   instructions go before GSI.
   This function is recursive in that, if you hand it "a_5" as the
   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
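/* When TONEGATE is not such a single-use addition, it is simply
   wrapped in a NEGATE_EXPR and gimplified before GSI; e.g. negating a
   constant or a multiply result just materializes the negation as one
   new statement (illustrative description of the fallback path).  */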
negate_value (tree tonegate, gimple_stmt_iterator *gsip)

  gimple *negatedefstmt = NULL;
  tree resultofnegate;
  gimple_stmt_iterator gsi;

  /* If we are trying to negate a name, defined by an add, negate the
     add operands instead.  */
  if (TREE_CODE (tonegate) == SSA_NAME)
    negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
  if (TREE_CODE (tonegate) == SSA_NAME
      && is_gimple_assign (negatedefstmt)
      && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
      && has_single_use (gimple_assign_lhs (negatedefstmt))
      && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)

      tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
      tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
      tree lhs = gimple_assign_lhs (negatedefstmt);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs1 = negate_value (rhs1, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs2 = negate_value (rhs2, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      lhs = make_ssa_name (TREE_TYPE (lhs));
      gimple_set_visited (negatedefstmt, true);
      g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
      gimple_set_uid (g, gimple_uid (negatedefstmt));
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);

  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
  resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
					     NULL_TREE, true, GSI_SAME_STMT);
  uid = gimple_uid (gsi_stmt (gsi));
  for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))

      gimple *stmt = gsi_stmt (gsi);
      if (gimple_uid (stmt) != 0)
      gimple_set_uid (stmt, uid);

  return resultofnegate;
/* Return true if we should break up the subtract in STMT into an add
   with negate.  This is true when the subtract operands are really
   adds, or the subtract itself is used in an add expression.  In
   either case, breaking up the subtract into an add with negate
   exposes the adds to reassociation.  */
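/* As an illustration (hypothetical names):

     c = a - b;
     e = c + d;

   is broken up into

     t = -b;
     c = a + t;
     e = c + d;

   so that the two additions become a single reassociable chain.  */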
should_break_up_subtract (gimple *stmt)

  tree lhs = gimple_assign_lhs (stmt);
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);

  class loop *loop = loop_containing_stmt (stmt);

  if (TREE_CODE (binlhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))

  if (TREE_CODE (binrhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))

  if (TREE_CODE (lhs) == SSA_NAME
      && (immusestmt = get_single_immediate_use (lhs))
      && is_gimple_assign (immusestmt)
      && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
	  || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
	      && gimple_assign_rhs1 (immusestmt) == lhs)
	  || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
/* Transform STMT from A - B into A + -B.  */

break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)

  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))

      fprintf (dump_file, "Breaking up subtract ");
      print_gimple_stmt (dump_file, stmt, 0);

  rhs2 = negate_value (rhs2, gsip);
  gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
/* Determine whether STMT is a builtin call that raises an SSA name
   to an integer power and has only one use.  If so, and this is early
   reassociation and unsafe math optimizations are permitted, place
   the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
   If any of these conditions does not hold, return FALSE.  */
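/* As an illustration (hypothetical call): for
     _7 = __builtin_powi (x_2, 4);
   *BASE is set to x_2 and *EXPONENT to 4; a pow () call with a
   non-integral constant exponent such as 3.5 is rejected, as are
   exponents below 2.  */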
acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)

  REAL_VALUE_TYPE c, cint;

  switch (gimple_call_combined_fn (stmt))

      if (flag_errno_math)

      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (TREE_CODE (arg1) != REAL_CST)

      c = TREE_REAL_CST (arg1);

      if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)

      *exponent = real_to_integer (&c);
      real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
      if (!real_identical (&c, &cint))

      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (!tree_fits_shwi_p (arg1))

      *exponent = tree_to_shwi (arg1);

  /* Expanding negative exponents is generally unproductive, so we don't
     complicate matters with those.  Exponents of zero and one should
     have been handled by expression folding.  */
  if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
/* Try to derive and add operand entry for OP to *OPS.  Return false if

try_special_add_to_ops (vec<operand_entry *> *ops,
			enum tree_code code,
			tree op, gimple *def_stmt)

  tree base = NULL_TREE;
  HOST_WIDE_INT exponent = 0;

  if (TREE_CODE (op) != SSA_NAME
      || ! has_single_use (op))

  if (code == MULT_EXPR
      && reassoc_insert_powi_p
      && flag_unsafe_math_optimizations
      && is_gimple_call (def_stmt)
      && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))

      add_repeat_to_ops_vec (ops, base, exponent);
      gimple_set_visited (def_stmt, true);

  else if (code == MULT_EXPR
	   && is_gimple_assign (def_stmt)
	   && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
	   && !HONOR_SNANS (TREE_TYPE (op))
	   && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
	       || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))
	   && (!FLOAT_TYPE_P (TREE_TYPE (op))
	       || !DECIMAL_FLOAT_MODE_P (element_mode (op))))

      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      tree cst = build_minus_one_cst (TREE_TYPE (op));
      add_to_ops_vec (ops, rhs1);
      add_to_ops_vec (ops, cst);
      gimple_set_visited (def_stmt, true);
6130 /* Recursively linearize a binary expression that is the RHS of STMT.
6131 Place the operands of the expression tree in the vector named OPS. */
6134 linearize_expr_tree (vec
<operand_entry
*> *ops
, gimple
*stmt
,
6135 bool is_associative
, bool set_visited
)
6137 tree binlhs
= gimple_assign_rhs1 (stmt
);
6138 tree binrhs
= gimple_assign_rhs2 (stmt
);
6139 gimple
*binlhsdef
= NULL
, *binrhsdef
= NULL
;
6140 bool binlhsisreassoc
= false;
6141 bool binrhsisreassoc
= false;
6142 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
6143 class loop
*loop
= loop_containing_stmt (stmt
);
6146 gimple_set_visited (stmt
, true);
6148 if (TREE_CODE (binlhs
) == SSA_NAME
)
6150 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
6151 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
6152 && !stmt_could_throw_p (cfun
, binlhsdef
));
6155 if (TREE_CODE (binrhs
) == SSA_NAME
)
6157 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
6158 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
6159 && !stmt_could_throw_p (cfun
, binrhsdef
));
6162 /* If the LHS is not reassociable, but the RHS is, we need to swap
6163 them. If neither is reassociable, there is nothing we can do, so
6164 just put them in the ops vector. If the LHS is reassociable,
6165 linearize it. If both are reassociable, then linearize the RHS
6168 if (!binlhsisreassoc
)
6170 /* If this is not a associative operation like division, give up. */
6171 if (!is_associative
)
6173 add_to_ops_vec (ops
, binrhs
);
6177 if (!binrhsisreassoc
)
6180 if (try_special_add_to_ops (ops
, rhscode
, binrhs
, binrhsdef
))
6181 /* If we add ops for the rhs we expect to be able to recurse
6182 to it via the lhs during expression rewrite so swap
6186 add_to_ops_vec (ops
, binrhs
);
6188 if (!try_special_add_to_ops (ops
, rhscode
, binlhs
, binlhsdef
))
6189 add_to_ops_vec (ops
, binlhs
);
6195 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6197 fprintf (dump_file
, "swapping operands of ");
6198 print_gimple_stmt (dump_file
, stmt
, 0);
6201 swap_ssa_operands (stmt
,
6202 gimple_assign_rhs1_ptr (stmt
),
6203 gimple_assign_rhs2_ptr (stmt
));
6206 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6208 fprintf (dump_file
, " is now ");
6209 print_gimple_stmt (dump_file
, stmt
, 0);
6211 if (!binrhsisreassoc
)
6214 /* We want to make it so the lhs is always the reassociative op,
6216 std::swap (binlhs
, binrhs
);
6218 else if (binrhsisreassoc
)
6220 linearize_expr (stmt
);
6221 binlhs
= gimple_assign_rhs1 (stmt
);
6222 binrhs
= gimple_assign_rhs2 (stmt
);
6225 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
6226 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
6228 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
6229 is_associative
, set_visited
);
6231 if (!try_special_add_to_ops (ops
, rhscode
, binrhs
, binrhsdef
))
6232 add_to_ops_vec (ops
, binrhs
);
6235 /* Repropagate the negates back into subtracts, since no other pass
6236 currently does it. */
6239 repropagate_negates (void)
6244 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
6246 gimple
*user
= get_single_immediate_use (negate
);
6247 if (!user
|| !is_gimple_assign (user
))
6250 tree negateop
= gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate
));
6251 if (TREE_CODE (negateop
) == SSA_NAME
6252 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop
))
6255 /* The negate operand can be either operand of a PLUS_EXPR
6256 (it can be the LHS if the RHS is a constant for example).
6258 Force the negate operand to the RHS of the PLUS_EXPR, then
6259 transform the PLUS_EXPR into a MINUS_EXPR. */
6260 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
6262 /* If the negated operand appears on the LHS of the
6263 PLUS_EXPR, exchange the operands of the PLUS_EXPR
6264 to force the negated operand to the RHS of the PLUS_EXPR. */
6265 if (gimple_assign_rhs1 (user
) == negate
)
6267 swap_ssa_operands (user
,
6268 gimple_assign_rhs1_ptr (user
),
6269 gimple_assign_rhs2_ptr (user
));
6272 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
6273 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
6274 if (gimple_assign_rhs2 (user
) == negate
)
6276 tree rhs1
= gimple_assign_rhs1 (user
);
6277 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
6278 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
,
6283 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
6285 if (gimple_assign_rhs1 (user
) == negate
)
6290 which we transform into
6293 This pushes down the negate which we possibly can merge
6294 into some other operation, hence insert it into the
6295 plus_negates vector. */
6296 gimple
*feed
= SSA_NAME_DEF_STMT (negate
);
6297 tree b
= gimple_assign_rhs2 (user
);
6298 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
);
6299 gimple_stmt_iterator gsi2
= gsi_for_stmt (user
);
6300 tree x
= make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed
)));
6301 gimple
*g
= gimple_build_assign (x
, PLUS_EXPR
, negateop
, b
);
6302 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
6303 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, x
);
6304 user
= gsi_stmt (gsi2
);
6306 reassoc_remove_stmt (&gsi
);
6307 release_defs (feed
);
6308 plus_negates
.safe_push (gimple_assign_lhs (user
));
6312 /* Transform "x = -negateop; y = b - x" into "y = b + negateop",
6313 getting rid of one operation. */
6314 tree rhs1
= gimple_assign_rhs1 (user
);
6315 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
6316 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, negateop
);
6317 update_stmt (gsi_stmt (gsi
));
/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement
   and set UIDs within each basic block.  */
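/* As an illustration of the top-down order (hypothetical chain):

     b = c - d;
     q = b - r;
     k = t - q;

   b must be rewritten as an add-with-negate before q looks like part
   of an addition chain, and q in turn before k does, which is why the
   dominator walk below visits blocks from the top down.  */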
break_up_subtract_bb (basic_block bb)

  gimple_stmt_iterator gsi;

  unsigned int uid = 1;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))

      gimple *stmt = gsi_stmt (gsi);
      gimple_set_visited (stmt, false);
      gimple_set_uid (stmt, uid++);

      if (!is_gimple_assign (stmt)
	  || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
	  || !can_reassociate_op_p (gimple_assign_lhs (stmt)))

      /* Look for simple gimple subtract operations.  */
      if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)

	  if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
	      || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))

	  /* Check for a subtract used only in an addition.  If this
	     is the case, transform it into add of a negate for better
	     reassociation.  IE transform C = A-B into C = A + -B if C
	     is only used in an addition.  */
	  if (should_break_up_subtract (stmt))
	    break_up_subtract (stmt, &gsi);

      else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
	       && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
	plus_negates.safe_push (gimple_assign_lhs (stmt));

  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son = next_dom_son (CDI_DOMINATORS, son))
    break_up_subtract_bb (son);
/* Used for repeated factor analysis.  */
struct repeat_factor

  /* An SSA name that occurs in a multiply chain.  */

  /* Cached rank of the factor.  */

  /* Number of occurrences of the factor in the chain.  */
  HOST_WIDE_INT count;

  /* An SSA name representing the product of this factor and
     all factors appearing later in the repeated factor vector.  */

static vec<repeat_factor> repeat_factor_vec;

/* Used for sorting the repeat factor vector.  Sort primarily by
   ascending occurrence count, secondarily by descending rank.  */
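/* For example (illustrative values): factors with (count, rank) pairs
   (2, 5), (3, 4) and (3, 6) sort as (2, 5), (3, 6), (3, 4): the lower
   occurrence count comes first, and among equal counts the higher
   rank comes first.  */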
compare_repeat_factors (const void *x1, const void *x2)

  const repeat_factor *rf1 = (const repeat_factor *) x1;
  const repeat_factor *rf2 = (const repeat_factor *) x2;

  if (rf1->count != rf2->count)
    return rf1->count - rf2->count;

  return rf2->rank - rf1->rank;
/* Look for repeated operands in OPS in the multiply tree rooted at
   STMT.  Replace them with an optimal sequence of multiplies and powi
   builtin calls, and remove the used operands from OPS.  Return an
   SSA name representing the value of the replacement sequence.  */

static tree
attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
{
  unsigned i, j, vec_len;
  int ii;
  operand_entry *oe;
  repeat_factor *rf1, *rf2;
  repeat_factor rfnew;
  tree result = NULL_TREE;
  tree target_ssa, iter_result;
  tree type = TREE_TYPE (gimple_get_lhs (stmt));
  tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gimple *mul_stmt, *pow_stmt;

  /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
     target, unless type is integral.  */
  if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
    return NULL_TREE;

  /* Allocate the repeated factor vector.  */
  repeat_factor_vec.create (10);

  /* Scan the OPS vector for all SSA names in the product and build
     up a vector of occurrence counts for each factor.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
	{
	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	    {
	      if (rf1->factor == oe->op)
		{
		  rf1->count += oe->count;
		  break;
		}
	    }

	  if (j >= repeat_factor_vec.length ())
	    {
	      rfnew.factor = oe->op;
	      rfnew.rank = oe->rank;
	      rfnew.count = oe->count;
	      rfnew.repr = NULL_TREE;
	      repeat_factor_vec.safe_push (rfnew);
	    }
	}
    }

  /* Sort the repeated factor vector by (a) increasing occurrence count,
     and (b) decreasing rank.  */
  repeat_factor_vec.qsort (compare_repeat_factors);
  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_pow opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions:

	t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = t1 * t3
	result = t4 * z

     If we instead ignored the cached y * z and first multiplied by
     the __builtin_pow opportunity z * z, we would get the inferior:

	t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = z * z
	t5 = t3 * t4
	result = t5 * y  */

  vec_len = repeat_factor_vec.length ();
  /* Repeatedly look for opportunities to create a builtin_powi call.  */
  while (true)
    {
      HOST_WIDE_INT power;

      /* First look for the largest cached product of factors from
	 preceding iterations.  If found, create a builtin_powi for
	 it if the minimum occurrence count for its factors is at
	 least 2, or just use this cached product as our next
	 multiplicand if the minimum occurrence count is 1.  */
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	{
	  if (rf1->repr && rf1->count > 0)
	    break;
	}

      if (j < vec_len)
	{
	  power = rf1->count;

	  if (power == 1)
	    {
	      iter_result = rf1->repr;

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor *rf;
		  fputs ("Multiplying by cached product ", dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = &repeat_factor_vec[elt];
		      print_generic_expr (dump_file, rf->factor);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fputs ("\n", dump_file);
		}
	    }
	  else
	    {
	      if (INTEGRAL_TYPE_P (type))
		{
		  gcc_assert (power > 1);
		  gimple_stmt_iterator gsip = gsi;
		  gsi_prev (&gsip);
		  iter_result = powi_as_mults (&gsi, gimple_location (stmt),
					       rf1->repr, power);
		  gimple_stmt_iterator gsic = gsi;
		  while (gsi_stmt (gsic) != gsi_stmt (gsip))
		    {
		      gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
		      gimple_set_visited (gsi_stmt (gsic), true);
		      gsi_prev (&gsic);
		    }
		}
	      else
		{
		  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
		  pow_stmt
		    = gimple_build_call (powi_fndecl, 2, rf1->repr,
					 build_int_cst (integer_type_node,
							power));
		  gimple_call_set_lhs (pow_stmt, iter_result);
		  gimple_set_location (pow_stmt, gimple_location (stmt));
		  gimple_set_uid (pow_stmt, gimple_uid (stmt));
		  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
		}

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor *rf;
		  fputs ("Building __builtin_pow call for cached product (",
			 dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = &repeat_factor_vec[elt];
		      print_generic_expr (dump_file, rf->factor);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n",
			   power);
		}
	    }
	}
      else
	{
	  /* Otherwise, find the first factor in the repeated factor
	     vector whose occurrence count is at least 2.  If no such
	     factor exists, there are no builtin_powi opportunities
	     remaining.  */
	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	    {
	      if (rf1->count >= 2)
		break;
	    }

	  if (j >= vec_len)
	    break;

	  power = rf1->count;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      unsigned elt;
	      repeat_factor *rf;
	      fputs ("Building __builtin_pow call for (", dump_file);
	      for (elt = j; elt < vec_len; elt++)
		{
		  rf = &repeat_factor_vec[elt];
		  print_generic_expr (dump_file, rf->factor);
		  if (elt < vec_len - 1)
		    fputs (" * ", dump_file);
		}
	      fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n", power);
	    }

	  reassociate_stats.pows_created++;

	  /* Visit each element of the vector in reverse order (so that
	     high-occurrence elements are visited first, and within the
	     same occurrence count, lower-ranked elements are visited
	     first).  Form a linear product of all elements in this order
	     whose occurrence count is at least that of element J.
	     Record the SSA name representing the product of each element
	     with all subsequent elements in the vector.  */
	  if (j == vec_len - 1)
	    rf1->repr = rf1->factor;
	  else
	    {
	      for (ii = vec_len - 2; ii >= (int)j; ii--)
		{
		  tree op1, op2;

		  rf1 = &repeat_factor_vec[ii];
		  rf2 = &repeat_factor_vec[ii + 1];

		  /* Init the last factor's representative to be itself.  */
		  if (!rf2->repr)
		    rf2->repr = rf2->factor;

		  op1 = rf1->factor;
		  op2 = rf2->repr;

		  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
		  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
						  op1, op2);
		  gimple_set_location (mul_stmt, gimple_location (stmt));
		  gimple_set_uid (mul_stmt, gimple_uid (stmt));
		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
		  rf1->repr = target_ssa;

		  /* Don't reprocess the multiply we just introduced.  */
		  gimple_set_visited (mul_stmt, true);
		}
	    }

	  /* Form a call to __builtin_powi for the maximum product
	     just formed, raised to the power obtained earlier.  */
	  rf1 = &repeat_factor_vec[j];
	  if (INTEGRAL_TYPE_P (type))
	    {
	      gcc_assert (power > 1);
	      gimple_stmt_iterator gsip = gsi;
	      gsi_prev (&gsip);
	      iter_result = powi_as_mults (&gsi, gimple_location (stmt),
					   rf1->repr, power);
	      gimple_stmt_iterator gsic = gsi;
	      while (gsi_stmt (gsic) != gsi_stmt (gsip))
		{
		  gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
		  gimple_set_visited (gsi_stmt (gsic), true);
		  gsi_prev (&gsic);
		}
	    }
	  else
	    {
	      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
					    build_int_cst (integer_type_node,
							   power));
	      gimple_call_set_lhs (pow_stmt, iter_result);
	      gimple_set_location (pow_stmt, gimple_location (stmt));
	      gimple_set_uid (pow_stmt, gimple_uid (stmt));
	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
	    }
	}
      /* If we previously formed at least one other builtin_powi call,
	 form the product of this one and those others.  */
      if (result)
	{
	  tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
	  mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
					  result, iter_result);
	  gimple_set_location (mul_stmt, gimple_location (stmt));
	  gimple_set_uid (mul_stmt, gimple_uid (stmt));
	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
	  gimple_set_visited (mul_stmt, true);
	  result = new_result;
	}
      else
	result = iter_result;

      /* Decrement the occurrence count of each element in the product
	 by the count found above, and remove this many copies of each
	 factor from OPS.  */
      for (i = j; i < vec_len; i++)
	{
	  unsigned k = power;
	  unsigned n;

	  rf1 = &repeat_factor_vec[i];
	  rf1->count -= power;

	  FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
	    {
	      if (oe->op == rf1->factor)
		{
		  if (oe->count <= k)
		    {
		      ops->ordered_remove (n);
		      k -= oe->count;

		      if (k == 0)
			break;
		    }
		  else
		    {
		      oe->count -= k;
		      break;
		    }
		}
	    }
	}
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}
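/* A hedged sketch of the overall effect (the temporaries and the exact shape
   of the emitted statements are illustrative, not taken from a real dump):
   with -funsafe-math-optimizations, a double chain such as

     r = x * x * y * y * y * z * z * z * z;

   is reduced along the lines of the caching example above, roughly

     t1   = y * z;
     t2   = t1 * x;
     pow2 = __builtin_powi (t2, 2);
     t4   = pow2 * t1;
     r    = t4 * z;

   while any operands that occur only once are left in OPS for the normal
   reassociation rewrite.  */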
/* Attempt to optimize
   CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
   CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */

static void
attempt_builtin_copysign (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  unsigned int i;
  unsigned int length = ops->length ();
  tree cst = ops->last ()->op;

  if (length == 1 || TREE_CODE (cst) != REAL_CST)
    return;

  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME
	  && has_single_use (oe->op))
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
	  if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
	    {
	      tree arg0, arg1;
	      switch (gimple_call_combined_fn (old_call))
		{
		CASE_CFN_COPYSIGN:
		CASE_CFN_COPYSIGN_FN:
		  arg0 = gimple_call_arg (old_call, 0);
		  arg1 = gimple_call_arg (old_call, 1);
		  /* The first argument of copysign must be a constant,
		     otherwise there's nothing to do.  */
		  if (TREE_CODE (arg0) == REAL_CST)
		    {
		      tree type = TREE_TYPE (arg0);
		      tree mul = const_binop (MULT_EXPR, type, cst, arg0);
		      /* If we couldn't fold to a single constant, skip it.
			 That happens e.g. for inexact multiplication when
			 -frounding-math.  */
		      if (mul == NULL_TREE)
			break;
		      /* Instead of adjusting OLD_CALL, let's build a new
			 call to not leak the LHS and prevent keeping bogus
			 debug statements.  DCE will clean up the old call.  */
		      gimple *new_call;
		      if (gimple_call_internal_p (old_call))
			new_call = gimple_build_call_internal
			  (IFN_COPYSIGN, 2, mul, arg1);
		      else
			new_call = gimple_build_call
			  (gimple_call_fndecl (old_call), 2, mul, arg1);
		      tree lhs = make_ssa_name (type);
		      gimple_call_set_lhs (new_call, lhs);
		      gimple_set_location (new_call,
					   gimple_location (old_call));
		      insert_stmt_after (new_call, old_call);
		      /* We've used the constant, get rid of it.  */
		      ops->pop ();

		      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
		      /* Handle the CST1 < 0 case by negating the result.  */
		      if (cst1_neg)
			{
			  tree negrhs = make_ssa_name (TREE_TYPE (lhs));
			  gimple *negate_stmt
			    = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
			  insert_stmt_after (negate_stmt, new_call);
			  lhs = negrhs;
			}
		      oe->op = lhs;

		      if (dump_file && (dump_flags & TDF_DETAILS))
			{
			  fprintf (dump_file, "Optimizing copysign: ");
			  print_generic_expr (dump_file, cst);
			  fprintf (dump_file, " * COPYSIGN (");
			  print_generic_expr (dump_file, arg0);
			  fprintf (dump_file, ", ");
			  print_generic_expr (dump_file, arg1);
			  fprintf (dump_file, ") into %sCOPYSIGN (",
				   cst1_neg ? "-" : "");
			  print_generic_expr (dump_file, mul);
			  fprintf (dump_file, ", ");
			  print_generic_expr (dump_file, arg1);
			  fprintf (dump_file, "\n");
			}
		      return;
		    }
		  break;
		default:
		  break;
		}
	    }
	}
    }
}
/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */

static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
{
  tree rhs1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (stmt);
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}
/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
			    tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}
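/* For example, when reassociate_bb (below) is left with a single operand t_5
   plus a __builtin_powi result p_6 (names hypothetical), the original
   statement x_1 = a_2 * b_3 is rewritten in place via this helper into
   x_1 = t_5 * p_6.  */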
/* Rearrange the ops so that more FMAs can be formed when the chain contains
   more than two potential FMAs.
   Put no-mult ops and mult ops alternately at the end of the queue, which is
   conducive to generating more FMAs and reducing the loss of FMAs when
   breaking the chain.
   E.g.
   a * b + c * d + e generates:

   _4  = c_9(D) * d_10(D);
   _12 = .FMA (a_7(D), b_8(D), _4);
   _11 = e_6(D) + _12;

   Rearranged to e + a * b + c * d, it generates:

   _4  = .FMA (c_7(D), d_8(D), _3);
   _11 = .FMA (a_5(D), b_6(D), _4);

   Return the number of MULT_EXPRs in the chain.  */
static int
rank_ops_for_fma (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  unsigned int i;
  unsigned int ops_length = ops->length ();
  auto_vec<operand_entry *> ops_mult;
  auto_vec<operand_entry *> ops_others;

  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
	  if (is_gimple_assign (def_stmt))
	    {
	      if (gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
		ops_mult.safe_push (oe);
	      /* A negate on the multiplication leads to FNMA.  */
	      else if (gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
		       && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
		{
		  gimple *neg_def_stmt
		    = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
		  if (is_gimple_assign (neg_def_stmt)
		      && gimple_bb (neg_def_stmt) == gimple_bb (def_stmt)
		      && gimple_assign_rhs_code (neg_def_stmt) == MULT_EXPR)
		    ops_mult.safe_push (oe);
		  else
		    ops_others.safe_push (oe);
		}
	      else
		ops_others.safe_push (oe);
	    }
	  else
	    ops_others.safe_push (oe);
	}
      else
	ops_others.safe_push (oe);
    }
  /* 1. When ops_mult.length == 2, like the following case,

	a * b + c * d + e,

     we need to rearrange the ops.

     Putting the ops that are not defined by a mult in front can generate
     more FMAs.

     2. If all ops are defined by a mult, we don't need to rearrange them.  */
  unsigned mult_num = ops_mult.length ();
  if (mult_num >= 2 && mult_num != ops_length)
    {
      /* Put no-mult ops and mult ops alternately at the end of the
	 queue, which is conducive to generating more FMA and reducing the
	 loss of FMA when breaking the chain.  */
      ops->truncate (0);
      ops->splice (ops_mult);
      int j, opindex = ops->length ();
      int others_length = ops_others.length ();
      for (j = 0; j < others_length; j++)
	{
	  oe = ops_others.pop ();
	  ops->quick_insert (opindex, oe);
	  if (opindex > 0)
	    opindex--;
	}
    }
  return mult_num;
}
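/* A hedged illustration of the classification above (SSA names invented):
   for the chain s = a * b + t + c * d with t = -(u * v) defined in the same
   block, the operands a * b, c * d and t are all treated as "mult" ops (the
   negated multiplication can become an FNMA), so no rearrangement is done;
   adding a plain operand e would trigger the interleaving step.  */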
/* Reassociate expressions in basic block BB and its post-dominator as
   children.

   Bubble up return status from maybe_optimize_range_tests.  */

static bool
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple *stmt = last_nondebug_stmt (bb);
  bool cfg_cleanup_needed = false;

  if (stmt && !gimple_visited_p (stmt))
    cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);

  bool do_prev = false;
  for (gsi = gsi_last_bb (bb);
       !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
    {
      do_prev = true;
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
	  && !stmt_could_throw_p (cfun, stmt))
	{
	  tree lhs, rhs1, rhs2;
	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

	  /* If this was part of an already processed statement,
	     we don't need to touch it again.  */
	  if (gimple_visited_p (stmt))
	    {
	      /* This statement might have become dead because of previous
		 reassociations.  */
	      if (has_zero_uses (gimple_get_lhs (stmt)))
		{
		  reassoc_remove_stmt (&gsi);
		  release_defs (stmt);
		  /* We might end up removing the last stmt above which
		     places the iterator to the end of the sequence.
		     Reset it to the last stmt in this case and make sure
		     we don't do gsi_prev in that case.  */
		  if (gsi_end_p (gsi))
		    {
		      gsi = gsi_last_bb (bb);
		      do_prev = false;
		    }
		}
	      continue;
	    }

	  /* If this is not a gimple binary expression, there is
	     nothing for us to do with it.  */
	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
	    continue;

	  lhs = gimple_assign_lhs (stmt);
	  rhs1 = gimple_assign_rhs1 (stmt);
	  rhs2 = gimple_assign_rhs2 (stmt);

	  /* For non-bit or min/max operations we can't associate
	     all types.  Verify that here.  */
	  if ((rhs_code != BIT_IOR_EXPR
	       && rhs_code != BIT_AND_EXPR
	       && rhs_code != BIT_XOR_EXPR
	       && rhs_code != MIN_EXPR
	       && rhs_code != MAX_EXPR
	       && !can_reassociate_type_p (TREE_TYPE (lhs)))
	      || !can_reassociate_op_p (rhs1)
	      || !can_reassociate_op_p (rhs2))
	    continue;
	  if (associative_tree_code (rhs_code))
	    {
	      auto_vec<operand_entry *> ops;
	      tree powi_result = NULL_TREE;
	      bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));

	      /* There may be no immediate uses left by the time we
		 get here because we may have eliminated them all.  */
	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
		continue;

	      gimple_set_visited (stmt, true);
	      linearize_expr_tree (&ops, stmt, true, true);
	      ops.qsort (sort_by_operand_rank);
	      int orig_len = ops.length ();
	      optimize_ops_list (rhs_code, &ops);
	      if (undistribute_ops_list (rhs_code, &ops,
					 loop_containing_stmt (stmt)))
		{
		  ops.qsort (sort_by_operand_rank);
		  optimize_ops_list (rhs_code, &ops);
		}
	      if (undistribute_bitref_for_vector (rhs_code, &ops,
						  loop_containing_stmt (stmt)))
		{
		  ops.qsort (sort_by_operand_rank);
		  optimize_ops_list (rhs_code, &ops);
		}
	      if (rhs_code == PLUS_EXPR
		  && transform_add_to_multiply (&ops))
		ops.qsort (sort_by_operand_rank);

	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
		{
		  if (is_vector)
		    optimize_vec_cond_expr (rhs_code, &ops);
		  else
		    optimize_range_tests (rhs_code, &ops, NULL);
		}

	      if (rhs_code == MULT_EXPR && !is_vector)
		{
		  attempt_builtin_copysign (&ops);

		  if (reassoc_insert_powi_p
		      && (flag_unsafe_math_optimizations
			  || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
		    powi_result = attempt_builtin_powi (stmt, &ops);
		}

	      operand_entry *last;
	      bool negate_result = false;
	      if (ops.length () > 1
		  && rhs_code == MULT_EXPR)
		{
		  last = ops.last ();
		  if ((integer_minus_onep (last->op)
		       || real_minus_onep (last->op))
		      && !HONOR_SNANS (TREE_TYPE (lhs))
		      && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
			  || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
		    {
		      ops.pop ();
		      negate_result = true;
		    }
		}

	      tree new_lhs = lhs;
	      /* If the operand vector is now empty, all operands were
		 consumed by the __builtin_powi optimization.  */
	      if (ops.length () == 0)
		transform_stmt_to_copy (&gsi, stmt, powi_result);
	      else if (ops.length () == 1)
		{
		  tree last_op = ops.last ()->op;

		  /* If the stmt that defines the operand has to be inserted,
		     insert it before the use.  */
		  if (ops.last ()->stmt_to_insert)
		    insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
		  if (powi_result)
		    transform_stmt_to_multiply (&gsi, stmt, last_op,
						powi_result);
		  else
		    transform_stmt_to_copy (&gsi, stmt, last_op);
		}
	      else
		{
		  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
		  int ops_num = ops.length ();
		  int width = 0;
		  int mult_num = 0;

		  /* For binary bit operations, if there are at least 3
		     operands and the last operand in OPS is a constant,
		     move it to the front.  This helps ensure that we generate
		     (X & Y) & C rather than (X & C) & Y.  The former will
		     often match a canonical bit test when we get to RTL.  */
		  if (ops.length () > 2
		      && (rhs_code == BIT_AND_EXPR
			  || rhs_code == BIT_IOR_EXPR
			  || rhs_code == BIT_XOR_EXPR)
		      && TREE_CODE (ops.last ()->op) == INTEGER_CST)
		    std::swap (*ops[0], *ops[ops_num - 1]);

		  optimization_type opt_type = bb_optimization_type (bb);

		  /* If the target supports FMA, rank_ops_for_fma will detect
		     whether the chain has FMA candidates and rearrange the
		     ops if so.  */
		  if (direct_internal_fn_supported_p (IFN_FMA,
						      TREE_TYPE (lhs),
						      opt_type)
		      && (rhs_code == PLUS_EXPR || rhs_code == MINUS_EXPR))
		    mult_num = rank_ops_for_fma (&ops);

		  /* Only rewrite the expression tree to parallel in the
		     last reassoc pass to avoid useless work back-and-forth
		     with initial linearization.  */
		  bool has_fma = mult_num >= 2 && mult_num != ops_num;
		  if (!reassoc_insert_powi_p
		      && ops.length () > 3
		      && (width = get_reassociation_width (&ops, mult_num, lhs,
							   rhs_code, mode))
			 > 1)
		    {
		      if (dump_file && (dump_flags & TDF_DETAILS))
			fprintf (dump_file,
				 "Width = %d was chosen for reassociation\n",
				 width);
		      rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
						  width, has_fma, ops);
		    }
		  else
		    {
		      /* When there are three operands left, we want
			 to make sure the ones that get the double
			 binary op are chosen wisely.  */
		      int len = ops.length ();
		      if (len >= 3
			  && (!has_fma
			      /* width > 1 means ranking ops results in better
				 parallelism.  Check current value to avoid
				 calling get_reassociation_width again.  */
			      || (width != 1
				  && get_reassociation_width (
				       &ops, mult_num, lhs, rhs_code, mode)
				     > 1)))
			swap_ops_for_binary_stmt (ops, len - 3);

		      new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
						   powi_result != NULL
						   || negate_result,
						   len != orig_len);
		    }

		  /* If we combined some repeated factors into a
		     __builtin_powi call, multiply that result by the
		     reassociated operands.  */
		  if (powi_result)
		    {
		      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
		      tree type = TREE_TYPE (lhs);
		      tree target_ssa = make_temp_ssa_name (type, NULL,
							    "reassocpow");
		      gimple_set_lhs (lhs_stmt, target_ssa);
		      update_stmt (lhs_stmt);
		      if (lhs != new_lhs)
			{
			  target_ssa = new_lhs;
			  new_lhs = lhs;
			}
		      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
						      powi_result, target_ssa);
		      gimple_set_location (mul_stmt, gimple_location (stmt));
		      gimple_set_uid (mul_stmt, gimple_uid (stmt));
		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
		    }
		}

	      if (negate_result)
		{
		  stmt = SSA_NAME_DEF_STMT (lhs);
		  tree tmp = make_ssa_name (TREE_TYPE (lhs));
		  gimple_set_lhs (stmt, tmp);
		  if (lhs != new_lhs)
		    tmp = new_lhs;
		  gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
							   tmp);
		  gimple_set_uid (neg_stmt, gimple_uid (stmt));
		  gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
		  update_stmt (stmt);
		}
	    }
	}
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    cfg_cleanup_needed |= reassociate_bb (son);

  return cfg_cleanup_needed;
}
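/* A small illustration of the constant placement above (values invented):
   for x_1 = a_2 & b_3 & 0x10, rank sorting leaves the INTEGER_CST last, so
   it is swapped to the front of the operand vector; this helps ensure the
   rewritten chain takes the form (a_2 & b_3) & 0x10 rather than
   (a_2 & 0x10) & b_3, which matches a canonical bit test at RTL.  */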
/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...;  // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...;  // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI<1(l1), OTHERVAR(l2)>;  */

static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (var);
      gimple *use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      gcc_assert (ok
		  && is_gimple_assign (use_stmt)
		  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
		  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple *g = gimple_build_cond (NE_EXPR, var,
				     build_zero_cst (TREE_TYPE (var)),
				     NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = profile_probability::even ();
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      then_bb->count -= etrue->count ();

      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
	othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
	othervar = gimple_assign_rhs1 (use_stmt);
      else
	gcc_unreachable ();
      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}
void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
void debug_ops_vector (vec<operand_entry *> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry *> ops)
{
  operand_entry *oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op);
      fprintf (file, "\n");
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry *> ops)
{
  dump_ops_vector (stderr, ops);
}

/* Bubble up return status from reassociate_bb.  */

static bool
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}
/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  int64_t rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, int64_t>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
	insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB.  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}
/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
			    reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
			    reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
			    reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
			    reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
			    reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
			    reassociate_stats.pows_created);

  delete operand_rank;
  bitmap_clear (biased_names);
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}
/* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
   insertion of __builtin_powi calls.

   Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
   optimization of a gimple conditional.  Otherwise returns zero.  */

static unsigned int
execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
{
  reassoc_insert_powi_p = insert_powi_p;
  reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;

  init_reassoc ();

  bool cfg_cleanup_needed;
  cfg_cleanup_needed = do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
}
namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () final override { return new pass_reassoc (m_ctxt); }
  void set_pass_param (unsigned int n, bool param) final override
    {
      gcc_assert (n == 0);
      insert_powi_p = param;
      bias_loop_carried_phi_ranks_p = !param;
    }
  bool gate (function *) final override { return flag_tree_reassoc != 0; }
  unsigned int execute (function *) final override
  {
    return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
  }

 private:
  /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
     point 3a in the pass header comment.  */
  bool insert_powi_p;
  bool bias_loop_carried_phi_ranks_p;
}; // class pass_reassoc

} // anon namespace

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}
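/* A hedged usage sketch (not part of this file): the pass manager obtains
   its reassoc instances through this factory and parameterizes them with
   set_pass_param, roughly

     gimple_opt_pass *p = make_pass_reassoc (ctxt);
     p->set_pass_param (0, true);   // insert_powi_p for this instance

   The instance parameterized with false biases loop-carried PHI ranks
   instead, and per the comment in reassociate_bb it is the last reassoc
   pass, the one that may rewrite expression trees in parallel form.  */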