/* Reassociation for trees.
   Copyright (C) 2005-2022 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "gimplify-me.h"
#include "tree-ssa-loop.h"
#include "langhooks.h"
#include "case-cfn-macros.h"
#include "tree-ssa-reassoc.h"
#include "tree-ssa-math-opts.h"
#include "gimple-range.h"
/*  This is a simple global reassociation pass.  It is, in part, based
    on the LLVM pass of the same name (They do some things more/less
    than we do, in different orders, etc).

    It consists of five steps:

    1. Breaking up subtract operations into addition + negate, where
    it would promote the reassociation of adds.

    2. Left linearization of the expression trees, so that (A+B)+(C+D)
    becomes (((A+B)+C)+D), which is easier for us to rewrite later.
    During linearization, we place the operands of the binary
    expressions into a vector of operand_entry_*

    3. Optimization of the operand lists, eliminating things like a +
    -a, a & a, etc.

    3a. Combine repeated factors with the same occurrence counts
    into a __builtin_powi call that will later be optimized into
    an optimal number of multiplies.

    4. Rewrite the expression trees we linearized and optimized so
    they are in proper rank order.

    5. Repropagate negates, as nothing else will clean it up ATM.

    A bit of theory on #4, since nobody seems to write anything down
    about why it makes sense to do it the way they do it:

    We could do this much nicer theoretically, but don't (for reasons
    explained after how to do it theoretically nice :P).

    In order to promote the most redundancy elimination, you want
    binary expressions whose operands are the same rank (or
    preferably, the same value) exposed to the redundancy eliminator,
    for possible elimination.

    So the way to do this if we really cared, is to build the new op
    tree from the leaves to the roots, merging as you go, and putting the
    new op on the end of the worklist, until you are left with one
    thing on the worklist.

    IE if you have to rewrite the following set of operands (listed with
    rank in parentheses), with opcode PLUS_EXPR:

    a (1),  b (1),  c (1),  d (2), e (2)

    We start with our merge worklist empty, and the ops list with all of
    those on it.

    You want to first merge all leaves of the same rank, as much as
    possible.

    So first build a binary op of

    mergetmp = a + b, and put "mergetmp" on the merge worklist.

    Because there is no three operand form of PLUS_EXPR, c is not going to
    be exposed to redundancy elimination as a rank 1 operand.

    So you might as well throw it on the merge worklist (you could also
    consider it to now be a rank two operand, and merge it with d and e,
    but in this case, you then have evicted e from a binary op. So at
    least in this situation, you can't win.)

    Then build a binary op of d + e
    mergetmp2 = d + e

    and put mergetmp2 on the merge worklist.

    so merge worklist = {mergetmp, c, mergetmp2}

    Continue building binary ops of these operations until you have only
    one operation left on the worklist.

    mergetmp3 = mergetmp + c

    worklist = {mergetmp2, mergetmp3}

    mergetmp4 = mergetmp2 + mergetmp3

    worklist = {mergetmp4}

    because we have one operation left, we can now just set the original
    statement equal to the result of that operation.

    This will at least expose a + b and d + e to redundancy elimination
    as binary operations.

    For extra points, you can reuse the old statements to build the
    mergetmps, since you shouldn't run out.

    So why don't we do this?

    Because it's expensive, and rarely will help.  Most trees we are
    reassociating have 3 or less ops.  If they have 2 ops, they already
    will be written into a nice single binary op.  If you have 3 ops, a
    single simple check suffices to tell you whether the first two are of the
    same rank.  If so, you know to order it

    mergetmp = op1 + op2
    newstmt = mergetmp + op3

    instead of
    mergetmp = op2 + op3
    newstmt = mergetmp + op1

    If all three are of the same rank, you can't expose them all in a
    single binary operator anyway, so the above is *still* the best you
    can do.

    Thus, this is what we do.  When we have three ops left, we check to see
    what order to put them in, and call it a day.  As a nod to vector sum
    reduction, we check if any of the ops are really a phi node that is a
    destructive update for the associating op, and keep the destructive
    update together for vector sum reduction recognition.  */
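/* As a worked illustration of the steps above (a sketch only, not code
   from this pass; the temporaries t1..t3 are hypothetical), a statement
   like

     r = (a - b) + (c + d);

   is first broken up (step 1) into a + -b + c + d, linearized (step 2)
   into the operand vector {a, -b, c, d}, optimized (step 3; nothing is
   eliminated here unless, say, b happens to equal a), and rewritten
   (step 4) in rank order as

     t1 = a + -b;
     t2 = t1 + c;
     t3 = t2 + d;
     r  = t3;

   with step 5 turning the leftover negate back into a subtract where
   profitable.  */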
/* Enable insertion of __builtin_powi calls during execute_reassoc.  See
   point 3a in the pass header comment.  */
static bool reassoc_insert_powi_p;

/* Enable biasing ranks of loop accumulators.  We don't want this before
   vectorization, since it interferes with reduction chains.  */
static bool reassoc_bias_loop_carried_phi_ranks_p;

/* Statistics for the pass.  */
static struct
{
  int constants_eliminated;
  int ops_eliminated;
  int pows_encountered;
} reassociate_stats;

static object_allocator<operand_entry> operand_entry_pool
  ("operand entry pool");

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static unsigned int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static int64_t *bb_rank;

/* Operand->rank hashtable.  */
static hash_map<tree, int64_t> *operand_rank;

/* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
   biased.  */
static auto_bitmap biased_names;

/* Vector of SSA_NAMEs on which after reassociate_bb is done with
   all basic blocks the CFG should be adjusted - basic blocks
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;

/* Forward decls.  */
static int64_t get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

static bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple *end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}
/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)
/* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
   operands to the STMT's left-hand side.  The goal is to preserve bias
   along single-use chains originating from loop-carried phis.  Only
   GIMPLE_ASSIGNs to SSA_NAMEs are considered to be uses, because only
   they participate in rank propagation.  */
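/* For illustration only (a hypothetical GIMPLE fragment, not taken from
   elsewhere in this file): in a loop body such as

     x_1 = PHI <x_0, x_2>
     a_3 = x_1 + c_4
     x_2 = a_3 + d_5

   the bias attached to x_1 should flow through a_3 into x_2, because each
   statement's result has exactly one use, and that use is a GIMPLE_ASSIGN
   to an SSA_NAME inside the same loop.  */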
static bool
propagate_bias_p (gimple *stmt)
{
  use_operand_p use;
  imm_use_iterator use_iter;
  gimple *single_use_stmt = NULL;

  if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference)
    return false;

  FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
    {
      gimple *current_use_stmt = USE_STMT (use);

      if (is_gimple_assign (current_use_stmt)
          && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
        {
          if (single_use_stmt != NULL && single_use_stmt != current_use_stmt)
            return false;
          single_use_stmt = current_use_stmt;
        }
    }

  if (single_use_stmt == NULL)
    return false;

  if (gimple_bb (stmt)->loop_father
      != gimple_bb (single_use_stmt)->loop_father)
    return false;

  return true;
}
/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */

static int64_t
phi_rank (gimple *stmt)
{
  basic_block bb = gimple_bb (stmt);
  class loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple *use_stmt;

  if (!reassoc_bias_loop_carried_phi_ranks_p)
    return bb_rank[bb->index];

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (arg))
        {
          gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
          if (gimple_bb (def_stmt)->loop_father == father)
            return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
        }
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}
/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */

static int64_t
propagate_rank (int64_t rank, tree op, bool *maybe_biased_p)
{
  int64_t op_rank;

  op_rank = get_rank (op);

  /* Check whether op is biased after the get_rank () call, since it might have
     updated biased_names.  */
  if (TREE_CODE (op) == SSA_NAME
      && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
    {
      if (maybe_biased_p == NULL)
        return rank;
      *maybe_biased_p = true;
    }

  return MAX (rank, op_rank);
}
/* Look up the operand rank structure for expression E.  */

static inline int64_t
find_operand_rank (tree e)
{
  int64_t *slot = operand_rank->get (e);
  return slot ? *slot : -1;
}

/* Insert {E,RANK} into the operand rank hashtable.  */

static inline void
insert_operand_rank (tree e, int64_t rank)
{
  gcc_assert (rank > 0);
  gcc_assert (!operand_rank->put (e, rank));
}
/* Given an expression E, return the rank of the expression.  */

static int64_t
get_rank (tree e)
{
  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      ssa_op_iter iter;
      gimple *stmt;
      int64_t rank;
      tree op;

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
        return rank;

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
        {
          rank = phi_rank (stmt);
          if (rank != bb_rank[gimple_bb (stmt)->index])
            bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
        }
      else if (!is_gimple_assign (stmt))
        rank = bb_rank[gimple_bb (stmt)->index];
      else
        {
          bool biased_p = false;
          bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;

          /* Otherwise, find the maximum rank for the operands.  As an
             exception, remove the bias from loop-carried phis when propagating
             the rank so that dependent operations are not also biased.  */
          /* Simply walk over all SSA uses - this takes advantage of the
             fact that non-SSA operands are is_gimple_min_invariant and
             thus have rank 0.  */
          rank = 0;
          FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
            rank = propagate_rank (rank, op, maybe_biased_p);

          rank += 1;
          if (biased_p)
            bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Rank for ");
          print_generic_expr (dump_file, e);
          fprintf (dump_file, " is %" PRId64 "\n", rank);
        }

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, rank);
      return rank;
    }

  /* Constants, globals, etc., are rank 0 */
  return 0;
}
/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 4
#define FLOAT_ONE_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */

static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    {
      /* Sort -1.0 and 1.0 constants last, while in some cases
         const_binop can't optimize some inexact operations, multiplication
         by -1.0 or 1.0 can be always merged with others.  */
      if (real_onep (t) || real_minus_onep (t))
        return FLOAT_ONE_CONST_TYPE;
      return FLOAT_CONST_TYPE;
    }
  else
    return OTHER_CONST_TYPE;
}
/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */

static int
541 sort_by_operand_rank (const void *pa
, const void *pb
)
543 const operand_entry
*oea
= *(const operand_entry
*const *)pa
;
544 const operand_entry
*oeb
= *(const operand_entry
*const *)pb
;
546 if (oeb
->rank
!= oea
->rank
)
547 return oeb
->rank
> oea
->rank
? 1 : -1;
549 /* It's nicer for optimize_expression if constants that are likely
550 to fold when added/multiplied/whatever are put next to each
551 other. Since all constants have rank 0, order them by type. */
554 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
555 return constant_type (oea
->op
) - constant_type (oeb
->op
);
557 /* To make sorting result stable, we use unique IDs to determine
559 return oeb
->id
> oea
->id
? 1 : -1;
562 if (TREE_CODE (oea
->op
) != SSA_NAME
)
564 if (TREE_CODE (oeb
->op
) != SSA_NAME
)
565 return oeb
->id
> oea
->id
? 1 : -1;
569 else if (TREE_CODE (oeb
->op
) != SSA_NAME
)
572 /* Lastly, make sure the versions that are the same go next to each
574 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
576 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
577 versions of removed SSA_NAMEs, so if possible, prefer to sort
578 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
580 gimple
*stmta
= SSA_NAME_DEF_STMT (oea
->op
);
581 gimple
*stmtb
= SSA_NAME_DEF_STMT (oeb
->op
);
582 basic_block bba
= gimple_bb (stmta
);
583 basic_block bbb
= gimple_bb (stmtb
);
586 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
587 but the other might not. */
592 /* If neither is, compare bb_rank. */
593 if (bb_rank
[bbb
->index
] != bb_rank
[bba
->index
])
594 return (bb_rank
[bbb
->index
] >> 16) - (bb_rank
[bba
->index
] >> 16);
597 bool da
= reassoc_stmt_dominates_stmt_p (stmta
, stmtb
);
598 bool db
= reassoc_stmt_dominates_stmt_p (stmtb
, stmta
);
602 return SSA_NAME_VERSION (oeb
->op
) > SSA_NAME_VERSION (oea
->op
) ? 1 : -1;
605 return oeb
->id
> oea
->id
? 1 : -1;
608 /* Add an operand entry to *OPS for the tree operand OP. */
611 add_to_ops_vec (vec
<operand_entry
*> *ops
, tree op
, gimple
*stmt_to_insert
= NULL
)
613 operand_entry
*oe
= operand_entry_pool
.allocate ();
616 oe
->rank
= get_rank (op
);
617 oe
->id
= next_operand_entry_id
++;
619 oe
->stmt_to_insert
= stmt_to_insert
;
623 /* Add an operand entry to *OPS for the tree operand OP with repeat
627 add_repeat_to_ops_vec (vec
<operand_entry
*> *ops
, tree op
,
628 HOST_WIDE_INT repeat
)
630 operand_entry
*oe
= operand_entry_pool
.allocate ();
633 oe
->rank
= get_rank (op
);
634 oe
->id
= next_operand_entry_id
++;
636 oe
->stmt_to_insert
= NULL
;
639 reassociate_stats
.pows_encountered
++;
642 /* Returns true if we can associate the SSA def OP. */
645 can_reassociate_op_p (tree op
)
647 if (TREE_CODE (op
) == SSA_NAME
&& SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op
))
649 /* Make sure asm goto outputs do not participate in reassociation since
650 we have no way to find an insertion place after asm goto. */
651 if (TREE_CODE (op
) == SSA_NAME
652 && gimple_code (SSA_NAME_DEF_STMT (op
)) == GIMPLE_ASM
653 && gimple_asm_nlabels (as_a
<gasm
*> (SSA_NAME_DEF_STMT (op
))) != 0)
658 /* Returns true if we can reassociate operations of TYPE.
659 That is for integral or non-saturating fixed-point types, and for
660 floating point type when associative-math is enabled. */
663 can_reassociate_type_p (tree type
)
665 if ((ANY_INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_WRAPS (type
))
666 || NON_SAT_FIXED_POINT_TYPE_P (type
)
667 || (flag_associative_math
&& FLOAT_TYPE_P (type
)))
672 /* Return true if STMT is reassociable operation containing a binary
673 operation with tree code CODE, and is inside LOOP. */
676 is_reassociable_op (gimple
*stmt
, enum tree_code code
, class loop
*loop
)
678 basic_block bb
= gimple_bb (stmt
);
680 if (gimple_bb (stmt
) == NULL
)
683 if (!flow_bb_inside_loop_p (loop
, bb
))
686 if (is_gimple_assign (stmt
)
687 && gimple_assign_rhs_code (stmt
) == code
688 && has_single_use (gimple_assign_lhs (stmt
)))
690 tree rhs1
= gimple_assign_rhs1 (stmt
);
691 tree rhs2
= gimple_assign_rhs2 (stmt
);
692 if (!can_reassociate_op_p (rhs1
)
693 || (rhs2
&& !can_reassociate_op_p (rhs2
)))
702 /* Return true if STMT is a nop-conversion. */
705 gimple_nop_conversion_p (gimple
*stmt
)
707 if (gassign
*ass
= dyn_cast
<gassign
*> (stmt
))
709 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass
))
710 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass
)),
711 TREE_TYPE (gimple_assign_rhs1 (ass
))))
717 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
718 operand of the negate operation. Otherwise, return NULL. */
721 get_unary_op (tree name
, enum tree_code opcode
)
723 gimple
*stmt
= SSA_NAME_DEF_STMT (name
);
725 /* Look through nop conversions (sign changes). */
726 if (gimple_nop_conversion_p (stmt
)
727 && TREE_CODE (gimple_assign_rhs1 (stmt
)) == SSA_NAME
)
728 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
730 if (!is_gimple_assign (stmt
))
733 if (gimple_assign_rhs_code (stmt
) == opcode
)
734 return gimple_assign_rhs1 (stmt
);
738 /* Return true if OP1 and OP2 have the same value if casted to either type. */
741 ops_equal_values_p (tree op1
, tree op2
)
747 if (TREE_CODE (op1
) == SSA_NAME
)
749 gimple
*stmt
= SSA_NAME_DEF_STMT (op1
);
750 if (gimple_nop_conversion_p (stmt
))
752 op1
= gimple_assign_rhs1 (stmt
);
758 if (TREE_CODE (op2
) == SSA_NAME
)
760 gimple
*stmt
= SSA_NAME_DEF_STMT (op2
);
761 if (gimple_nop_conversion_p (stmt
))
763 op2
= gimple_assign_rhs1 (stmt
);
/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */
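/* For example (an illustration of the equivalences handled below, not
   code from this pass): with opcode BIT_AND_EXPR the operand list
   {a, b, b} becomes {a, b}, since b & b == b; with opcode BIT_XOR_EXPR
   the list {a, b, b} becomes {a, 0}, since b ^ b == 0.  */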
779 eliminate_duplicate_pair (enum tree_code opcode
,
780 vec
<operand_entry
*> *ops
,
787 /* If we have two of the same op, and the opcode is & |, min, or max,
788 we can eliminate one of them.
789 If we have two of the same op, and the opcode is ^, we can
790 eliminate both of them. */
792 if (last
&& last
->op
== curr
->op
)
800 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
802 fprintf (dump_file
, "Equivalence: ");
803 print_generic_expr (dump_file
, curr
->op
);
804 fprintf (dump_file
, " [&|minmax] ");
805 print_generic_expr (dump_file
, last
->op
);
806 fprintf (dump_file
, " -> ");
807 print_generic_stmt (dump_file
, last
->op
);
810 ops
->ordered_remove (i
);
811 reassociate_stats
.ops_eliminated
++;
816 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
818 fprintf (dump_file
, "Equivalence: ");
819 print_generic_expr (dump_file
, curr
->op
);
820 fprintf (dump_file
, " ^ ");
821 print_generic_expr (dump_file
, last
->op
);
822 fprintf (dump_file
, " -> nothing\n");
825 reassociate_stats
.ops_eliminated
+= 2;
827 if (ops
->length () == 2)
830 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
835 ops
->ordered_remove (i
-1);
836 ops
->ordered_remove (i
-1);
848 static vec
<tree
> plus_negates
;
/* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
   expression, look in OPS for a corresponding positive operation to cancel
   it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
,
858 vec
<operand_entry
*> *ops
,
859 unsigned int currindex
,
867 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
870 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
871 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
872 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
875 /* Any non-negated version will have a rank that is one less than
876 the current rank. So once we hit those ranks, if we don't find
879 for (i
= currindex
+ 1;
880 ops
->iterate (i
, &oe
)
881 && oe
->rank
>= curr
->rank
- 1 ;
885 && ops_equal_values_p (oe
->op
, negateop
))
887 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
889 fprintf (dump_file
, "Equivalence: ");
890 print_generic_expr (dump_file
, negateop
);
891 fprintf (dump_file
, " + -");
892 print_generic_expr (dump_file
, oe
->op
);
893 fprintf (dump_file
, " -> 0\n");
896 ops
->ordered_remove (i
);
897 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
898 ops
->ordered_remove (currindex
);
899 reassociate_stats
.ops_eliminated
++;
904 && ops_equal_values_p (oe
->op
, notop
))
906 tree op_type
= TREE_TYPE (oe
->op
);
908 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
910 fprintf (dump_file
, "Equivalence: ");
911 print_generic_expr (dump_file
, notop
);
912 fprintf (dump_file
, " + ~");
913 print_generic_expr (dump_file
, oe
->op
);
914 fprintf (dump_file
, " -> -1\n");
917 ops
->ordered_remove (i
);
918 add_to_ops_vec (ops
, build_all_ones_cst (op_type
));
919 ops
->ordered_remove (currindex
);
920 reassociate_stats
.ops_eliminated
++;
926 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
927 save it for later inspection in repropagate_negates(). */
928 if (negateop
!= NULL_TREE
929 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr
->op
)) == NEGATE_EXPR
)
930 plus_negates
.safe_push (curr
->op
);
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with -1 or 0, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
,
943 vec
<operand_entry
*> *ops
,
944 unsigned int currindex
,
951 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
952 || TREE_CODE (curr
->op
) != SSA_NAME
)
955 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
956 if (notop
== NULL_TREE
)
959 /* Any non-not version will have a rank that is one less than
960 the current rank. So once we hit those ranks, if we don't find
963 for (i
= currindex
+ 1;
964 ops
->iterate (i
, &oe
)
965 && oe
->rank
>= curr
->rank
- 1;
970 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
972 fprintf (dump_file
, "Equivalence: ");
973 print_generic_expr (dump_file
, notop
);
974 if (opcode
== BIT_AND_EXPR
)
975 fprintf (dump_file
, " & ~");
976 else if (opcode
== BIT_IOR_EXPR
)
977 fprintf (dump_file
, " | ~");
978 print_generic_expr (dump_file
, oe
->op
);
979 if (opcode
== BIT_AND_EXPR
)
980 fprintf (dump_file
, " -> 0\n");
981 else if (opcode
== BIT_IOR_EXPR
)
982 fprintf (dump_file
, " -> -1\n");
985 if (opcode
== BIT_AND_EXPR
)
986 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
987 else if (opcode
== BIT_IOR_EXPR
)
988 oe
->op
= build_all_ones_cst (TREE_TYPE (oe
->op
));
990 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
992 ops
->quick_push (oe
);
/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */
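/* The identities handled below, for illustration (x stands for the
   remaining operands): x & 0 -> 0, x & -1 -> x, x | -1 -> -1,
   x | 0 -> x, x * 0 -> 0 (when safe for the type), x * 1 -> x, and
   x [|^+] 0 -> x.  */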
1008 eliminate_using_constants (enum tree_code opcode
,
1009 vec
<operand_entry
*> *ops
)
1011 operand_entry
*oelast
= ops
->last ();
1012 tree type
= TREE_TYPE (oelast
->op
);
1014 if (oelast
->rank
== 0
1015 && (ANY_INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
1020 if (integer_zerop (oelast
->op
))
1022 if (ops
->length () != 1)
1024 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1025 fprintf (dump_file
, "Found & 0, removing all other ops\n");
1027 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
1030 ops
->quick_push (oelast
);
1034 else if (integer_all_onesp (oelast
->op
))
1036 if (ops
->length () != 1)
1038 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1039 fprintf (dump_file
, "Found & -1, removing\n");
1041 reassociate_stats
.ops_eliminated
++;
1046 if (integer_all_onesp (oelast
->op
))
1048 if (ops
->length () != 1)
1050 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1051 fprintf (dump_file
, "Found | -1, removing all other ops\n");
1053 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
1056 ops
->quick_push (oelast
);
1060 else if (integer_zerop (oelast
->op
))
1062 if (ops
->length () != 1)
1064 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1065 fprintf (dump_file
, "Found | 0, removing\n");
1067 reassociate_stats
.ops_eliminated
++;
1072 if (integer_zerop (oelast
->op
)
1073 || (FLOAT_TYPE_P (type
)
1074 && !HONOR_NANS (type
)
1075 && !HONOR_SIGNED_ZEROS (type
)
1076 && real_zerop (oelast
->op
)))
1078 if (ops
->length () != 1)
1080 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1081 fprintf (dump_file
, "Found * 0, removing all other ops\n");
1083 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
1085 ops
->quick_push (oelast
);
1089 else if (integer_onep (oelast
->op
)
1090 || (FLOAT_TYPE_P (type
)
1091 && !HONOR_SNANS (type
)
1092 && real_onep (oelast
->op
)))
1094 if (ops
->length () != 1)
1096 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1097 fprintf (dump_file
, "Found * 1, removing\n");
1099 reassociate_stats
.ops_eliminated
++;
1107 if (integer_zerop (oelast
->op
)
1108 || (FLOAT_TYPE_P (type
)
1109 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
1110 && fold_real_zero_addition_p (type
, 0, oelast
->op
,
1111 opcode
== MINUS_EXPR
)))
1113 if (ops
->length () != 1)
1115 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1116 fprintf (dump_file
, "Found [|^+] 0, removing\n");
1118 reassociate_stats
.ops_eliminated
++;
1130 static void linearize_expr_tree (vec
<operand_entry
*> *, gimple
*,
1133 /* Structure for tracking and counting operands. */
1137 enum tree_code oecode
;
1142 /* The heap for the oecount hashtable and the sorted list of operands. */
1143 static vec
<oecount
> cvec
;
1146 /* Oecount hashtable helpers. */
1148 struct oecount_hasher
: int_hash
<int, 0, 1>
1150 static inline hashval_t
hash (int);
1151 static inline bool equal (int, int);
1154 /* Hash function for oecount. */
1157 oecount_hasher::hash (int p
)
1159 const oecount
*c
= &cvec
[p
- 42];
1160 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
1163 /* Comparison function for oecount. */
1166 oecount_hasher::equal (int p1
, int p2
)
1168 const oecount
*c1
= &cvec
[p1
- 42];
1169 const oecount
*c2
= &cvec
[p2
- 42];
1170 return c1
->oecode
== c2
->oecode
&& c1
->op
== c2
->op
;
1173 /* Comparison function for qsort sorting oecount elements by count. */
1176 oecount_cmp (const void *p1
, const void *p2
)
1178 const oecount
*c1
= (const oecount
*)p1
;
1179 const oecount
*c2
= (const oecount
*)p2
;
1180 if (c1
->cnt
!= c2
->cnt
)
1181 return c1
->cnt
> c2
->cnt
? 1 : -1;
1183 /* If counts are identical, use unique IDs to stabilize qsort. */
1184 return c1
->id
> c2
->id
? 1 : -1;
1187 /* Return TRUE iff STMT represents a builtin call that raises OP
1188 to some exponent. */
1191 stmt_is_power_of_op (gimple
*stmt
, tree op
)
1193 if (!is_gimple_call (stmt
))
1196 switch (gimple_call_combined_fn (stmt
))
1200 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */
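/* For example (illustrative only): a call __builtin_powi (x, 5) has its
   exponent rewritten in place to 4 and the function returns 4; the
   caller is then responsible for accounting for the removed factor
   of x.  */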
1211 static HOST_WIDE_INT
1212 decrement_power (gimple
*stmt
)
1214 REAL_VALUE_TYPE c
, cint
;
1215 HOST_WIDE_INT power
;
1218 switch (gimple_call_combined_fn (stmt
))
1221 arg1
= gimple_call_arg (stmt
, 1);
1222 c
= TREE_REAL_CST (arg1
);
1223 power
= real_to_integer (&c
) - 1;
1224 real_from_integer (&cint
, VOIDmode
, power
, SIGNED
);
1225 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1229 arg1
= gimple_call_arg (stmt
, 1);
1230 power
= TREE_INT_CST_LOW (arg1
) - 1;
1231 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1239 /* Replace SSA defined by STMT and replace all its uses with new
1240 SSA. Also return the new SSA. */
1243 make_new_ssa_for_def (gimple
*stmt
, enum tree_code opcode
, tree op
)
1247 imm_use_iterator iter
;
1248 tree new_lhs
, new_debug_lhs
= NULL_TREE
;
1249 tree lhs
= gimple_get_lhs (stmt
);
1251 new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
1252 gimple_set_lhs (stmt
, new_lhs
);
1254 /* Also need to update GIMPLE_DEBUGs. */
1255 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
1257 tree repl
= new_lhs
;
1258 if (is_gimple_debug (use_stmt
))
1260 if (new_debug_lhs
== NULL_TREE
)
1262 new_debug_lhs
= build_debug_expr_decl (TREE_TYPE (lhs
));
1264 = gimple_build_debug_bind (new_debug_lhs
,
1265 build2 (opcode
, TREE_TYPE (lhs
),
1268 gimple_set_uid (def_temp
, gimple_uid (stmt
));
1269 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1270 gsi_insert_after (&gsi
, def_temp
, GSI_SAME_STMT
);
1272 repl
= new_debug_lhs
;
1274 FOR_EACH_IMM_USE_ON_STMT (use
, iter
)
1275 SET_USE (use
, repl
);
1276 update_stmt (use_stmt
);
1281 /* Replace all SSAs defined in STMTS_TO_FIX and replace its
1282 uses with new SSAs. Also do this for the stmt that defines DEF
1283 if *DEF is not OP. */
1286 make_new_ssa_for_all_defs (tree
*def
, enum tree_code opcode
, tree op
,
1287 vec
<gimple
*> &stmts_to_fix
)
1293 && TREE_CODE (*def
) == SSA_NAME
1294 && (stmt
= SSA_NAME_DEF_STMT (*def
))
1295 && gimple_code (stmt
) != GIMPLE_NOP
)
1296 *def
= make_new_ssa_for_def (stmt
, opcode
, op
);
1298 FOR_EACH_VEC_ELT (stmts_to_fix
, i
, stmt
)
1299 make_new_ssa_for_def (stmt
, opcode
, op
);
1302 /* Find the single immediate use of STMT's LHS, and replace it
1303 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1304 replace *DEF with OP as well. */
1307 propagate_op_to_single_use (tree op
, gimple
*stmt
, tree
*def
)
1312 gimple_stmt_iterator gsi
;
1314 if (is_gimple_call (stmt
))
1315 lhs
= gimple_call_lhs (stmt
);
1317 lhs
= gimple_assign_lhs (stmt
);
1319 gcc_assert (has_single_use (lhs
));
1320 single_imm_use (lhs
, &use
, &use_stmt
);
1324 if (TREE_CODE (op
) != SSA_NAME
)
1325 update_stmt (use_stmt
);
1326 gsi
= gsi_for_stmt (stmt
);
1327 unlink_stmt_vdef (stmt
);
1328 reassoc_remove_stmt (&gsi
);
1329 release_defs (stmt
);
1332 /* Walks the linear chain with result *DEF searching for an operation
1333 with operand OP and code OPCODE removing that from the chain. *DEF
1334 is updated if there is only one operand but no operation left. */
1337 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1339 tree orig_def
= *def
;
1340 gimple
*stmt
= SSA_NAME_DEF_STMT (*def
);
  /* PR72835 - Record the stmt chain that has to be updated such that
     we don't use the same LHS when the values computed are different.  */
1343 auto_vec
<gimple
*, 64> stmts_to_fix
;
1349 if (opcode
== MULT_EXPR
)
1351 if (stmt_is_power_of_op (stmt
, op
))
1353 if (decrement_power (stmt
) == 1)
1355 if (stmts_to_fix
.length () > 0)
1356 stmts_to_fix
.pop ();
1357 propagate_op_to_single_use (op
, stmt
, def
);
1361 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
)
1363 if (gimple_assign_rhs1 (stmt
) == op
)
1365 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
1366 if (stmts_to_fix
.length () > 0)
1367 stmts_to_fix
.pop ();
1368 propagate_op_to_single_use (cst
, stmt
, def
);
1371 else if (integer_minus_onep (op
)
1372 || real_minus_onep (op
))
1374 gimple_assign_set_rhs_code
1375 (stmt
, TREE_CODE (gimple_assign_rhs1 (stmt
)));
1381 name
= gimple_assign_rhs1 (stmt
);
1383 /* If this is the operation we look for and one of the operands
1384 is ours simply propagate the other operand into the stmts
1386 if (gimple_assign_rhs_code (stmt
) == opcode
1388 || gimple_assign_rhs2 (stmt
) == op
))
1391 name
= gimple_assign_rhs2 (stmt
);
1392 if (stmts_to_fix
.length () > 0)
1393 stmts_to_fix
.pop ();
1394 propagate_op_to_single_use (name
, stmt
, def
);
1398 /* We might have a multiply of two __builtin_pow* calls, and
1399 the operand might be hiding in the rightmost one. Likewise
1400 this can happen for a negate. */
1401 if (opcode
== MULT_EXPR
1402 && gimple_assign_rhs_code (stmt
) == opcode
1403 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1404 && has_single_use (gimple_assign_rhs2 (stmt
)))
1406 gimple
*stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1407 if (stmt_is_power_of_op (stmt2
, op
))
1409 if (decrement_power (stmt2
) == 1)
1410 propagate_op_to_single_use (op
, stmt2
, def
);
1412 stmts_to_fix
.safe_push (stmt2
);
1415 else if (is_gimple_assign (stmt2
)
1416 && gimple_assign_rhs_code (stmt2
) == NEGATE_EXPR
)
1418 if (gimple_assign_rhs1 (stmt2
) == op
)
1420 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
1421 propagate_op_to_single_use (cst
, stmt2
, def
);
1424 else if (integer_minus_onep (op
)
1425 || real_minus_onep (op
))
1427 stmts_to_fix
.safe_push (stmt2
);
1428 gimple_assign_set_rhs_code
1429 (stmt2
, TREE_CODE (gimple_assign_rhs1 (stmt2
)));
1435 /* Continue walking the chain. */
1436 gcc_assert (name
!= op
1437 && TREE_CODE (name
) == SSA_NAME
);
1438 stmt
= SSA_NAME_DEF_STMT (name
);
1439 stmts_to_fix
.safe_push (stmt
);
1443 if (stmts_to_fix
.length () > 0 || *def
== orig_def
)
1444 make_new_ssa_for_all_defs (def
, opcode
, op
, stmts_to_fix
);
1447 /* Returns true if statement S1 dominates statement S2. Like
1448 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1451 reassoc_stmt_dominates_stmt_p (gimple
*s1
, gimple
*s2
)
1453 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1455 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1456 SSA_NAME. Assume it lives at the beginning of function and
1457 thus dominates everything. */
1458 if (!bb1
|| s1
== s2
)
1461 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1467 /* PHIs in the same basic block are assumed to be
1468 executed all in parallel, if only one stmt is a PHI,
1469 it dominates the other stmt in the same basic block. */
1470 if (gimple_code (s1
) == GIMPLE_PHI
)
1473 if (gimple_code (s2
) == GIMPLE_PHI
)
1476 gcc_assert (gimple_uid (s1
) && gimple_uid (s2
));
1478 if (gimple_uid (s1
) < gimple_uid (s2
))
1481 if (gimple_uid (s1
) > gimple_uid (s2
))
1484 gimple_stmt_iterator gsi
= gsi_for_stmt (s1
);
1485 unsigned int uid
= gimple_uid (s1
);
1486 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1488 gimple
*s
= gsi_stmt (gsi
);
1489 if (gimple_uid (s
) != uid
)
1498 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
1501 /* Insert STMT after INSERT_POINT. */
1504 insert_stmt_after (gimple
*stmt
, gimple
*insert_point
)
1506 gimple_stmt_iterator gsi
;
1509 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1510 bb
= gimple_bb (insert_point
);
1511 else if (!stmt_ends_bb_p (insert_point
))
1513 gsi
= gsi_for_stmt (insert_point
);
1514 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1515 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1518 else if (gimple_code (insert_point
) == GIMPLE_ASM
1519 && gimple_asm_nlabels (as_a
<gasm
*> (insert_point
)) != 0)
1520 /* We have no idea where to insert - it depends on where the
1521 uses will be placed. */
1524 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1525 thus if it must end a basic block, it should be a call that can
1526 throw, or some assignment that can throw. If it throws, the LHS
1527 of it will not be initialized though, so only valid places using
1528 the SSA_NAME should be dominated by the fallthru edge. */
1529 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
1530 gsi
= gsi_after_labels (bb
);
1531 if (gsi_end_p (gsi
))
1533 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1534 gimple_set_uid (stmt
,
1535 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1538 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1539 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1542 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1543 the result. Places the statement after the definition of either
1544 OP1 or OP2. Returns the new statement. */
1547 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
1549 gimple
*op1def
= NULL
, *op2def
= NULL
;
1550 gimple_stmt_iterator gsi
;
1554 /* Create the addition statement. */
1555 op
= make_ssa_name (type
);
1556 sum
= gimple_build_assign (op
, opcode
, op1
, op2
);
1558 /* Find an insertion place and insert. */
1559 if (TREE_CODE (op1
) == SSA_NAME
)
1560 op1def
= SSA_NAME_DEF_STMT (op1
);
1561 if (TREE_CODE (op2
) == SSA_NAME
)
1562 op2def
= SSA_NAME_DEF_STMT (op2
);
1563 if ((!op1def
|| gimple_nop_p (op1def
))
1564 && (!op2def
|| gimple_nop_p (op2def
)))
1566 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1567 if (gsi_end_p (gsi
))
1569 gimple_stmt_iterator gsi2
1570 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1571 gimple_set_uid (sum
,
1572 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1575 gimple_set_uid (sum
, gimple_uid (gsi_stmt (gsi
)));
1576 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1580 gimple
*insert_point
;
1581 if ((!op1def
|| gimple_nop_p (op1def
))
1582 || (op2def
&& !gimple_nop_p (op2def
)
1583 && reassoc_stmt_dominates_stmt_p (op1def
, op2def
)))
1584 insert_point
= op2def
;
1586 insert_point
= op1def
;
1587 insert_stmt_after (sum
, insert_point
);
/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
        second candidate bitmap noting all multiplication/division chains
        that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
        candidates with one (operand, code) operation removed from their
        multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
        multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
        processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b ) * x * y in one invocation of the reassociation pass.  */
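/* A concrete walk-through (illustrative, with made-up operand names):
   for the chain a * x + b * x + c, the candidates are the two summands
   defined by multiplications, the (x, MULT_EXPR) pair is seen twice, so
   an alternate chain a + b is built, multiplied by x, and the ops list
   becomes {(a + b) * x, 0, c} for the main algorithm to finish.  */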
1630 undistribute_ops_list (enum tree_code opcode
,
1631 vec
<operand_entry
*> *ops
, class loop
*loop
)
1633 unsigned int length
= ops
->length ();
1636 unsigned nr_candidates
, nr_candidates2
;
1637 sbitmap_iterator sbi0
;
1638 vec
<operand_entry
*> *subops
;
1639 bool changed
= false;
1640 unsigned int next_oecount_id
= 0;
1643 || opcode
!= PLUS_EXPR
)
1646 /* Build a list of candidates to process. */
1647 auto_sbitmap
candidates (length
);
1648 bitmap_clear (candidates
);
1650 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1652 enum tree_code dcode
;
1655 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1657 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1658 if (!is_gimple_assign (oe1def
))
1660 dcode
= gimple_assign_rhs_code (oe1def
);
1661 if ((dcode
!= MULT_EXPR
1662 && dcode
!= RDIV_EXPR
)
1663 || !is_reassociable_op (oe1def
, dcode
, loop
))
1666 bitmap_set_bit (candidates
, i
);
1670 if (nr_candidates
< 2)
1673 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1675 fprintf (dump_file
, "searching for un-distribute opportunities ");
1676 print_generic_expr (dump_file
,
1677 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, TDF_NONE
);
1678 fprintf (dump_file
, " %d\n", nr_candidates
);
1681 /* Build linearized sub-operand lists and the counting table. */
1684 hash_table
<oecount_hasher
> ctable (15);
1686 /* ??? Macro arguments cannot have multi-argument template types in
1687 them. This typedef is needed to workaround that limitation. */
1688 typedef vec
<operand_entry
*> vec_operand_entry_t_heap
;
1689 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1690 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1693 enum tree_code oecode
;
1696 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1697 oecode
= gimple_assign_rhs_code (oedef
);
1698 linearize_expr_tree (&subops
[i
], oedef
,
1699 associative_tree_code (oecode
), false);
1701 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1708 c
.id
= next_oecount_id
++;
1711 idx
= cvec
.length () + 41;
1712 slot
= ctable
.find_slot (idx
, INSERT
);
1720 cvec
[*slot
- 42].cnt
++;
1725 /* Sort the counting table. */
1726 cvec
.qsort (oecount_cmp
);
1728 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1731 fprintf (dump_file
, "Candidates:\n");
1732 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1734 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1735 c
->oecode
== MULT_EXPR
1736 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1737 print_generic_expr (dump_file
, c
->op
);
1738 fprintf (dump_file
, "\n");
1742 /* Process the (operand, code) pairs in order of most occurrence. */
1743 auto_sbitmap
candidates2 (length
);
1744 while (!cvec
.is_empty ())
1746 oecount
*c
= &cvec
.last ();
1750 /* Now collect the operands in the outer chain that contain
1751 the common operand in their inner chain. */
1752 bitmap_clear (candidates2
);
1754 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1757 enum tree_code oecode
;
1759 tree op
= (*ops
)[i
]->op
;
1761 /* If we undistributed in this chain already this may be
1763 if (TREE_CODE (op
) != SSA_NAME
)
1766 oedef
= SSA_NAME_DEF_STMT (op
);
1767 oecode
= gimple_assign_rhs_code (oedef
);
1768 if (oecode
!= c
->oecode
)
1771 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1773 if (oe1
->op
== c
->op
)
1775 bitmap_set_bit (candidates2
, i
);
1782 if (nr_candidates2
>= 2)
1784 operand_entry
*oe1
, *oe2
;
1786 int first
= bitmap_first_set_bit (candidates2
);
1788 /* Build the new addition chain. */
1789 oe1
= (*ops
)[first
];
1790 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1792 fprintf (dump_file
, "Building (");
1793 print_generic_expr (dump_file
, oe1
->op
);
1795 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1796 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1800 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1802 fprintf (dump_file
, " + ");
1803 print_generic_expr (dump_file
, oe2
->op
);
1805 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1806 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1807 oe1
->op
, oe2
->op
, opcode
);
1808 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1810 oe1
->op
= gimple_get_lhs (sum
);
1813 /* Apply the multiplication/division. */
1814 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1815 oe1
->op
, c
->op
, c
->oecode
);
1816 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1818 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1819 print_generic_expr (dump_file
, c
->op
);
1820 fprintf (dump_file
, "\n");
1823 /* Record it in the addition chain and disable further
1824 undistribution with this op. */
1825 oe1
->op
= gimple_assign_lhs (prod
);
1826 oe1
->rank
= get_rank (oe1
->op
);
1827 subops
[first
].release ();
1835 for (i
= 0; i
< ops
->length (); ++i
)
1836 subops
[i
].release ();
1843 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1844 first: element index for each relevant BIT_FIELD_REF.
1845 second: the index of vec ops* for each relevant BIT_FIELD_REF. */
1846 typedef std::pair
<unsigned, unsigned> v_info_elem
;
1849 auto_vec
<v_info_elem
, 32> vec
;
1851 typedef v_info
*v_info_ptr
;
1853 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1855 sort_by_mach_mode (const void *p_i
, const void *p_j
)
1857 const tree tr1
= *((const tree
*) p_i
);
1858 const tree tr2
= *((const tree
*) p_j
);
1859 unsigned int mode1
= TYPE_MODE (TREE_TYPE (tr1
));
1860 unsigned int mode2
= TYPE_MODE (TREE_TYPE (tr2
));
1863 else if (mode1
< mode2
)
1865 if (SSA_NAME_VERSION (tr1
) < SSA_NAME_VERSION (tr2
))
1867 else if (SSA_NAME_VERSION (tr1
) > SSA_NAME_VERSION (tr2
))
1872 /* Cleanup hash map for VECTOR information. */
1874 cleanup_vinfo_map (hash_map
<tree
, v_info_ptr
> &info_map
)
1876 for (hash_map
<tree
, v_info_ptr
>::iterator it
= info_map
.begin ();
1877 it
!= info_map
.end (); ++it
)
1879 v_info_ptr info
= (*it
).second
;
1881 (*it
).second
= NULL
;
/* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
     V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
   is transformed to
     Vs = (V1 + V2 + ... + Vn)
     Vs[0] + Vs[1] + ... + Vs[k]

   The basic steps are listed below:

    1) Check the addition chain *OPS for summands coming from a
       BIT_FIELD_REF on a VECTOR type.  Put the information into
       v_info_map for each satisfied summand, using the VECTOR SSA_NAME
       as the key.

    2) For each key (VECTOR SSA_NAME), validate that its BIT_FIELD_REFs
       are contiguous and cover the whole VECTOR perfectly without any
       holes.  Obtain one VECTOR list containing the candidates to be
       transformed.

    3) Sort the VECTOR list by machine mode of the VECTOR type; for each
       group of candidates with the same mode, build the addition
       statements for them and generate BIT_FIELD_REFs accordingly.

   The current implementation requires the whole VECTORs to be fully
   covered, but it could be extended to support partial coverage
   (adjacent elements that do not fill the whole vector); that would
   need some cost model to decide where to draw the boundary.  */
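/* As an illustrative sketch (made-up SSA names, assuming two V4SF
   vectors v1_1 and v2_2): the chain

     v1_1[0] + v1_1[1] + v1_1[2] + v1_1[3]
       + v2_2[0] + v2_2[1] + v2_2[2] + v2_2[3]

   becomes

     vs_3 = v1_1 + v2_2;
     vs_3[0] + vs_3[1] + vs_3[2] + vs_3[3]

   replacing four scalar additions with a single vector addition.  */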
1912 undistribute_bitref_for_vector (enum tree_code opcode
,
1913 vec
<operand_entry
*> *ops
, struct loop
*loop
)
1915 if (ops
->length () <= 1)
1918 if (opcode
!= PLUS_EXPR
1919 && opcode
!= MULT_EXPR
1920 && opcode
!= BIT_XOR_EXPR
1921 && opcode
!= BIT_IOR_EXPR
1922 && opcode
!= BIT_AND_EXPR
)
1925 hash_map
<tree
, v_info_ptr
> v_info_map
;
1929 /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
1930 information into map. */
1931 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1933 enum tree_code dcode
;
1936 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1938 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1939 if (!is_gimple_assign (oe1def
))
1941 dcode
= gimple_assign_rhs_code (oe1def
);
1942 if (dcode
!= BIT_FIELD_REF
|| !is_reassociable_op (oe1def
, dcode
, loop
))
1945 tree rhs
= gimple_assign_rhs1 (oe1def
);
1946 tree vec
= TREE_OPERAND (rhs
, 0);
1947 tree vec_type
= TREE_TYPE (vec
);
1949 if (TREE_CODE (vec
) != SSA_NAME
|| !VECTOR_TYPE_P (vec_type
))
1952 /* Ignore it if target machine can't support this VECTOR type. */
1953 if (!VECTOR_MODE_P (TYPE_MODE (vec_type
)))
1956 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1957 if (!TYPE_VECTOR_SUBPARTS (vec_type
).is_constant ())
1960 if (VECTOR_TYPE_P (TREE_TYPE (rhs
))
1961 || !is_a
<scalar_mode
> (TYPE_MODE (TREE_TYPE (rhs
))))
1964 /* The type of BIT_FIELD_REF might not be equal to the element type of
1965 the vector. We want to use a vector type with element type the
1966 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1967 if (!useless_type_conversion_p (TREE_TYPE (rhs
), TREE_TYPE (vec_type
)))
1969 machine_mode simd_mode
;
1970 unsigned HOST_WIDE_INT size
, nunits
;
1971 unsigned HOST_WIDE_INT elem_size
1972 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs
)));
1973 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type
)).is_constant (&size
))
1975 if (size
<= elem_size
|| (size
% elem_size
) != 0)
1977 nunits
= size
/ elem_size
;
1978 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs
)),
1979 nunits
).exists (&simd_mode
))
1981 vec_type
= build_vector_type_for_mode (TREE_TYPE (rhs
), simd_mode
);
1983 /* Ignore it if target machine can't support this VECTOR type. */
1984 if (!VECTOR_MODE_P (TYPE_MODE (vec_type
)))
1987 /* Check const vector type, constrain BIT_FIELD_REF offset and
1989 if (!TYPE_VECTOR_SUBPARTS (vec_type
).is_constant ())
1992 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type
)),
1993 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec
)))))
1997 tree elem_type
= TREE_TYPE (vec_type
);
1998 unsigned HOST_WIDE_INT elem_size
= tree_to_uhwi (TYPE_SIZE (elem_type
));
1999 if (maybe_ne (bit_field_size (rhs
), elem_size
))
2003 if (!constant_multiple_p (bit_field_offset (rhs
), elem_size
, &idx
))
2006 /* Ignore it if target machine can't support this type of VECTOR
2008 optab op_tab
= optab_for_tree_code (opcode
, vec_type
, optab_vector
);
2009 if (optab_handler (op_tab
, TYPE_MODE (vec_type
)) == CODE_FOR_nothing
)
2013 v_info_ptr
&info
= v_info_map
.get_or_insert (vec
, &existed
);
2017 info
->vec_type
= vec_type
;
2019 else if (!types_compatible_p (vec_type
, info
->vec_type
))
2021 info
->vec
.safe_push (std::make_pair (idx
, i
));
2024 /* At least two VECTOR to combine. */
2025 if (v_info_map
.elements () <= 1)
2027 cleanup_vinfo_map (v_info_map
);
2031 /* Verify all VECTOR candidates by checking two conditions:
2032 1) sorted offsets are adjacent, no holes.
2033 2) can fill the whole VECTOR perfectly.
2034 And add the valid candidates to a vector for further handling. */
2035 auto_vec
<tree
> valid_vecs (v_info_map
.elements ());
2036 for (hash_map
<tree
, v_info_ptr
>::iterator it
= v_info_map
.begin ();
2037 it
!= v_info_map
.end (); ++it
)
2039 tree cand_vec
= (*it
).first
;
2040 v_info_ptr cand_info
= (*it
).second
;
2041 unsigned int num_elems
2042 = TYPE_VECTOR_SUBPARTS (cand_info
->vec_type
).to_constant ();
2043 if (cand_info
->vec
.length () != num_elems
)
2045 sbitmap holes
= sbitmap_alloc (num_elems
);
2046 bitmap_ones (holes
);
2049 FOR_EACH_VEC_ELT (cand_info
->vec
, i
, curr
)
2051 if (!bitmap_bit_p (holes
, curr
->first
))
2057 bitmap_clear_bit (holes
, curr
->first
);
2059 if (valid
&& bitmap_empty_p (holes
))
2060 valid_vecs
.quick_push (cand_vec
);
2061 sbitmap_free (holes
);
2064 /* At least two VECTOR to combine. */
2065 if (valid_vecs
.length () <= 1)
2067 cleanup_vinfo_map (v_info_map
);
2071 valid_vecs
.qsort (sort_by_mach_mode
);
2072 /* Go through all candidates by machine mode order, query the mode_to_total
2073 to get the total number for each mode and skip the single one. */
2074 for (unsigned i
= 0; i
< valid_vecs
.length () - 1; ++i
)
2076 tree tvec
= valid_vecs
[i
];
2077 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (tvec
));
2079 /* Skip modes with only a single candidate. */
2080 if (TYPE_MODE (TREE_TYPE (valid_vecs
[i
+ 1])) != mode
)
2083 unsigned int idx
, j
;
2085 tree sum_vec
= tvec
;
2086 v_info_ptr info_ptr
= *(v_info_map
.get (tvec
));
2088 tree vec_type
= info_ptr
->vec_type
;
2090 /* Build the sum for all candidates with same mode. */
2093 sum
= build_and_add_sum (vec_type
, sum_vec
,
2094 valid_vecs
[i
+ 1], opcode
);
2095 if (!useless_type_conversion_p (vec_type
,
2096 TREE_TYPE (valid_vecs
[i
+ 1])))
2098 /* Update the operands only after build_and_add_sum,
2099 so that we don't have to repeat the placement algorithm
2100 of build_and_add_sum. */
2101 gimple_stmt_iterator gsi
= gsi_for_stmt (sum
);
2102 tree vce
= build1 (VIEW_CONVERT_EXPR
, vec_type
,
2104 tree lhs
= make_ssa_name (vec_type
);
2105 gimple
*g
= gimple_build_assign (lhs
, VIEW_CONVERT_EXPR
, vce
);
2106 gimple_set_uid (g
, gimple_uid (sum
));
2107 gsi_insert_before (&gsi
, g
, GSI_NEW_STMT
);
2108 gimple_assign_set_rhs2 (sum
, lhs
);
2109 if (sum_vec
== tvec
)
2111 vce
= build1 (VIEW_CONVERT_EXPR
, vec_type
, sum_vec
);
2112 lhs
= make_ssa_name (vec_type
);
2113 g
= gimple_build_assign (lhs
, VIEW_CONVERT_EXPR
, vce
);
2114 gimple_set_uid (g
, gimple_uid (sum
));
2115 gsi_insert_before (&gsi
, g
, GSI_NEW_STMT
);
2116 gimple_assign_set_rhs1 (sum
, lhs
);
2120 sum_vec
= gimple_get_lhs (sum
);
2121 info_ptr
= *(v_info_map
.get (valid_vecs
[i
+ 1]));
2122 gcc_assert (types_compatible_p (vec_type
, info_ptr
->vec_type
));
2123 /* Update those related ops of current candidate VECTOR. */
2124 FOR_EACH_VEC_ELT (info_ptr
->vec
, j
, elem
)
2127 gimple
*def
= SSA_NAME_DEF_STMT ((*ops
)[idx
]->op
);
2128 /* Set this then op definition will get DCEd later. */
2129 gimple_set_visited (def
, true);
2130 if (opcode
== PLUS_EXPR
2131 || opcode
== BIT_XOR_EXPR
2132 || opcode
== BIT_IOR_EXPR
)
2133 (*ops
)[idx
]->op
= build_zero_cst (TREE_TYPE ((*ops
)[idx
]->op
));
2134 else if (opcode
== MULT_EXPR
)
2135 (*ops
)[idx
]->op
= build_one_cst (TREE_TYPE ((*ops
)[idx
]->op
));
2138 gcc_assert (opcode
== BIT_AND_EXPR
);
2140 = build_all_ones_cst (TREE_TYPE ((*ops
)[idx
]->op
));
2142 (*ops
)[idx
]->rank
= 0;
2144 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2146 fprintf (dump_file
, "Generating addition -> ");
2147 print_gimple_stmt (dump_file
, sum
, 0);
2151 while ((i
< valid_vecs
.length () - 1)
2152 && TYPE_MODE (TREE_TYPE (valid_vecs
[i
+ 1])) == mode
);
2154 /* Referring to first valid VECTOR with this mode, generate the
2155 BIT_FIELD_REF statements accordingly. */
2156 info_ptr
= *(v_info_map
.get (tvec
));
2158 tree elem_type
= TREE_TYPE (vec_type
);
2159 FOR_EACH_VEC_ELT (info_ptr
->vec
, j
, elem
)
2162 tree dst
= make_ssa_name (elem_type
);
2163 tree pos
= bitsize_int (elem
->first
2164 * tree_to_uhwi (TYPE_SIZE (elem_type
)));
2165 tree bfr
= build3 (BIT_FIELD_REF
, elem_type
, sum_vec
,
2166 TYPE_SIZE (elem_type
), pos
);
2167 gimple
*gs
= gimple_build_assign (dst
, BIT_FIELD_REF
, bfr
);
2168 insert_stmt_after (gs
, sum
);
2169 gimple
*def
= SSA_NAME_DEF_STMT ((*ops
)[idx
]->op
);
2170 /* Set this then op definition will get DCEd later. */
2171 gimple_set_visited (def
, true);
2172 (*ops
)[idx
]->op
= gimple_assign_lhs (gs
);
2173 (*ops
)[idx
]->rank
= get_rank ((*ops
)[idx
]->op
);
2174 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2176 fprintf (dump_file
, "Generating bit_field_ref -> ");
2177 print_gimple_stmt (dump_file
, gs
, 0);
2182 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
    fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2185 cleanup_vinfo_map (v_info_map
);
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
   expression, examine the other OPS to see if any of them are comparisons
   of the same values, which we may be able to combine or eliminate.
   For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
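/* Illustrative sketch (editor's addition, not part of the pass source): a
   source fragment whose operand list triggers the combination described
   above; the function name is hypothetical.

     int le_p (int a, int b)
     {
       return (a < b) | (a == b);   // combined into a <= b
     }

   Both operands of the BIT_IOR_EXPR are comparisons of the same values, so
   maybe_fold_or_comparisons can fold them into a single a <= b test.  */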
2196 eliminate_redundant_comparison (enum tree_code opcode
,
2197 vec
<operand_entry
*> *ops
,
2198 unsigned int currindex
,
2199 operand_entry
*curr
)
2202 enum tree_code lcode
, rcode
;
2203 gimple
*def1
, *def2
;
2207 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
2210 /* Check that CURR is a comparison. */
2211 if (TREE_CODE (curr
->op
) != SSA_NAME
)
2213 def1
= SSA_NAME_DEF_STMT (curr
->op
);
2214 if (!is_gimple_assign (def1
))
2216 lcode
= gimple_assign_rhs_code (def1
);
2217 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
2219 op1
= gimple_assign_rhs1 (def1
);
2220 op2
= gimple_assign_rhs2 (def1
);
2222 /* Now look for a similar comparison in the remaining OPS. */
2223 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
2227 if (TREE_CODE (oe
->op
) != SSA_NAME
)
2229 def2
= SSA_NAME_DEF_STMT (oe
->op
);
2230 if (!is_gimple_assign (def2
))
2232 rcode
= gimple_assign_rhs_code (def2
);
2233 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
      /* If we got here, we have a match.  See if we can combine the
	 two comparisons.  */
2238 tree type
= TREE_TYPE (gimple_assign_lhs (def1
));
2239 if (opcode
== BIT_IOR_EXPR
)
2240 t
= maybe_fold_or_comparisons (type
,
2242 rcode
, gimple_assign_rhs1 (def2
),
2243 gimple_assign_rhs2 (def2
));
2245 t
= maybe_fold_and_comparisons (type
,
2247 rcode
, gimple_assign_rhs1 (def2
),
2248 gimple_assign_rhs2 (def2
));
      /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
	 always give us a boolean_type_node value back.  If the original
	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
	 we need to convert.  */
2256 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
2258 if (!fold_convertible_p (TREE_TYPE (curr
->op
), t
))
2260 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
2263 if (TREE_CODE (t
) != INTEGER_CST
2264 && !operand_equal_p (t
, curr
->op
, 0))
2266 enum tree_code subcode
;
2267 tree newop1
, newop2
;
2268 if (!COMPARISON_CLASS_P (t
))
2270 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
2271 STRIP_USELESS_TYPE_CONVERSION (newop1
);
2272 STRIP_USELESS_TYPE_CONVERSION (newop2
);
2273 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
2277 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2279 fprintf (dump_file
, "Equivalence: ");
2280 print_generic_expr (dump_file
, curr
->op
);
2281 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
2282 print_generic_expr (dump_file
, oe
->op
);
2283 fprintf (dump_file
, " -> ");
2284 print_generic_expr (dump_file
, t
);
2285 fprintf (dump_file
, "\n");
  /* Now we can delete oe, as it has been subsumed by the new combined
     expression t.  */
2290 ops
->ordered_remove (i
);
2291 reassociate_stats
.ops_eliminated
++;
  /* If t is the same as curr->op, we're done.  Otherwise we must
     replace curr->op with t.  Special case is if we got a constant
     back, in which case we add it to the end instead of in place of
     the current entry.  */
2297 if (TREE_CODE (t
) == INTEGER_CST
)
2299 ops
->ordered_remove (currindex
);
2300 add_to_ops_vec (ops
, t
);
2302 else if (!operand_equal_p (t
, curr
->op
, 0))
2305 enum tree_code subcode
;
2308 gcc_assert (COMPARISON_CLASS_P (t
));
2309 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
2310 STRIP_USELESS_TYPE_CONVERSION (newop1
);
2311 STRIP_USELESS_TYPE_CONVERSION (newop2
);
2312 gcc_checking_assert (is_gimple_val (newop1
)
2313 && is_gimple_val (newop2
));
2314 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
2315 curr
->op
= gimple_get_lhs (sum
);
/* Transform repeated addition of same values into multiply with
   count.  */
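/* Illustrative sketch (editor's addition, not part of the pass source): for
   a PLUS_EXPR operand list such as

     x + x + x + y

   the three occurrences of x are removed and replaced by one new entry
   t = x * 3, so the list becomes t + y.  */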
2327 transform_add_to_multiply (vec
<operand_entry
*> *ops
)
2330 tree op
= NULL_TREE
;
2332 int i
, start
= -1, end
= 0, count
= 0;
2333 auto_vec
<std::pair
<int, int> > indxs
;
2334 bool changed
= false;
2336 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops
)[0]->op
))
2337 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops
)[0]->op
))
2338 || !flag_unsafe_math_optimizations
))
2341 /* Look for repeated operands. */
2342 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
2350 else if (operand_equal_p (oe
->op
, op
, 0))
2358 indxs
.safe_push (std::make_pair (start
, end
));
2366 indxs
.safe_push (std::make_pair (start
, end
));
2368 for (j
= indxs
.length () - 1; j
>= 0; --j
)
2370 /* Convert repeated operand addition to multiplication. */
2371 start
= indxs
[j
].first
;
2372 end
= indxs
[j
].second
;
2373 op
= (*ops
)[start
]->op
;
2374 count
= end
- start
+ 1;
2375 for (i
= end
; i
>= start
; --i
)
2376 ops
->unordered_remove (i
);
2377 tree tmp
= make_ssa_name (TREE_TYPE (op
));
2378 tree cst
= build_int_cst (integer_type_node
, count
);
2380 = gimple_build_assign (tmp
, MULT_EXPR
,
2381 op
, fold_convert (TREE_TYPE (op
), cst
));
2382 gimple_set_visited (mul_stmt
, true);
2383 add_to_ops_vec (ops
, tmp
, mul_stmt
);
/* Perform various identities and other optimizations on the list of
   operand entries, stored in OPS.  The tree code for the binary
   operation between all the operands is OPCODE.  */
2396 optimize_ops_list (enum tree_code opcode
,
2397 vec
<operand_entry
*> *ops
)
2399 unsigned int length
= ops
->length ();
2402 operand_entry
*oelast
= NULL
;
2403 bool iterate
= false;
2408 oelast
= ops
->last ();
  /* If the last two are constants, pop the constants off, merge them
     and try the next two.  */
2412 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
2414 operand_entry
*oelm1
= (*ops
)[length
- 2];
2416 if (oelm1
->rank
== 0
2417 && is_gimple_min_invariant (oelm1
->op
)
2418 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
2419 TREE_TYPE (oelast
->op
)))
2421 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
2422 oelm1
->op
, oelast
->op
);
2424 if (folded
&& is_gimple_min_invariant (folded
))
2426 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2427 fprintf (dump_file
, "Merging constants\n");
2432 add_to_ops_vec (ops
, folded
);
2433 reassociate_stats
.constants_eliminated
++;
2435 optimize_ops_list (opcode
, ops
);
2441 eliminate_using_constants (opcode
, ops
);
2444 for (i
= 0; ops
->iterate (i
, &oe
);)
2448 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
2450 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
2451 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
2452 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
2465 optimize_ops_list (opcode
, ops
);
/* The following functions are subroutines to optimize_range_tests and allow
   it to try to change a logical combination of comparisons into a range
   test.  For example,
	X == 2 || X == 5 || X == 3 || X == 4
   is converted to
	(unsigned) (X - 2) <= 3
   For more information see comments above fold_range_test in fold-const.cc,
   this implementation is for GIMPLE.  */
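/* Illustrative sketch (editor's addition, not part of the pass source): a
   test case whose condition matches the pattern above; the function name is
   hypothetical.

     int in_range_p (unsigned int x)
     {
       return x == 2 || x == 5 || x == 3 || x == 4;  // (unsigned) (x - 2) <= 3
     }

   Subtracting the low bound and doing one unsigned comparison against the
   width of the range replaces the four equality tests.  */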
/* Dump the range entry R to FILE, skipping its expression if SKIP_EXP.  */

void
dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
{
  if (!skip_exp)
    print_generic_expr (file, r->exp);
  fprintf (file, " %c[", r->in_p ? '+' : '-');
  print_generic_expr (file, r->low);
  fprintf (file, ", ");
  print_generic_expr (file, r->high);
  fputc (']', file);
}
/* Dump the range entry R to STDERR.  */

DEBUG_FUNCTION void
debug_range_entry (struct range_entry *r)
{
  dump_range_entry (stderr, r, false);
  fputc ('\n', stderr);
}
/* This is similar to make_range in fold-const.cc, but on top of
   GIMPLE instead of trees.  If EXP is non-NULL, it should be
   an SSA_NAME and STMT argument is ignored, otherwise STMT
   argument should be a GIMPLE_COND.  */
2513 init_range_entry (struct range_entry
*r
, tree exp
, gimple
*stmt
)
2517 bool is_bool
, strict_overflow_p
;
2521 r
->strict_overflow_p
= false;
2523 r
->high
= NULL_TREE
;
2524 if (exp
!= NULL_TREE
2525 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
  /* Start with simply saying "EXP != 0" and then look at the code of EXP
     and see if we can refine the range.  Some of the cases below may not
     happen, but it doesn't seem worth worrying about this.  We "continue"
     the outer loop when we've changed something; otherwise we "break"
     the switch, which will "break" the while.  */
2533 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
2536 strict_overflow_p
= false;
2538 if (exp
== NULL_TREE
)
2540 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
2542 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
2547 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
2552 enum tree_code code
;
2553 tree arg0
, arg1
, exp_type
;
2557 if (exp
!= NULL_TREE
)
2559 if (TREE_CODE (exp
) != SSA_NAME
2560 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
))
2563 stmt
= SSA_NAME_DEF_STMT (exp
);
2564 if (!is_gimple_assign (stmt
))
2567 code
= gimple_assign_rhs_code (stmt
);
2568 arg0
= gimple_assign_rhs1 (stmt
);
2569 arg1
= gimple_assign_rhs2 (stmt
);
2570 exp_type
= TREE_TYPE (exp
);
2574 code
= gimple_cond_code (stmt
);
2575 arg0
= gimple_cond_lhs (stmt
);
2576 arg1
= gimple_cond_rhs (stmt
);
2577 exp_type
= boolean_type_node
;
2580 if (TREE_CODE (arg0
) != SSA_NAME
2581 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0
))
2583 loc
= gimple_location (stmt
);
2587 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
2588 /* Ensure the range is either +[-,0], +[0,0],
2589 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2590 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2591 or similar expression of unconditional true or
2592 false, it should not be negated. */
2593 && ((high
&& integer_zerop (high
))
2594 || (low
&& integer_onep (low
))))
2607 if ((TYPE_PRECISION (exp_type
) == 1
2608 || TREE_CODE (exp_type
) == BOOLEAN_TYPE
)
2609 && TYPE_PRECISION (TREE_TYPE (arg0
)) > 1)
2612 else if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
2614 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
2619 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
2634 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
2636 &strict_overflow_p
);
2637 if (nexp
!= NULL_TREE
)
2640 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2653 r
->strict_overflow_p
= strict_overflow_p
;
/* Comparison function for qsort.  Sort entries
   without SSA_NAME exp first, then with SSA_NAMEs sorted
   by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
   by increasing ->low and if ->low is the same, by increasing
   ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
   maximum.  */
2665 range_entry_cmp (const void *a
, const void *b
)
2667 const struct range_entry
*p
= (const struct range_entry
*) a
;
2668 const struct range_entry
*q
= (const struct range_entry
*) b
;
2670 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
2672 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2674 /* Group range_entries for the same SSA_NAME together. */
2675 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
2677 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
      /* If ->low is different, NULL low goes first, then by
	 ascending low.  */
2681 if (p
->low
!= NULL_TREE
)
2683 if (q
->low
!= NULL_TREE
)
2685 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2687 if (tem
&& integer_onep (tem
))
2689 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2691 if (tem
&& integer_onep (tem
))
2697 else if (q
->low
!= NULL_TREE
)
      /* If ->high is different, NULL high goes last, before that by
	 ascending high.  */
2701 if (p
->high
!= NULL_TREE
)
2703 if (q
->high
!= NULL_TREE
)
2705 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2707 if (tem
&& integer_onep (tem
))
2709 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2711 if (tem
&& integer_onep (tem
))
2717 else if (q
->high
!= NULL_TREE
)
2719 /* If both ranges are the same, sort below by ascending idx. */
2724 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2727 if (p
->idx
< q
->idx
)
2731 gcc_checking_assert (p
->idx
> q
->idx
);
/* Helper function for update_range_test.  Force EXPR into an SSA_NAME,
   inserting any needed statements before or after GSI, as selected by
   BEFORE.  */

static tree
force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
{
  enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
  tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
  if (TREE_CODE (ret) != SSA_NAME)
    {
      gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
      if (before)
	gsi_insert_before (gsi, g, GSI_SAME_STMT);
      else
	gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
      ret = gimple_assign_lhs (g);
    }
  return ret;
}
/* Helper routine of optimize_range_tests.
   [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
   RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
   OPCODE and OPS are arguments of optimize_range_tests.  If OTHERRANGE
   is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
   an array of COUNT pointers to other ranges.  Return
   true if the range merge has been successful.
   If OPCODE is ERROR_MARK, this is called from within
   maybe_optimize_range_tests and is performing inter-bb range optimization.
   In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
   oe->rank.  */
2769 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2770 struct range_entry
**otherrangep
,
2771 unsigned int count
, enum tree_code opcode
,
2772 vec
<operand_entry
*> *ops
, tree exp
, gimple_seq seq
,
2773 bool in_p
, tree low
, tree high
, bool strict_overflow_p
)
2775 unsigned int idx
= range
->idx
;
2776 struct range_entry
*swap_with
= NULL
;
2777 basic_block rewrite_bb_first
= NULL
, rewrite_bb_last
= NULL
;
2778 if (opcode
== ERROR_MARK
)
2780 /* For inter-bb range test optimization, pick from the range tests
2781 the one which is tested in the earliest condition (one dominating
2782 the others), because otherwise there could be some UB (e.g. signed
2783 overflow) in following bbs that we'd expose which wasn't there in
2784 the original program. See PR104196. */
2785 basic_block orig_range_bb
= BASIC_BLOCK_FOR_FN (cfun
, (*ops
)[idx
]->id
);
2786 basic_block range_bb
= orig_range_bb
;
2787 for (unsigned int i
= 0; i
< count
; i
++)
2789 struct range_entry
*this_range
;
2791 this_range
= otherrange
+ i
;
2793 this_range
= otherrangep
[i
];
2794 operand_entry
*oe
= (*ops
)[this_range
->idx
];
2795 basic_block this_bb
= BASIC_BLOCK_FOR_FN (cfun
, oe
->id
);
2796 if (range_bb
!= this_bb
2797 && dominated_by_p (CDI_DOMINATORS
, range_bb
, this_bb
))
2799 swap_with
= this_range
;
2801 idx
= this_range
->idx
;
2804 /* If seq is non-NULL, it can contain statements that use SSA_NAMEs
2805 only defined in later blocks. In this case we can't move the
2806 merged comparison earlier, so instead check if there are any stmts
2807 that might trigger signed integer overflow in between and rewrite
2808 them. But only after we check if the optimization is possible. */
2809 if (seq
&& swap_with
)
2811 rewrite_bb_first
= range_bb
;
2812 rewrite_bb_last
= orig_range_bb
;
2817 operand_entry
*oe
= (*ops
)[idx
];
2819 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
2820 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2821 location_t loc
= gimple_location (stmt
);
2822 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2823 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2825 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2826 gimple_stmt_iterator gsi
;
2827 unsigned int i
, uid
;
2829 if (tem
== NULL_TREE
)
  /* If op is default def SSA_NAME, there is no place to insert the
     new comparison.  Give up, unless we can use OP itself as the
     range test.  */
2835 if (op
&& SSA_NAME_IS_DEFAULT_DEF (op
))
2837 if (op
== range
->exp
2838 && ((TYPE_PRECISION (optype
) == 1 && TYPE_UNSIGNED (optype
))
2839 || TREE_CODE (optype
) == BOOLEAN_TYPE
)
2841 || (TREE_CODE (tem
) == EQ_EXPR
2842 && TREE_OPERAND (tem
, 0) == op
2843 && integer_onep (TREE_OPERAND (tem
, 1))))
2844 && opcode
!= BIT_IOR_EXPR
2845 && (opcode
!= ERROR_MARK
|| oe
->rank
!= BIT_IOR_EXPR
))
2855 std::swap (range
->idx
, swap_with
->idx
);
2857 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2858 warning_at (loc
, OPT_Wstrict_overflow
,
2859 "assuming signed overflow does not occur "
2860 "when simplifying range test");
2862 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2864 struct range_entry
*r
;
2865 fprintf (dump_file
, "Optimizing range tests ");
2866 dump_range_entry (dump_file
, range
, false);
2867 for (i
= 0; i
< count
; i
++)
2874 && r
->exp
!= range
->exp
2875 && TREE_CODE (r
->exp
) == SSA_NAME
)
2877 fprintf (dump_file
, " and ");
2878 dump_range_entry (dump_file
, r
, false);
2882 fprintf (dump_file
, " and");
2883 dump_range_entry (dump_file
, r
, true);
2886 fprintf (dump_file
, "\n into ");
2887 print_generic_expr (dump_file
, tem
);
2888 fprintf (dump_file
, "\n");
2891 /* In inter-bb range optimization mode, if we have a seq, we can't
2892 move the merged comparison to the earliest bb from the comparisons
2893 being replaced, so instead rewrite stmts that could trigger signed
2894 integer overflow. */
2895 for (basic_block bb
= rewrite_bb_last
;
2896 bb
!= rewrite_bb_first
; bb
= single_pred (bb
))
2897 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
);
2898 !gsi_end_p (gsi
); gsi_next (&gsi
))
2900 gimple
*stmt
= gsi_stmt (gsi
);
2901 if (is_gimple_assign (stmt
))
2902 if (tree lhs
= gimple_assign_lhs (stmt
))
2903 if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2904 || POINTER_TYPE_P (TREE_TYPE (lhs
)))
2905 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs
)))
2907 enum tree_code code
= gimple_assign_rhs_code (stmt
);
2908 if (arith_code_with_undefined_signed_overflow (code
))
2910 gimple_stmt_iterator gsip
= gsi
;
2911 gimple_stmt_iterator gsin
= gsi
;
2914 rewrite_to_defined_overflow (stmt
, true);
2915 unsigned uid
= gimple_uid (stmt
);
2916 if (gsi_end_p (gsip
))
2917 gsip
= gsi_after_labels (bb
);
2920 for (; gsi_stmt (gsip
) != gsi_stmt (gsin
);
2922 gimple_set_uid (gsi_stmt (gsip
), uid
);
2927 if (opcode
== BIT_IOR_EXPR
2928 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2929 tem
= invert_truthvalue_loc (loc
, tem
);
2931 tem
= fold_convert_loc (loc
, optype
, tem
);
2934 gsi
= gsi_for_stmt (stmt
);
2935 uid
= gimple_uid (stmt
);
2943 gcc_checking_assert (tem
== op
);
  /* In rare cases range->exp can be equal to lhs of stmt.
     In that case we have to insert after the stmt rather than before it.
     If stmt is a PHI, insert it at the start of the basic block.  */
2947 else if (op
!= range
->exp
)
2949 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2950 tem
= force_into_ssa_name (&gsi
, tem
, true);
2953 else if (gimple_code (stmt
) != GIMPLE_PHI
)
2955 gsi_insert_seq_after (&gsi
, seq
, GSI_CONTINUE_LINKING
);
2956 tem
= force_into_ssa_name (&gsi
, tem
, false);
2960 gsi
= gsi_after_labels (gimple_bb (stmt
));
2961 if (!gsi_end_p (gsi
))
2962 uid
= gimple_uid (gsi_stmt (gsi
));
2965 gsi
= gsi_start_bb (gimple_bb (stmt
));
2967 while (!gsi_end_p (gsi
))
2969 uid
= gimple_uid (gsi_stmt (gsi
));
2973 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2974 tem
= force_into_ssa_name (&gsi
, tem
, true);
2975 if (gsi_end_p (gsi
))
2976 gsi
= gsi_last_bb (gimple_bb (stmt
));
2980 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2981 if (gimple_uid (gsi_stmt (gsi
)))
2984 gimple_set_uid (gsi_stmt (gsi
), uid
);
2991 range
->strict_overflow_p
= false;
2993 for (i
= 0; i
< count
; i
++)
2996 range
= otherrange
+ i
;
2998 range
= otherrangep
[i
];
2999 oe
= (*ops
)[range
->idx
];
3000 /* Now change all the other range test immediate uses, so that
3001 those tests will be optimized away. */
3002 if (opcode
== ERROR_MARK
)
3005 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
3006 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
3008 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
3009 ? boolean_false_node
: boolean_true_node
);
3012 oe
->op
= error_mark_node
;
3013 range
->exp
= NULL_TREE
;
3014 range
->low
= NULL_TREE
;
3015 range
->high
= NULL_TREE
;
/* Optimize X == CST1 || X == CST2
   if popcount (CST1 ^ CST2) == 1 into
   (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
   Similarly for ranges.  E.g.
   X != 2 && X != 3 && X != 10 && X != 11
   will be transformed by the previous optimization into
   !((X - 2U) <= 1U || (X - 10U) <= 1U)
   and this loop can transform that into
   !(((X & ~8) - 2U) <= 1U).  */
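/* Worked instance (editor's addition, not part of the pass source), using
   the small constants CST1 = 4 and CST2 = 6 purely for illustration:

     4 ^ 6 == 2 and popcount (2) == 1, so
     X == 4 || X == 6   becomes   (X & ~2) == 4

   because clearing the single differing bit maps both 4 and 6 (and no other
   value) to 4.  */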
3031 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
3032 tree lowi
, tree lowj
, tree highi
, tree highj
,
3033 vec
<operand_entry
*> *ops
,
3034 struct range_entry
*rangei
,
3035 struct range_entry
*rangej
)
3037 tree lowxor
, highxor
, tem
, exp
;
3038 /* Check lowi ^ lowj == highi ^ highj and
3039 popcount (lowi ^ lowj) == 1. */
3040 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
3041 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
3043 if (!integer_pow2p (lowxor
))
3045 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
3046 if (!tree_int_cst_equal (lowxor
, highxor
))
3050 scalar_int_mode mode
= as_a
<scalar_int_mode
> (TYPE_MODE (type
));
3051 int prec
= GET_MODE_PRECISION (mode
);
3052 if (TYPE_PRECISION (type
) < prec
3053 || (wi::to_wide (TYPE_MIN_VALUE (type
))
3054 != wi::min_value (prec
, TYPE_SIGN (type
)))
3055 || (wi::to_wide (TYPE_MAX_VALUE (type
))
3056 != wi::max_value (prec
, TYPE_SIGN (type
))))
3058 type
= build_nonstandard_integer_type (prec
, TYPE_UNSIGNED (type
));
3059 exp
= fold_convert (type
, exp
);
3060 lowxor
= fold_convert (type
, lowxor
);
3061 lowi
= fold_convert (type
, lowi
);
3062 highi
= fold_convert (type
, highi
);
3064 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
3065 exp
= fold_build2 (BIT_AND_EXPR
, type
, exp
, tem
);
3066 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
3067 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
3068 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, exp
,
3069 NULL
, rangei
->in_p
, lowj
, highj
,
3070 rangei
->strict_overflow_p
3071 || rangej
->strict_overflow_p
))
/* Optimize X == CST1 || X == CST2
   if popcount (CST2 - CST1) == 1 into
   ((X - CST1) & ~(CST2 - CST1)) == 0.
   Similarly for ranges.  E.g.
   X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
   || X == 75 || X == 45
   will be transformed by the previous optimization into
   (X - 43U) <= 3U || (X - 75U) <= 3U
   and this loop can transform that into
   ((X - 43U) & ~(75U - 43U)) <= 3U.  */
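/* Worked instance (editor's addition, not part of the pass source), using
   the constants CST1 = 5 and CST2 = 13 purely for illustration:

     13 - 5 == 8 and popcount (8) == 1, so
     X == 5 || X == 13   becomes   ((X - 5U) & ~8U) == 0

   because after biasing by 5 the only accepted values are 0 and 8, i.e. the
   values whose bits outside the single difference bit are all clear.  */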
3087 optimize_range_tests_diff (enum tree_code opcode
, tree type
,
3088 tree lowi
, tree lowj
, tree highi
, tree highj
,
3089 vec
<operand_entry
*> *ops
,
3090 struct range_entry
*rangei
,
3091 struct range_entry
*rangej
)
3093 tree tem1
, tem2
, mask
;
3094 /* Check highi - lowi == highj - lowj. */
3095 tem1
= fold_binary (MINUS_EXPR
, type
, highi
, lowi
);
3096 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
3098 tem2
= fold_binary (MINUS_EXPR
, type
, highj
, lowj
);
3099 if (!tree_int_cst_equal (tem1
, tem2
))
3101 /* Check popcount (lowj - lowi) == 1. */
3102 tem1
= fold_binary (MINUS_EXPR
, type
, lowj
, lowi
);
3103 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
3105 if (!integer_pow2p (tem1
))
3108 scalar_int_mode mode
= as_a
<scalar_int_mode
> (TYPE_MODE (type
));
3109 int prec
= GET_MODE_PRECISION (mode
);
3110 if (TYPE_PRECISION (type
) < prec
3111 || (wi::to_wide (TYPE_MIN_VALUE (type
))
3112 != wi::min_value (prec
, TYPE_SIGN (type
)))
3113 || (wi::to_wide (TYPE_MAX_VALUE (type
))
3114 != wi::max_value (prec
, TYPE_SIGN (type
))))
3115 type
= build_nonstandard_integer_type (prec
, 1);
3117 type
= unsigned_type_for (type
);
3118 tem1
= fold_convert (type
, tem1
);
3119 tem2
= fold_convert (type
, tem2
);
3120 lowi
= fold_convert (type
, lowi
);
3121 mask
= fold_build1 (BIT_NOT_EXPR
, type
, tem1
);
3122 tem1
= fold_build2 (MINUS_EXPR
, type
,
3123 fold_convert (type
, rangei
->exp
), lowi
);
3124 tem1
= fold_build2 (BIT_AND_EXPR
, type
, tem1
, mask
);
3125 lowj
= build_int_cst (type
, 0);
3126 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, tem1
,
3127 NULL
, rangei
->in_p
, lowj
, tem2
,
3128 rangei
->strict_overflow_p
3129 || rangej
->strict_overflow_p
))
/* Perform the common checks shared by optimize_range_tests_xor and
   optimize_range_tests_diff, then dispatch to one of them:
   if OPTIMIZE_XOR is true, call optimize_range_tests_xor,
   otherwise call optimize_range_tests_diff.  */
3140 optimize_range_tests_1 (enum tree_code opcode
, int first
, int length
,
3141 bool optimize_xor
, vec
<operand_entry
*> *ops
,
3142 struct range_entry
*ranges
)
3145 bool any_changes
= false;
3146 for (i
= first
; i
< length
; i
++)
3148 tree lowi
, highi
, lowj
, highj
, type
, tem
;
3150 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
3152 type
= TREE_TYPE (ranges
[i
].exp
);
3153 if (!INTEGRAL_TYPE_P (type
))
3155 lowi
= ranges
[i
].low
;
3156 if (lowi
== NULL_TREE
)
3157 lowi
= TYPE_MIN_VALUE (type
);
3158 highi
= ranges
[i
].high
;
3159 if (highi
== NULL_TREE
)
3161 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
3164 if (ranges
[i
].exp
!= ranges
[j
].exp
|| ranges
[j
].in_p
)
3166 lowj
= ranges
[j
].low
;
3167 if (lowj
== NULL_TREE
)
3169 highj
= ranges
[j
].high
;
3170 if (highj
== NULL_TREE
)
3171 highj
= TYPE_MAX_VALUE (type
);
3172 /* Check lowj > highi. */
3173 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
3175 if (tem
== NULL_TREE
|| !integer_onep (tem
))
3178 changes
= optimize_range_tests_xor (opcode
, type
, lowi
, lowj
,
3180 ranges
+ i
, ranges
+ j
);
3182 changes
= optimize_range_tests_diff (opcode
, type
, lowi
, lowj
,
3184 ranges
+ i
, ranges
+ j
);
3195 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3196 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3197 EXP on success, NULL otherwise. */
3200 extract_bit_test_mask (tree exp
, int prec
, tree totallow
, tree low
, tree high
,
3201 wide_int
*mask
, tree
*totallowp
)
3203 tree tem
= int_const_binop (MINUS_EXPR
, high
, low
);
3204 if (tem
== NULL_TREE
3205 || TREE_CODE (tem
) != INTEGER_CST
3206 || TREE_OVERFLOW (tem
)
3207 || tree_int_cst_sgn (tem
) == -1
3208 || compare_tree_int (tem
, prec
) != -1)
3211 unsigned HOST_WIDE_INT max
= tree_to_uhwi (tem
) + 1;
3212 *mask
= wi::shifted_mask (0, max
, false, prec
);
3213 if (TREE_CODE (exp
) == BIT_AND_EXPR
3214 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
3216 widest_int msk
= wi::to_widest (TREE_OPERAND (exp
, 1));
3217 msk
= wi::zext (~msk
, TYPE_PRECISION (TREE_TYPE (exp
)));
3218 if (wi::popcount (msk
) == 1
3219 && wi::ltu_p (msk
, prec
- max
))
3221 *mask
|= wi::shifted_mask (msk
.to_uhwi (), max
, false, prec
);
3222 max
+= msk
.to_uhwi ();
3223 exp
= TREE_OPERAND (exp
, 0);
3224 if (integer_zerop (low
)
3225 && TREE_CODE (exp
) == PLUS_EXPR
3226 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
3228 tree ret
= TREE_OPERAND (exp
, 0);
3231 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp
, 1)),
3232 TYPE_PRECISION (TREE_TYPE (low
))));
3233 tree tbias
= wide_int_to_tree (TREE_TYPE (ret
), bias
);
3239 else if (!tree_int_cst_lt (totallow
, tbias
))
3241 bias
= wi::to_widest (tbias
);
3242 bias
-= wi::to_widest (totallow
);
3243 if (bias
>= 0 && bias
< prec
- max
)
3245 *mask
= wi::lshift (*mask
, bias
);
3253 if (!tree_int_cst_lt (totallow
, low
))
3255 tem
= int_const_binop (MINUS_EXPR
, low
, totallow
);
3256 if (tem
== NULL_TREE
3257 || TREE_CODE (tem
) != INTEGER_CST
3258 || TREE_OVERFLOW (tem
)
3259 || compare_tree_int (tem
, prec
- max
) == 1)
3262 *mask
= wi::lshift (*mask
, wi::to_widest (tem
));
/* Attempt to optimize small range tests using bit test.
   E.g.
   X != 43 && X != 76 && X != 44 && X != 78 && X != 49
   && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
   has been by earlier optimizations optimized into:
   ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
   As all the 43 through 82 range is less than 64 numbers,
   for 64-bit word targets optimize that into:
   (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */
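/* Worked instance (editor's addition, not part of the pass source): in the
   example above the mask has one bit per tested value biased by 43, i.e. at
   offsets 0-3, 6, 32-35 and 39, and indeed

     0xF | 0x40 | (0xFULL << 32) | (1ULL << 39) == 0x8F0000004FULL

   so a single shift, AND and compare stands in for all ten individual
   comparisons once X - 43U is known to be a valid shift amount.  */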
3277 optimize_range_tests_to_bit_test (enum tree_code opcode
, int first
, int length
,
3278 vec
<operand_entry
*> *ops
,
3279 struct range_entry
*ranges
)
3282 bool any_changes
= false;
3283 int prec
= GET_MODE_BITSIZE (word_mode
);
3284 auto_vec
<struct range_entry
*, 64> candidates
;
3286 for (i
= first
; i
< length
- 1; i
++)
3288 tree lowi
, highi
, lowj
, highj
, type
;
3290 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
3292 type
= TREE_TYPE (ranges
[i
].exp
);
3293 if (!INTEGRAL_TYPE_P (type
))
3295 lowi
= ranges
[i
].low
;
3296 if (lowi
== NULL_TREE
)
3297 lowi
= TYPE_MIN_VALUE (type
);
3298 highi
= ranges
[i
].high
;
3299 if (highi
== NULL_TREE
)
3302 tree exp
= extract_bit_test_mask (ranges
[i
].exp
, prec
, lowi
, lowi
,
3303 highi
, &mask
, &lowi
);
3304 if (exp
== NULL_TREE
)
3306 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
3307 candidates
.truncate (0);
3308 int end
= MIN (i
+ 64, length
);
3309 for (j
= i
+ 1; j
< end
; j
++)
3312 if (ranges
[j
].exp
== NULL_TREE
|| ranges
[j
].in_p
)
3314 if (ranges
[j
].exp
== exp
)
3316 else if (TREE_CODE (ranges
[j
].exp
) == BIT_AND_EXPR
)
3318 exp2
= TREE_OPERAND (ranges
[j
].exp
, 0);
3321 else if (TREE_CODE (exp2
) == PLUS_EXPR
)
3323 exp2
= TREE_OPERAND (exp2
, 0);
3333 lowj
= ranges
[j
].low
;
3334 if (lowj
== NULL_TREE
)
3336 highj
= ranges
[j
].high
;
3337 if (highj
== NULL_TREE
)
3338 highj
= TYPE_MAX_VALUE (type
);
3340 exp2
= extract_bit_test_mask (ranges
[j
].exp
, prec
, lowi
, lowj
,
3341 highj
, &mask2
, NULL
);
3345 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
3346 candidates
.safe_push (&ranges
[j
]);
3349 /* If every possible relative value of the expression is a valid shift
3350 amount, then we can merge the entry test in the bit test. In this
3351 case, if we would need otherwise 2 or more comparisons, then use
3352 the bit test; in the other cases, the threshold is 3 comparisons. */
3353 bool entry_test_needed
;
3355 if (TREE_CODE (exp
) == SSA_NAME
3356 && get_range_query (cfun
)->range_of_expr (r
, exp
)
3357 && r
.kind () == VR_RANGE
3358 && wi::leu_p (r
.upper_bound () - r
.lower_bound (), prec
- 1))
3360 wide_int min
= r
.lower_bound ();
3361 wide_int ilowi
= wi::to_wide (lowi
);
3362 if (wi::lt_p (min
, ilowi
, TYPE_SIGN (TREE_TYPE (lowi
))))
3364 lowi
= wide_int_to_tree (TREE_TYPE (lowi
), min
);
3365 mask
= wi::lshift (mask
, ilowi
- min
);
3367 else if (wi::gt_p (min
, ilowi
, TYPE_SIGN (TREE_TYPE (lowi
))))
3369 lowi
= wide_int_to_tree (TREE_TYPE (lowi
), min
);
3370 mask
= wi::lrshift (mask
, min
- ilowi
);
3372 entry_test_needed
= false;
3375 entry_test_needed
= true;
3376 if (candidates
.length () >= (entry_test_needed
? 2 : 1))
3378 tree high
= wide_int_to_tree (TREE_TYPE (lowi
),
3379 wi::to_widest (lowi
)
3380 + prec
- 1 - wi::clz (mask
));
3381 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3383 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
3384 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
3385 location_t loc
= gimple_location (stmt
);
3386 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
3388 /* See if it isn't cheaper to pretend the minimum value of the
3389 range is 0, if maximum value is small enough.
3390 We can avoid then subtraction of the minimum value, but the
3391 mask constant could be perhaps more expensive. */
3392 if (compare_tree_int (lowi
, 0) > 0
3393 && compare_tree_int (high
, prec
) < 0)
3396 HOST_WIDE_INT m
= tree_to_uhwi (lowi
);
3397 rtx reg
= gen_raw_REG (word_mode
, 10000);
3398 bool speed_p
= optimize_bb_for_speed_p (gimple_bb (stmt
));
3399 cost_diff
= set_src_cost (gen_rtx_PLUS (word_mode
, reg
,
3401 word_mode
, speed_p
);
3402 rtx r
= immed_wide_int_const (mask
, word_mode
);
3403 cost_diff
+= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
3404 word_mode
, speed_p
);
3405 r
= immed_wide_int_const (wi::lshift (mask
, m
), word_mode
);
3406 cost_diff
-= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
3407 word_mode
, speed_p
);
3410 mask
= wi::lshift (mask
, m
);
3411 lowi
= build_zero_cst (TREE_TYPE (lowi
));
3416 if (entry_test_needed
)
3418 tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
3420 if (tem
== NULL_TREE
|| is_gimple_val (tem
))
3425 tree etype
= unsigned_type_for (TREE_TYPE (exp
));
3426 exp
= fold_build2_loc (loc
, MINUS_EXPR
, etype
,
3427 fold_convert_loc (loc
, etype
, exp
),
3428 fold_convert_loc (loc
, etype
, lowi
));
3429 exp
= fold_convert_loc (loc
, integer_type_node
, exp
);
3430 tree word_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
3431 exp
= fold_build2_loc (loc
, LSHIFT_EXPR
, word_type
,
3432 build_int_cst (word_type
, 1), exp
);
3433 exp
= fold_build2_loc (loc
, BIT_AND_EXPR
, word_type
, exp
,
3434 wide_int_to_tree (word_type
, mask
));
3435 exp
= fold_build2_loc (loc
, EQ_EXPR
, optype
, exp
,
3436 build_zero_cst (word_type
));
3437 if (is_gimple_val (exp
))
	  /* The shift might have undefined behavior if TEM is true,
	     but reassociate_bb isn't prepared to have basic blocks
	     split when it is running.  So, temporarily emit a code
	     with BIT_IOR_EXPR instead of &&, and fix it up in
	     branch_fixup.  */
3445 gimple_seq seq
= NULL
;
3448 tem
= force_gimple_operand (tem
, &seq
, true, NULL_TREE
);
3449 gcc_assert (TREE_CODE (tem
) == SSA_NAME
);
3450 gimple_set_visited (SSA_NAME_DEF_STMT (tem
), true);
3453 exp
= force_gimple_operand (exp
, &seq2
, true, NULL_TREE
);
3454 gimple_seq_add_seq_without_update (&seq
, seq2
);
3455 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
3456 gimple_set_visited (SSA_NAME_DEF_STMT (exp
), true);
3459 gimple
*g
= gimple_build_assign (make_ssa_name (optype
),
3460 BIT_IOR_EXPR
, tem
, exp
);
3461 gimple_set_location (g
, loc
);
3462 gimple_seq_add_stmt_without_update (&seq
, g
);
3463 exp
= gimple_assign_lhs (g
);
3465 tree val
= build_zero_cst (optype
);
3466 if (update_range_test (&ranges
[i
], NULL
, candidates
.address (),
3467 candidates
.length (), opcode
, ops
, exp
,
3468 seq
, false, val
, val
, strict_overflow_p
))
3472 reassoc_branch_fixups
.safe_push (tem
);
3475 gimple_seq_discard (seq
);
/* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
   and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
   Also, handle x < C && y < C && z < C where C is power of two as
   (x | y | z) < C.  And also handle signed x < 0 && y < 0 && z < 0
   as (x | y | z) < 0.  */
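/* Illustrative sketch (editor's addition, not part of the pass source): the
   power-of-two form above, with a hypothetical function name and C = 16.

     int all_small_p (unsigned int x, unsigned int y, unsigned int z)
     {
       return x < 16 && y < 16 && z < 16;   // becomes (x | y | z) < 16
     }

   All three values are below 16 exactly when no bit at position 4 or above
   is set in any of them, i.e. when their bitwise OR is below 16.  */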
3488 optimize_range_tests_cmp_bitwise (enum tree_code opcode
, int first
, int length
,
3489 vec
<operand_entry
*> *ops
,
3490 struct range_entry
*ranges
)
3494 bool any_changes
= false;
3495 auto_vec
<int, 128> buckets
;
3496 auto_vec
<int, 32> chains
;
3497 auto_vec
<struct range_entry
*, 32> candidates
;
3499 for (i
= first
; i
< length
; i
++)
3503 if (ranges
[i
].exp
== NULL_TREE
3504 || TREE_CODE (ranges
[i
].exp
) != SSA_NAME
3505 || TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) <= 1
3506 || TREE_CODE (TREE_TYPE (ranges
[i
].exp
)) == BOOLEAN_TYPE
)
3509 if (ranges
[i
].low
!= NULL_TREE
3510 && ranges
[i
].high
!= NULL_TREE
3512 && tree_int_cst_equal (ranges
[i
].low
, ranges
[i
].high
))
3514 idx
= !integer_zerop (ranges
[i
].low
);
3515 if (idx
&& !integer_all_onesp (ranges
[i
].low
))
3518 else if (ranges
[i
].high
!= NULL_TREE
3519 && TREE_CODE (ranges
[i
].high
) == INTEGER_CST
3522 wide_int w
= wi::to_wide (ranges
[i
].high
);
3523 int prec
= TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
));
3524 int l
= wi::clz (w
);
3528 || w
!= wi::mask (prec
- l
, false, prec
))
3530 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3531 && ranges
[i
].low
== NULL_TREE
)
3533 && integer_zerop (ranges
[i
].low
))))
3536 else if (ranges
[i
].high
== NULL_TREE
3537 && ranges
[i
].low
!= NULL_TREE
3538 /* Perform this optimization only in the last
3539 reassoc pass, as it interferes with the reassociation
3540 itself or could also with VRP etc. which might not
3541 be able to virtually undo the optimization. */
3542 && !reassoc_insert_powi_p
3543 && !TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3544 && integer_zerop (ranges
[i
].low
))
3549 b
= TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) * 4 + idx
;
3550 if (buckets
.length () <= b
)
3551 buckets
.safe_grow_cleared (b
+ 1, true);
3552 if (chains
.length () <= (unsigned) i
)
3553 chains
.safe_grow (i
+ 1, true);
3554 chains
[i
] = buckets
[b
];
3558 FOR_EACH_VEC_ELT (buckets
, b
, i
)
3559 if (i
&& chains
[i
- 1])
3564 /* When ranges[X - 1].high + 1 is a power of two,
3565 we need to process the same bucket up to
3566 precision - 1 times, each time split the entries
3567 with the same high bound into one chain and the
3568 rest into another one to be processed later. */
3571 for (j
= chains
[i
- 1]; j
; j
= chains
[j
- 1])
3573 if (tree_int_cst_equal (ranges
[i
- 1].high
,
3574 ranges
[j
- 1].high
))
3576 chains
[this_prev
- 1] = j
;
3579 else if (other_prev
== 0)
3586 chains
[other_prev
- 1] = j
;
3590 chains
[this_prev
- 1] = 0;
3592 chains
[other_prev
- 1] = 0;
3593 if (chains
[i
- 1] == 0)
3600 for (j
= chains
[i
- 1]; j
; j
= chains
[j
- 1])
3602 gimple
*gk
= SSA_NAME_DEF_STMT (ranges
[k
- 1].exp
);
3603 gimple
*gj
= SSA_NAME_DEF_STMT (ranges
[j
- 1].exp
);
3604 if (reassoc_stmt_dominates_stmt_p (gk
, gj
))
3607 tree type1
= TREE_TYPE (ranges
[k
- 1].exp
);
3608 tree type2
= NULL_TREE
;
3609 bool strict_overflow_p
= false;
3610 candidates
.truncate (0);
3611 for (j
= i
; j
; j
= chains
[j
- 1])
3613 tree type
= TREE_TYPE (ranges
[j
- 1].exp
);
3614 strict_overflow_p
|= ranges
[j
- 1].strict_overflow_p
;
3617 /* For the signed < 0 cases, the types should be
3618 really compatible (all signed with the same precision,
3619 instead put ranges that have different in_p from
3621 if (!useless_type_conversion_p (type1
, type
))
3623 if (ranges
[j
- 1].in_p
!= ranges
[k
- 1].in_p
)
3624 candidates
.safe_push (&ranges
[j
- 1]);
3629 || useless_type_conversion_p (type1
, type
))
3631 else if (type2
== NULL_TREE
3632 || useless_type_conversion_p (type2
, type
))
3634 if (type2
== NULL_TREE
)
3636 candidates
.safe_push (&ranges
[j
- 1]);
3639 unsigned l
= candidates
.length ();
3640 for (j
= i
; j
; j
= chains
[j
- 1])
3642 tree type
= TREE_TYPE (ranges
[j
- 1].exp
);
3647 if (!useless_type_conversion_p (type1
, type
))
3649 if (ranges
[j
- 1].in_p
== ranges
[k
- 1].in_p
)
3650 candidates
.safe_push (&ranges
[j
- 1]);
3653 if (useless_type_conversion_p (type1
, type
))
3655 else if (type2
== NULL_TREE
3656 || useless_type_conversion_p (type2
, type
))
3658 candidates
.safe_push (&ranges
[j
- 1]);
3660 gimple_seq seq
= NULL
;
3661 tree op
= NULL_TREE
;
3663 struct range_entry
*r
;
3664 candidates
.safe_push (&ranges
[k
- 1]);
3665 FOR_EACH_VEC_ELT (candidates
, id
, r
)
3668 enum tree_code code
;
3676 code
= (b
% 4) == 3 ? BIT_NOT_EXPR
: NOP_EXPR
;
3677 g
= gimple_build_assign (make_ssa_name (type1
), code
, op
);
3678 gimple_seq_add_stmt_without_update (&seq
, g
);
3679 op
= gimple_assign_lhs (g
);
3681 tree type
= TREE_TYPE (r
->exp
);
3683 if (id
>= l
&& !useless_type_conversion_p (type1
, type
))
3685 g
= gimple_build_assign (make_ssa_name (type1
), NOP_EXPR
, exp
);
3686 gimple_seq_add_stmt_without_update (&seq
, g
);
3687 exp
= gimple_assign_lhs (g
);
3690 code
= r
->in_p
? BIT_IOR_EXPR
: BIT_AND_EXPR
;
3692 code
= (b
% 4) == 1 ? BIT_AND_EXPR
: BIT_IOR_EXPR
;
3693 g
= gimple_build_assign (make_ssa_name (id
>= l
? type1
: type2
),
3695 gimple_seq_add_stmt_without_update (&seq
, g
);
3696 op
= gimple_assign_lhs (g
);
3699 if (update_range_test (&ranges
[k
- 1], NULL
, candidates
.address (),
3700 candidates
.length (), opcode
, ops
, op
,
3701 seq
, ranges
[k
- 1].in_p
, ranges
[k
- 1].low
,
3702 ranges
[k
- 1].high
, strict_overflow_p
))
3705 gimple_seq_discard (seq
);
3706 if ((b
% 4) == 2 && buckets
[b
] != i
)
3707 /* There is more work to do for this bucket. */
/* Attempt to optimize for signed a and b where b is known to be >= 0:
   a >= 0 && a < b into (unsigned) a < (unsigned) b
   a >= 0 && a <= b into (unsigned) a <= (unsigned) b  */
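/* Illustrative sketch (editor's addition, not part of the pass source): a
   source form matching the first pattern above; the function name is
   hypothetical and n is assumed to be known nonnegative, e.g. from ranges.

     int in_bounds_p (int i, int n)
     {
       return i >= 0 && i < n;   // becomes (unsigned) i < (unsigned) n
     }

   Any negative i becomes, as unsigned, a value larger than any nonnegative
   n, so the single unsigned comparison subsumes both signed tests.  */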
3719 optimize_range_tests_var_bound (enum tree_code opcode
, int first
, int length
,
3720 vec
<operand_entry
*> *ops
,
3721 struct range_entry
*ranges
,
3722 basic_block first_bb
)
3725 bool any_changes
= false;
3726 hash_map
<tree
, int> *map
= NULL
;
3728 for (i
= first
; i
< length
; i
++)
3730 if (ranges
[i
].exp
== NULL_TREE
3731 || TREE_CODE (ranges
[i
].exp
) != SSA_NAME
3735 tree type
= TREE_TYPE (ranges
[i
].exp
);
3736 if (!INTEGRAL_TYPE_P (type
)
3737 || TYPE_UNSIGNED (type
)
3738 || ranges
[i
].low
== NULL_TREE
3739 || !integer_zerop (ranges
[i
].low
)
3740 || ranges
[i
].high
!= NULL_TREE
)
3742 /* EXP >= 0 here. */
3744 map
= new hash_map
<tree
, int>;
3745 map
->put (ranges
[i
].exp
, i
);
3751 for (i
= 0; i
< length
; i
++)
3753 bool in_p
= ranges
[i
].in_p
;
3754 if (ranges
[i
].low
== NULL_TREE
3755 || ranges
[i
].high
== NULL_TREE
)
3757 if (!integer_zerop (ranges
[i
].low
)
3758 || !integer_zerop (ranges
[i
].high
))
3761 && TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) == 1
3762 && TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3763 && integer_onep (ranges
[i
].low
)
3764 && integer_onep (ranges
[i
].high
))
3775 if (TREE_CODE (ranges
[i
].exp
) != SSA_NAME
)
3777 stmt
= SSA_NAME_DEF_STMT (ranges
[i
].exp
);
3778 if (!is_gimple_assign (stmt
))
3780 ccode
= gimple_assign_rhs_code (stmt
);
3781 rhs1
= gimple_assign_rhs1 (stmt
);
3782 rhs2
= gimple_assign_rhs2 (stmt
);
3786 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3787 stmt
= last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
3788 if (gimple_code (stmt
) != GIMPLE_COND
)
3790 ccode
= gimple_cond_code (stmt
);
3791 rhs1
= gimple_cond_lhs (stmt
);
3792 rhs2
= gimple_cond_rhs (stmt
);
3795 if (TREE_CODE (rhs1
) != SSA_NAME
3796 || rhs2
== NULL_TREE
3797 || TREE_CODE (rhs2
) != SSA_NAME
)
3811 ccode
= invert_tree_comparison (ccode
, false);
3816 std::swap (rhs1
, rhs2
);
3817 ccode
= swap_tree_comparison (ccode
);
3826 int *idx
= map
->get (rhs1
);
3830 /* maybe_optimize_range_tests allows statements without side-effects
3831 in the basic blocks as long as they are consumed in the same bb.
3832 Make sure rhs2's def stmt is not among them, otherwise we can't
3833 use safely get_nonzero_bits on it. E.g. in:
3834 # RANGE [-83, 1] NONZERO 173
3835 # k_32 = PHI <k_47(13), k_12(9)>
3838 goto <bb 5>; [26.46%]
3840 goto <bb 9>; [73.54%]
3842 <bb 5> [local count: 140323371]:
3843 # RANGE [0, 1] NONZERO 1
3845 # RANGE [0, 4] NONZERO 4
3847 # RANGE [0, 4] NONZERO 4
3848 iftmp.0_44 = (char) _21;
3849 if (k_32 < iftmp.0_44)
3850 goto <bb 6>; [84.48%]
3852 goto <bb 9>; [15.52%]
3853 the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
3854 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3855 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3856 those stmts even for negative k_32 and the value ranges would be no
3857 longer guaranteed and so the optimization would be invalid. */
3858 while (opcode
== ERROR_MARK
)
3860 gimple
*g
= SSA_NAME_DEF_STMT (rhs2
);
3861 basic_block bb2
= gimple_bb (g
);
3864 && dominated_by_p (CDI_DOMINATORS
, bb2
, first_bb
))
3866 /* As an exception, handle a few common cases. */
3867 if (gimple_assign_cast_p (g
)
3868 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g
))))
3870 tree op0
= gimple_assign_rhs1 (g
);
3871 if (TYPE_UNSIGNED (TREE_TYPE (op0
))
3872 && (TYPE_PRECISION (TREE_TYPE (rhs2
))
3873 > TYPE_PRECISION (TREE_TYPE (op0
))))
3874 /* Zero-extension is always ok. */
3876 else if (TYPE_PRECISION (TREE_TYPE (rhs2
))
3877 == TYPE_PRECISION (TREE_TYPE (op0
))
3878 && TREE_CODE (op0
) == SSA_NAME
)
3880 /* Cast from signed to unsigned or vice versa. Retry
3881 with the op0 as new rhs2. */
3886 else if (is_gimple_assign (g
)
3887 && gimple_assign_rhs_code (g
) == BIT_AND_EXPR
3888 && TREE_CODE (gimple_assign_rhs2 (g
)) == INTEGER_CST
3889 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g
))))
3890 /* Masking with INTEGER_CST with MSB clear is always ok
3897 if (rhs2
== NULL_TREE
)
3900 wide_int nz
= get_nonzero_bits (rhs2
);
3904 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3905 and RHS2 is known to be RHS2 >= 0. */
3906 tree utype
= unsigned_type_for (TREE_TYPE (rhs1
));
3908 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
3909 if ((ranges
[*idx
].strict_overflow_p
3910 || ranges
[i
].strict_overflow_p
)
3911 && issue_strict_overflow_warning (wc
))
3912 warning_at (gimple_location (stmt
), OPT_Wstrict_overflow
,
3913 "assuming signed overflow does not occur "
3914 "when simplifying range test");
3916 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3918 struct range_entry
*r
= &ranges
[*idx
];
3919 fprintf (dump_file
, "Optimizing range test ");
3920 print_generic_expr (dump_file
, r
->exp
);
3921 fprintf (dump_file
, " +[");
3922 print_generic_expr (dump_file
, r
->low
);
3923 fprintf (dump_file
, ", ");
3924 print_generic_expr (dump_file
, r
->high
);
3925 fprintf (dump_file
, "] and comparison ");
3926 print_generic_expr (dump_file
, rhs1
);
3927 fprintf (dump_file
, " %s ", op_symbol_code (ccode
));
3928 print_generic_expr (dump_file
, rhs2
);
3929 fprintf (dump_file
, "\n into (");
3930 print_generic_expr (dump_file
, utype
);
3931 fprintf (dump_file
, ") ");
3932 print_generic_expr (dump_file
, rhs1
);
3933 fprintf (dump_file
, " %s (", op_symbol_code (ccode
));
3934 print_generic_expr (dump_file
, utype
);
3935 fprintf (dump_file
, ") ");
3936 print_generic_expr (dump_file
, rhs2
);
3937 fprintf (dump_file
, "\n");
3940 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3942 if (opcode
== BIT_IOR_EXPR
3943 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
3946 ccode
= invert_tree_comparison (ccode
, false);
3949 unsigned int uid
= gimple_uid (stmt
);
3950 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3951 gimple
*g
= gimple_build_assign (make_ssa_name (utype
), NOP_EXPR
, rhs1
);
3952 gimple_set_uid (g
, uid
);
3953 rhs1
= gimple_assign_lhs (g
);
3954 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3955 if (!useless_type_conversion_p (utype
, TREE_TYPE (rhs2
)))
3957 g
= gimple_build_assign (make_ssa_name (utype
), NOP_EXPR
, rhs2
);
3958 gimple_set_uid (g
, uid
);
3959 rhs2
= gimple_assign_lhs (g
);
3960 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3962 if (tree_swap_operands_p (rhs1
, rhs2
))
3964 std::swap (rhs1
, rhs2
);
3965 ccode
= swap_tree_comparison (ccode
);
3967 if (gimple_code (stmt
) == GIMPLE_COND
)
3969 gcond
*c
= as_a
<gcond
*> (stmt
);
3970 gimple_cond_set_code (c
, ccode
);
3971 gimple_cond_set_lhs (c
, rhs1
);
3972 gimple_cond_set_rhs (c
, rhs2
);
3977 tree ctype
= oe
->op
? TREE_TYPE (oe
->op
) : boolean_type_node
;
3978 if (!INTEGRAL_TYPE_P (ctype
)
3979 || (TREE_CODE (ctype
) != BOOLEAN_TYPE
3980 && TYPE_PRECISION (ctype
) != 1))
3981 ctype
= boolean_type_node
;
3982 g
= gimple_build_assign (make_ssa_name (ctype
), ccode
, rhs1
, rhs2
);
3983 gimple_set_uid (g
, uid
);
3984 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3985 if (oe
->op
&& ctype
!= TREE_TYPE (oe
->op
))
3987 g
= gimple_build_assign (make_ssa_name (TREE_TYPE (oe
->op
)),
3988 NOP_EXPR
, gimple_assign_lhs (g
));
3989 gimple_set_uid (g
, uid
);
3990 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3992 ranges
[i
].exp
= gimple_assign_lhs (g
);
3993 oe
->op
= ranges
[i
].exp
;
3994 ranges
[i
].low
= build_zero_cst (TREE_TYPE (ranges
[i
].exp
));
3995 ranges
[i
].high
= ranges
[i
].low
;
3997 ranges
[i
].strict_overflow_p
= false;
3998 oe
= (*ops
)[ranges
[*idx
].idx
];
3999 /* Now change all the other range test immediate uses, so that
4000 those tests will be optimized away. */
4001 if (opcode
== ERROR_MARK
)
4004 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
4005 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
4007 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
4008 ? boolean_false_node
: boolean_true_node
);
4011 oe
->op
= error_mark_node
;
4012 ranges
[*idx
].exp
= NULL_TREE
;
4013 ranges
[*idx
].low
= NULL_TREE
;
4014 ranges
[*idx
].high
= NULL_TREE
;
/* Optimize range tests, similarly to how fold_range_test optimizes
   them on trees.  The tree code for the binary
   operation between all the operands is OPCODE.
   If OPCODE is ERROR_MARK, optimize_range_tests is called from within
   maybe_optimize_range_tests for inter-bb range optimization.
   In that case if oe->op is NULL, oe->id is bb->index whose
   GIMPLE_COND is && or ||ed into the test, and oe->rank says
   the actual opcode.
   FIRST_BB is the first basic block if OPCODE is ERROR_MARK.  */
4033 optimize_range_tests (enum tree_code opcode
,
4034 vec
<operand_entry
*> *ops
, basic_block first_bb
)
4036 unsigned int length
= ops
->length (), i
, j
, first
;
4038 struct range_entry
*ranges
;
4039 bool any_changes
= false;
4044 ranges
= XNEWVEC (struct range_entry
, length
);
4045 for (i
= 0; i
< length
; i
++)
4049 init_range_entry (ranges
+ i
, oe
->op
,
4052 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
)));
4053 /* For | invert it now, we will invert it again before emitting
4054 the optimized expression. */
4055 if (opcode
== BIT_IOR_EXPR
4056 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
4057 ranges
[i
].in_p
= !ranges
[i
].in_p
;
4060 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
4061 for (i
= 0; i
< length
; i
++)
4062 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
4065 /* Try to merge ranges. */
4066 for (first
= i
; i
< length
; i
++)
4068 tree low
= ranges
[i
].low
;
4069 tree high
= ranges
[i
].high
;
4070 int in_p
= ranges
[i
].in_p
;
4071 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
4072 int update_fail_count
= 0;
4074 for (j
= i
+ 1; j
< length
; j
++)
4076 if (ranges
[i
].exp
!= ranges
[j
].exp
)
4078 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
4079 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
4081 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
4087 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, NULL
, j
- i
- 1,
4088 opcode
, ops
, ranges
[i
].exp
, NULL
, in_p
,
4089 low
, high
, strict_overflow_p
))
4094 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
4095 while update_range_test would fail. */
4096 else if (update_fail_count
== 64)
4099 ++update_fail_count
;
4102 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
4105 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
4106 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
4108 if (lshift_cheap_p (optimize_function_for_speed_p (cfun
)))
4109 any_changes
|= optimize_range_tests_to_bit_test (opcode
, first
, length
,
4111 any_changes
|= optimize_range_tests_var_bound (opcode
, first
, length
, ops
,
4113 any_changes
|= optimize_range_tests_cmp_bitwise (opcode
, first
, length
,
4116 if (any_changes
&& opcode
!= ERROR_MARK
)
4119 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
4121 if (oe
->op
== error_mark_node
)
4130 XDELETEVEC (ranges
);
/* A subroutine of optimize_vec_cond_expr to extract and canonicalize
   the operands of the VEC_COND_EXPR.  Returns ERROR_MARK on failure,
   otherwise the comparison code.  TYPE is a return value that is set
   to type of comparison.  */
4140 ovce_extract_ops (tree var
, gassign
**rets
, bool *reti
, tree
*type
,
4141 tree
*lhs
, tree
*rhs
, gassign
**vcond
)
4143 if (TREE_CODE (var
) != SSA_NAME
)
4146 gassign
*stmt
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (var
));
4152 /* ??? If we start creating more COND_EXPR, we could perform
4153 this same optimization with them. For now, simplify. */
4154 if (gimple_assign_rhs_code (stmt
) != VEC_COND_EXPR
)
4157 tree cond
= gimple_assign_rhs1 (stmt
);
4158 tree_code cmp
= TREE_CODE (cond
);
4159 if (cmp
!= SSA_NAME
)
4162 gassign
*assign
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (cond
));
4164 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign
)) != tcc_comparison
)
4167 cmp
= gimple_assign_rhs_code (assign
);
4169 *lhs
= gimple_assign_rhs1 (assign
);
4171 *rhs
= gimple_assign_rhs2 (assign
);
4173 /* ??? For now, allow only canonical true and false result vectors.
4174 We could expand this to other constants should the need arise,
4175 but at the moment we don't create them. */
4176 tree t
= gimple_assign_rhs2 (stmt
);
4177 tree f
= gimple_assign_rhs3 (stmt
);
4179 if (integer_all_onesp (t
))
4181 else if (integer_all_onesp (f
))
4183 cmp
= invert_tree_comparison (cmp
, false);
4188 if (!integer_zerop (f
))
4197 *type
= TREE_TYPE (cond
);
/* Optimize the condition of VEC_COND_EXPRs which have been combined
   with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR).  */
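/* Illustrative sketch (editor's addition, not part of the pass source),
   written with GCC's generic vector extension; the typedef and function
   name are hypothetical.

     typedef int v4si __attribute__ ((vector_size (16)));

     v4si f (v4si a, v4si b)
     {
       return (a < b) & (a != b);   // two all-ones/all-zeros cond results
     }

   Each comparison is represented as a VEC_COND_EXPR selecting between
   all-ones and zero; when such results are combined with & or |, the pass
   tries to combine the comparisons themselves (here into a < b) via
   maybe_fold_and_comparisons / maybe_fold_or_comparisons.  */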
4205 optimize_vec_cond_expr (tree_code opcode
, vec
<operand_entry
*> *ops
)
4207 unsigned int length
= ops
->length (), i
, j
;
4208 bool any_changes
= false;
4213 for (i
= 0; i
< length
; ++i
)
4215 tree elt0
= (*ops
)[i
]->op
;
4217 gassign
*stmt0
, *vcond0
;
4219 tree type
, lhs0
, rhs0
;
4220 tree_code cmp0
= ovce_extract_ops (elt0
, &stmt0
, &invert
, &type
, &lhs0
,
4222 if (cmp0
== ERROR_MARK
)
4225 for (j
= i
+ 1; j
< length
; ++j
)
4227 tree
&elt1
= (*ops
)[j
]->op
;
4229 gassign
*stmt1
, *vcond1
;
4231 tree_code cmp1
= ovce_extract_ops (elt1
, &stmt1
, NULL
, NULL
, &lhs1
,
4233 if (cmp1
== ERROR_MARK
)
4237 if (opcode
== BIT_AND_EXPR
)
4238 comb
= maybe_fold_and_comparisons (type
, cmp0
, lhs0
, rhs0
,
4240 else if (opcode
== BIT_IOR_EXPR
)
4241 comb
= maybe_fold_or_comparisons (type
, cmp0
, lhs0
, rhs0
,
4249 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4251 fprintf (dump_file
, "Transforming ");
4252 print_generic_expr (dump_file
, gimple_assign_lhs (stmt0
));
4253 fprintf (dump_file
, " %c ", opcode
== BIT_AND_EXPR
? '&' : '|');
4254 print_generic_expr (dump_file
, gimple_assign_lhs (stmt1
));
4255 fprintf (dump_file
, " into ");
4256 print_generic_expr (dump_file
, comb
);
4257 fputc ('\n', dump_file
);
4260 gimple_stmt_iterator gsi
= gsi_for_stmt (vcond0
);
4261 tree exp
= force_gimple_operand_gsi (&gsi
, comb
, true, NULL_TREE
,
4262 true, GSI_SAME_STMT
);
4264 swap_ssa_operands (vcond0
, gimple_assign_rhs2_ptr (vcond0
),
4265 gimple_assign_rhs3_ptr (vcond0
));
4266 gimple_assign_set_rhs1 (vcond0
, exp
);
4267 update_stmt (vcond0
);
4269 elt1
= error_mark_node
;
4278 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
4280 if (oe
->op
== error_mark_node
)
4292 /* Return true if STMT is a cast like:
4298 # _345 = PHI <_123(N), 1(...), 1(...)>
4299 where _234 has bool type, _123 has single use and
4300 bb N has a single successor M. This is commonly used in
4301 the last block of a range test.
4303 Also Return true if STMT is tcc_compare like:
4309 # _345 = PHI <_234(N), 1(...), 1(...)>
4311 where _234 has booltype, single use and
4312 bb N has a single successor M. This is commonly used in
4313 the last block of a range test. */
4316 final_range_test_p (gimple
*stmt
)
4318 basic_block bb
, rhs_bb
, lhs_bb
;
4321 use_operand_p use_p
;
4324 if (!gimple_assign_cast_p (stmt
)
4325 && (!is_gimple_assign (stmt
)
4326 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4327 != tcc_comparison
)))
4329 bb
= gimple_bb (stmt
);
4330 if (!single_succ_p (bb
))
4332 e
= single_succ_edge (bb
);
4333 if (e
->flags
& EDGE_COMPLEX
)
4336 lhs
= gimple_assign_lhs (stmt
);
4337 rhs
= gimple_assign_rhs1 (stmt
);
4338 if (gimple_assign_cast_p (stmt
)
4339 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
4340 || TREE_CODE (rhs
) != SSA_NAME
4341 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
))
4344 if (!gimple_assign_cast_p (stmt
)
4345 && (TREE_CODE (TREE_TYPE (lhs
)) != BOOLEAN_TYPE
))
4348 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4349 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
4352 if (gimple_code (use_stmt
) != GIMPLE_PHI
4353 || gimple_bb (use_stmt
) != e
->dest
)
4356 /* And that the rhs is defined in the same loop. */
4357 if (gimple_assign_cast_p (stmt
))
4359 if (TREE_CODE (rhs
) != SSA_NAME
4360 || !(rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
)))
4361 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
4366 if (TREE_CODE (lhs
) != SSA_NAME
4367 || !(lhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (lhs
)))
4368 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), lhs_bb
))
/* Return true if BB is a suitable basic block for inter-bb range test
   optimization.  If BACKWARD is true, BB should be the only predecessor
   of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
   or compared with to find a common basic block to which all conditions
   branch to if true resp. false.  If BACKWARD is false, TEST_BB should
   be the only predecessor of BB.  *TEST_SWAPPED_P is set to true if
   TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
   block points to an empty block that falls through into *OTHER_BB and
   the phi args match that path.  */
4386 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4387 bool *test_swapped_p, bool backward)
4389 edge_iterator ei
, ei2
;
4393 bool other_edge_seen
= false;
4398 /* Check last stmt first. */
4399 stmt
= last_stmt (bb
);
4401 || (gimple_code (stmt
) != GIMPLE_COND
4402 && (backward
|| !final_range_test_p (stmt
)))
4403 || gimple_visited_p (stmt
)
4404 || stmt_could_throw_p (cfun
, stmt
)
4407 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
4410 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4411 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4412 to *OTHER_BB (if not set yet, try to find it out). */
4413 if (EDGE_COUNT (bb
->succs
) != 2)
4415 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
4417 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
4419 if (e
->dest
== test_bb
)
4428 if (*other_bb
== NULL
)
4430 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
4431 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
4433 else if (e
->dest
== e2
->dest
)
4434 *other_bb
= e
->dest
;
4435 if (*other_bb
== NULL
)
4438 if (e
->dest
== *other_bb
)
4439 other_edge_seen
= true;
4443 if (*other_bb
== NULL
|| !other_edge_seen
)
4446 else if (single_succ (bb
) != *other_bb
)
4449 /* Now check all PHIs of *OTHER_BB. */
4450 e
= find_edge (bb
, *other_bb
);
4451 e2
= find_edge (test_bb
, *other_bb
);
4453 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4455 gphi
*phi
= gsi
.phi ();
4456 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4457 corresponding to BB and TEST_BB predecessor must be the same. */
4458 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
4459 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
4461 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4462 one of the PHIs should have the lhs of the last stmt in
4463 that block as PHI arg and that PHI should have 0 or 1
4464 corresponding to it in all other range test basic blocks
4468 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
4469 == gimple_assign_lhs (stmt
)
4470 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
4471 || integer_onep (gimple_phi_arg_def (phi
,
4477 gimple
*test_last
= last_stmt (test_bb
);
4478 if (gimple_code (test_last
) == GIMPLE_COND
)
4480 if (backward
? e2
->src
!= test_bb
: e
->src
!= bb
)
4483 /* For last_bb, handle also:
4485 goto <bb 6>; [34.00%]
4487 goto <bb 7>; [66.00%]
4489 <bb 6> [local count: 79512730]:
4491 <bb 7> [local count: 1073741824]:
4492 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4493 where bb 7 is *OTHER_BB, but the PHI values from the
4494 earlier bbs match the path through the empty bb
4498 e3
= EDGE_SUCC (test_bb
,
4499 e2
== EDGE_SUCC (test_bb
, 0) ? 1 : 0);
4502 e
== EDGE_SUCC (bb
, 0) ? 1 : 0);
4503 if (empty_block_p (e3
->dest
)
4504 && single_succ_p (e3
->dest
)
4505 && single_succ (e3
->dest
) == *other_bb
4506 && single_pred_p (e3
->dest
)
4507 && single_succ_edge (e3
->dest
)->flags
== EDGE_FALLTHRU
)
4510 e2
= single_succ_edge (e3
->dest
);
4512 e
= single_succ_edge (e3
->dest
);
4514 *test_swapped_p
= true;
4518 else if (gimple_phi_arg_def (phi
, e2
->dest_idx
)
4519 == gimple_assign_lhs (test_last
)
4520 && (integer_zerop (gimple_phi_arg_def (phi
,
4522 || integer_onep (gimple_phi_arg_def (phi
,
4533 /* Return true if BB doesn't have side-effects that would disallow
4534 range test optimization, all SSA_NAMEs set in the bb are consumed
4535 in the bb and there are no PHIs. */
4538 no_side_effect_bb (basic_block bb
)
4540 gimple_stmt_iterator gsi
;
4543 if (!gimple_seq_empty_p (phi_nodes (bb
)))
4545 last
= last_stmt (bb
);
4546 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4548 gimple
*stmt
= gsi_stmt (gsi
);
4550 imm_use_iterator imm_iter
;
4551 use_operand_p use_p
;
4553 if (is_gimple_debug (stmt
))
4555 if (gimple_has_side_effects (stmt
))
4559 if (!is_gimple_assign (stmt
))
4561 lhs
= gimple_assign_lhs (stmt
);
4562 if (TREE_CODE (lhs
) != SSA_NAME
)
4564 if (gimple_assign_rhs_could_trap_p (stmt
))
4566 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
4568 gimple
*use_stmt
= USE_STMT (use_p
);
4569 if (is_gimple_debug (use_stmt
))
4571 if (gimple_bb (use_stmt
) != bb
)
4578 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4579 return true and fill in *OPS recursively. */
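/* For example (illustrative SSA names, not from this listing): with
     _5 = _1 & _2;
     _6 = _5 & _3;
   a call get_ops (_6, BIT_AND_EXPR, &ops, loop) pushes operand entries
   for _1, _2 and _3, provided each intermediate result has a single use
   and its defining statement is reassociable.  */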
4582 get_ops (tree var
, enum tree_code code
, vec
<operand_entry
*> *ops
,
4585 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
4589 if (!is_reassociable_op (stmt
, code
, loop
))
4592 rhs
[0] = gimple_assign_rhs1 (stmt
);
4593 rhs
[1] = gimple_assign_rhs2 (stmt
);
4594 gimple_set_visited (stmt
, true);
4595 for (i
= 0; i
< 2; i
++)
4596 if (TREE_CODE (rhs
[i
]) == SSA_NAME
4597 && !get_ops (rhs
[i
], code
, ops
, loop
)
4598 && has_single_use (rhs
[i
]))
4600 operand_entry
*oe
= operand_entry_pool
.allocate ();
4606 oe
->stmt_to_insert
= NULL
;
4607 ops
->safe_push (oe
);
4612 /* Find the ops that were added by get_ops starting from VAR, see if
4613 they were changed during update_range_test and if yes, create new
4617 update_ops (tree var
, enum tree_code code
, const vec
<operand_entry
*> &ops
,
4618 unsigned int *pidx
, class loop
*loop
)
4620 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
4624 if (!is_reassociable_op (stmt
, code
, loop
))
4627 rhs
[0] = gimple_assign_rhs1 (stmt
);
4628 rhs
[1] = gimple_assign_rhs2 (stmt
);
4631 for (i
= 0; i
< 2; i
++)
4632 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
4634 rhs
[2 + i
] = update_ops (rhs
[i
], code
, ops
, pidx
, loop
);
4635 if (rhs
[2 + i
] == NULL_TREE
)
4637 if (has_single_use (rhs
[i
]))
4638 rhs
[2 + i
] = ops
[(*pidx
)++]->op
;
4640 rhs
[2 + i
] = rhs
[i
];
4643 if ((rhs
[2] != rhs
[0] || rhs
[3] != rhs
[1])
4644 && (rhs
[2] != rhs
[1] || rhs
[3] != rhs
[0]))
4646 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4647 var
= make_ssa_name (TREE_TYPE (var
));
4648 gassign
*g
= gimple_build_assign (var
, gimple_assign_rhs_code (stmt
),
4650 gimple_set_uid (g
, gimple_uid (stmt
));
4651 gimple_set_visited (g
, true);
4652 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4657 /* Structure to track the initial value passed to get_ops and
4658 the range in the ops vector for each basic block. */
4660 struct inter_bb_range_test_entry
4663 unsigned int first_idx
, last_idx
;
4666 /* Inter-bb range test optimization.
4668 Returns TRUE if a gimple conditional is optimized to a constant
4669 true/false condition, otherwise returns FALSE.
4671 This indicates to the caller that it should run a CFG cleanup pass
4672 once reassociation is completed. */
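/* Rough illustration (hypothetical source, added for clarity): for

       if (x == 0) goto common; else goto next;
     next:
       if (x == 1) goto common; else goto other;

   the conditions of both blocks branch to the same common block, so they
   can be merged into a single range test such as (unsigned) x <= 1,
   leaving the now-redundant control flow for CFG cleanup.  */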
4675 maybe_optimize_range_tests (gimple
*stmt
)
4677 basic_block first_bb
= gimple_bb (stmt
);
4678 basic_block last_bb
= first_bb
;
4679 basic_block other_bb
= NULL
;
4683 auto_vec
<operand_entry
*> ops
;
4684 auto_vec
<inter_bb_range_test_entry
> bbinfo
;
4685 bool any_changes
= false;
4686 bool cfg_cleanup_needed
= false;
4688 /* Consider only basic blocks that end with GIMPLE_COND or
4689 a cast statement satisfying final_range_test_p. All
4690 but the last bb in the first_bb .. last_bb range
4691 should end with GIMPLE_COND. */
4692 if (gimple_code (stmt
) == GIMPLE_COND
)
4694 if (EDGE_COUNT (first_bb
->succs
) != 2)
4695 return cfg_cleanup_needed
;
4697 else if (final_range_test_p (stmt
))
4698 other_bb
= single_succ (first_bb
);
4700 return cfg_cleanup_needed
;
4702 if (stmt_could_throw_p (cfun
, stmt
))
4703 return cfg_cleanup_needed
;
4705 /* As relative ordering of post-dominator sons isn't fixed,
4706 maybe_optimize_range_tests can be called first on any
4707 bb in the range we want to optimize. So, start searching
4708 backwards, if first_bb can be set to a predecessor. */
4709 while (single_pred_p (first_bb
))
4711 basic_block pred_bb
= single_pred (first_bb
);
4712 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, NULL
, true))
4714 if (!no_side_effect_bb (first_bb
))
4718 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4719 Before starting forward search in last_bb successors, find
4720 out the other_bb. */
4721 if (first_bb
== last_bb
)
4724 /* As non-GIMPLE_COND last stmt always terminates the range,
4725 if forward search didn't discover anything, just give up. */
4726 if (gimple_code (stmt
) != GIMPLE_COND
)
4727 return cfg_cleanup_needed
;
4728 /* Look at both successors. Either it ends with a GIMPLE_COND
4729 and satisfies suitable_cond_bb, or ends with a cast and
4730 other_bb is that cast's successor. */
4731 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
4732 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
4733 || e
->dest
== first_bb
)
4734 return cfg_cleanup_needed
;
4735 else if (single_pred_p (e
->dest
))
4737 stmt
= last_stmt (e
->dest
);
4739 && gimple_code (stmt
) == GIMPLE_COND
4740 && EDGE_COUNT (e
->dest
->succs
) == 2)
4742 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
,
4749 && final_range_test_p (stmt
)
4750 && find_edge (first_bb
, single_succ (e
->dest
)))
4752 other_bb
= single_succ (e
->dest
);
4753 if (other_bb
== first_bb
)
4757 if (other_bb
== NULL
)
4758 return cfg_cleanup_needed
;
4760 /* Now do the forward search, moving last_bb to successor bbs
4761 that aren't other_bb. */
4762 while (EDGE_COUNT (last_bb
->succs
) == 2)
4764 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
4765 if (e
->dest
!= other_bb
)
4769 if (!single_pred_p (e
->dest
))
4771 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, NULL
, false))
4773 if (!no_side_effect_bb (e
->dest
))
4777 if (first_bb
== last_bb
)
4778 return cfg_cleanup_needed
;
4779 /* Here basic blocks first_bb through last_bb's predecessor
4780 end with GIMPLE_COND, all of them have one of the edges to
4781 other_bb and another to another block in the range,
4782 all blocks except first_bb don't have side-effects and
4783 last_bb ends with either GIMPLE_COND, or cast satisfying
4784 final_range_test_p. */
4785 for (bb
= last_bb
; ; bb
= single_pred (bb
))
4787 enum tree_code code
;
4789 inter_bb_range_test_entry bb_ent
;
4791 bb_ent
.op
= NULL_TREE
;
4792 bb_ent
.first_idx
= ops
.length ();
4793 bb_ent
.last_idx
= bb_ent
.first_idx
;
4794 e
= find_edge (bb
, other_bb
);
4795 stmt
= last_stmt (bb
);
4796 gimple_set_visited (stmt
, true);
4797 if (gimple_code (stmt
) != GIMPLE_COND
)
4799 use_operand_p use_p
;
4804 lhs
= gimple_assign_lhs (stmt
);
4805 rhs
= gimple_assign_rhs1 (stmt
);
4806 gcc_assert (bb
== last_bb
);
4815 # _345 = PHI <_123(N), 1(...), 1(...)>
4817 or 0 instead of 1. If it is 0, the _234
4818 range test is anded together with all the
4819 other range tests, if it is 1, it is ored with
4821 single_imm_use (lhs
, &use_p
, &phi
);
4822 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
4823 e2
= find_edge (first_bb
, other_bb
);
4825 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
4826 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
4827 code
= BIT_AND_EXPR
;
4830 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
4831 code
= BIT_IOR_EXPR
;
4834 /* If _234 SSA_NAME_DEF_STMT is
4836 (or &, corresponding to 1/0 in the phi arguments,
4837 push into ops the individual range test arguments
4838 of the bitwise or resp. and, recursively. */
4839 if (TREE_CODE (rhs
) == SSA_NAME
4840 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4842 && !get_ops (rhs
, code
, &ops
,
4843 loop_containing_stmt (stmt
))
4844 && has_single_use (rhs
))
4846 /* Otherwise, push the _234 range test itself. */
4847 operand_entry
*oe
= operand_entry_pool
.allocate ();
4853 oe
->stmt_to_insert
= NULL
;
4858 else if (is_gimple_assign (stmt
)
4859 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4861 && !get_ops (lhs
, code
, &ops
,
4862 loop_containing_stmt (stmt
))
4863 && has_single_use (lhs
))
4865 operand_entry
*oe
= operand_entry_pool
.allocate ();
4876 bb_ent
.last_idx
= ops
.length ();
4879 bbinfo
.safe_push (bb_ent
);
4880 for (unsigned int i
= bb_ent
.first_idx
; i
< bb_ent
.last_idx
; ++i
)
4881 ops
[i
]->id
= bb
->index
;
4884 else if (bb
== last_bb
)
4886 /* For last_bb, handle also:
4888 goto <bb 6>; [34.00%]
4890 goto <bb 7>; [66.00%]
4892 <bb 6> [local count: 79512730]:
4894 <bb 7> [local count: 1073741824]:
4895 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4896 where bb 7 is OTHER_BB, but the PHI values from the
4897 earlier bbs match the path through the empty bb
4899 bool test_swapped_p
= false;
4900 bool ok
= suitable_cond_bb (single_pred (last_bb
), last_bb
,
4901 &other_bb
, &test_swapped_p
, true);
4904 e
= EDGE_SUCC (bb
, e
== EDGE_SUCC (bb
, 0) ? 1 : 0);
4906 /* Otherwise stmt is GIMPLE_COND. */
4907 code
= gimple_cond_code (stmt
);
4908 lhs
= gimple_cond_lhs (stmt
);
4909 rhs
= gimple_cond_rhs (stmt
);
4910 if (TREE_CODE (lhs
) == SSA_NAME
4911 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
4912 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
4913 || rhs
!= boolean_false_node
4914 /* Either push into ops the individual bitwise
4915 or resp. and operands, depending on which
4916 edge is other_bb. */
4917 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
4918 ^ (code
== EQ_EXPR
))
4919 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
4920 loop_containing_stmt (stmt
))))
4922 /* Or push the GIMPLE_COND stmt itself. */
4923 operand_entry
*oe
= operand_entry_pool
.allocate ();
4926 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
4927 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
4928 /* oe->op = NULL signals that there is no SSA_NAME
4929 for the range test, and oe->id instead is the
4930 basic block number, at whose end the GIMPLE_COND
4934 oe
->stmt_to_insert
= NULL
;
4939 else if (ops
.length () > bb_ent
.first_idx
)
4942 bb_ent
.last_idx
= ops
.length ();
4944 bbinfo
.safe_push (bb_ent
);
4945 for (unsigned int i
= bb_ent
.first_idx
; i
< bb_ent
.last_idx
; ++i
)
4946 ops
[i
]->id
= bb
->index
;
4950 if (ops
.length () > 1)
4951 any_changes
= optimize_range_tests (ERROR_MARK
, &ops
, first_bb
);
4954 unsigned int idx
, max_idx
= 0;
4955 /* update_ops relies on has_single_use predicates returning the
4956 same values as it did during get_ops earlier. Additionally it
4957 never removes statements, only adds new ones and it should walk
4958 from the single imm use and check the predicate already before
4959 making those changes.
4960 On the other hand, the direct handling of GIMPLE_COND can turn
4961 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4962 it needs to be done in a separate loop afterwards. */
4963 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
4965 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
4966 && bbinfo
[idx
].op
!= NULL_TREE
)
4971 stmt
= last_stmt (bb
);
4972 new_op
= update_ops (bbinfo
[idx
].op
,
4974 ops
[bbinfo
[idx
].first_idx
]->rank
,
4975 ops
, &bbinfo
[idx
].first_idx
,
4976 loop_containing_stmt (stmt
));
4977 if (new_op
== NULL_TREE
)
4979 gcc_assert (bb
== last_bb
);
4980 new_op
= ops
[bbinfo
[idx
].first_idx
++]->op
;
4982 if (bbinfo
[idx
].op
!= new_op
)
4984 imm_use_iterator iter
;
4985 use_operand_p use_p
;
4986 gimple
*use_stmt
, *cast_or_tcc_cmp_stmt
= NULL
;
4988 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, bbinfo
[idx
].op
)
4989 if (is_gimple_debug (use_stmt
))
4991 else if (gimple_code (use_stmt
) == GIMPLE_COND
4992 || gimple_code (use_stmt
) == GIMPLE_PHI
)
4993 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
4994 SET_USE (use_p
, new_op
);
4995 else if ((is_gimple_assign (use_stmt
)
4997 (gimple_assign_rhs_code (use_stmt
))
4998 == tcc_comparison
)))
4999 cast_or_tcc_cmp_stmt
= use_stmt
;
5000 else if (gimple_assign_cast_p (use_stmt
))
5001 cast_or_tcc_cmp_stmt
= use_stmt
;
5005 if (cast_or_tcc_cmp_stmt
)
5007 gcc_assert (bb
== last_bb
);
5008 tree lhs
= gimple_assign_lhs (cast_or_tcc_cmp_stmt
);
5009 tree new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
5010 enum tree_code rhs_code
5011 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt
)
5012 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt
)
5015 if (is_gimple_min_invariant (new_op
))
5017 new_op
= fold_convert (TREE_TYPE (lhs
), new_op
);
5018 g
= gimple_build_assign (new_lhs
, new_op
);
5021 g
= gimple_build_assign (new_lhs
, rhs_code
, new_op
);
5022 gimple_stmt_iterator gsi
5023 = gsi_for_stmt (cast_or_tcc_cmp_stmt
);
5024 gimple_set_uid (g
, gimple_uid (cast_or_tcc_cmp_stmt
));
5025 gimple_set_visited (g
, true);
5026 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
5027 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
5028 if (is_gimple_debug (use_stmt
))
5030 else if (gimple_code (use_stmt
) == GIMPLE_COND
5031 || gimple_code (use_stmt
) == GIMPLE_PHI
)
5032 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
5033 SET_USE (use_p
, new_lhs
);
5042 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
5044 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
5045 && bbinfo
[idx
].op
== NULL_TREE
5046 && ops
[bbinfo
[idx
].first_idx
]->op
!= NULL_TREE
)
5048 gcond
*cond_stmt
= as_a
<gcond
*> (last_stmt (bb
));
5053 /* If we collapse the conditional to a true/false
5054 condition, then bubble that knowledge up to our caller. */
5055 if (integer_zerop (ops
[bbinfo
[idx
].first_idx
]->op
))
5057 gimple_cond_make_false (cond_stmt
);
5058 cfg_cleanup_needed
= true;
5060 else if (integer_onep (ops
[bbinfo
[idx
].first_idx
]->op
))
5062 gimple_cond_make_true (cond_stmt
);
5063 cfg_cleanup_needed
= true;
5067 gimple_cond_set_code (cond_stmt
, NE_EXPR
);
5068 gimple_cond_set_lhs (cond_stmt
,
5069 ops
[bbinfo
[idx
].first_idx
]->op
);
5070 gimple_cond_set_rhs (cond_stmt
, boolean_false_node
);
5072 update_stmt (cond_stmt
);
5078 /* The above changes could cause basic blocks after the first
5079 modified one, up to and including last_bb, to be executed even if
5080 they would not have been in the original program. If the value ranges of
5081 assignment lhs' in those bbs were dependent on the conditions
5082 guarding those basic blocks, which now can change, the VRs might
5083 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
5084 are only used within the same bb, it should not be a big deal if
5085 we just reset all the VRs in those bbs. See PR68671. */
5086 for (bb
= last_bb
, idx
= 0; idx
< max_idx
; bb
= single_pred (bb
), idx
++)
5087 reset_flow_sensitive_info_in_bb (bb
);
5089 return cfg_cleanup_needed
;
5092 /* Return true if OPERAND is defined by a PHI node which uses the LHS
5093 of STMT in its operands. This is also known as a "destructive
5094 update" operation. */
5097 is_phi_for_stmt (gimple *stmt, tree operand)
5102 use_operand_p arg_p
;
5105 if (TREE_CODE (operand
) != SSA_NAME
)
5108 lhs
= gimple_assign_lhs (stmt
);
5110 def_stmt
= SSA_NAME_DEF_STMT (operand
);
5111 def_phi
= dyn_cast
<gphi
*> (def_stmt
);
5115 FOR_EACH_PHI_ARG (arg_p
, def_phi
, i
, SSA_OP_USE
)
5116 if (lhs
== USE_FROM_PTR (arg_p
))
5121 /* Remove def stmt of VAR if VAR has zero uses and recurse
5122 on rhs1 operand if so. */
5125 remove_visited_stmt_chain (tree var
)
5128 gimple_stmt_iterator gsi
;
5132 if (TREE_CODE (var
) != SSA_NAME
|| !has_zero_uses (var
))
5134 stmt
= SSA_NAME_DEF_STMT (var
);
5135 if (is_gimple_assign (stmt
) && gimple_visited_p (stmt
))
5137 var
= gimple_assign_rhs1 (stmt
);
5138 gsi
= gsi_for_stmt (stmt
);
5139 reassoc_remove_stmt (&gsi
);
5140 release_defs (stmt
);
5147 /* This function checks three consecutive operands in
5148 the passed operands vector OPS starting from OPINDEX and
5149 swaps two operands if it is profitable for the binary operation
5150 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
5152 We pair ops with the same rank if possible.
5154 The alternative we try is to see if STMT is a destructive
5155 update style statement, which is like:
5158 In that case, we want to use the destructive update form to
5159 expose the possible vectorizer sum reduction opportunity.
5160 In that case, the third operand will be the phi node. This
5161 check is not performed if STMT is null.
5163 We could, of course, try to be better as noted above, and do a
5164 lot of work to try to find these opportunities in >3 operand
5165 cases, but it is unlikely to be worth it. */
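/* A sketch of the destructive update shape referred to above (illustrative
   SSA names, not part of this listing):

       sum_1 = PHI <sum_0 (preheader), sum_2 (latch)>
       ...
       sum_2 = x_3 + sum_1;

   Keeping the PHI result as the last operand of the final statement
   preserves this accumulation pattern for the vectorizer.  */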
5168 swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
5169 unsigned int opindex, gimple *stmt)
5171 operand_entry
*oe1
, *oe2
, *oe3
;
5174 oe2
= ops
[opindex
+ 1];
5175 oe3
= ops
[opindex
+ 2];
5177 if ((oe1
->rank
== oe2
->rank
5178 && oe2
->rank
!= oe3
->rank
)
5179 || (stmt
&& is_phi_for_stmt (stmt
, oe3
->op
)
5180 && !is_phi_for_stmt (stmt
, oe1
->op
)
5181 && !is_phi_for_stmt (stmt
, oe2
->op
)))
5182 std::swap (*oe1
, *oe3
);
5183 else if ((oe1
->rank
== oe3
->rank
5184 && oe2
->rank
!= oe3
->rank
)
5185 || (stmt
&& is_phi_for_stmt (stmt
, oe2
->op
)
5186 && !is_phi_for_stmt (stmt
, oe1
->op
)
5187 && !is_phi_for_stmt (stmt
, oe3
->op
)))
5188 std::swap (*oe1
, *oe2
);
5191 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5192 two definitions, otherwise return STMT. Sets INSERT_BEFORE to indicate
5193 whether RHS1 op RHS2 can be inserted before or needs to be inserted
5194 after the returned stmt. */
5196 static inline gimple
*
5197 find_insert_point (gimple
*stmt
, tree rhs1
, tree rhs2
, bool &insert_before
)
5199 insert_before
= true;
5200 if (TREE_CODE (rhs1
) == SSA_NAME
5201 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs1
)))
5203 stmt
= SSA_NAME_DEF_STMT (rhs1
);
5204 insert_before
= false;
5206 if (TREE_CODE (rhs2
) == SSA_NAME
5207 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs2
)))
5209 stmt
= SSA_NAME_DEF_STMT (rhs2
);
5210 insert_before
= false;
5215 /* If the stmt that defines operand has to be inserted, insert it
5218 insert_stmt_before_use (gimple
*stmt
, gimple
*stmt_to_insert
)
5220 gcc_assert (is_gimple_assign (stmt_to_insert
));
5221 tree rhs1
= gimple_assign_rhs1 (stmt_to_insert
);
5222 tree rhs2
= gimple_assign_rhs2 (stmt_to_insert
);
5224 gimple
*insert_point
= find_insert_point (stmt
, rhs1
, rhs2
, insert_before
);
5225 gimple_stmt_iterator gsi
= gsi_for_stmt (insert_point
);
5226 gimple_set_uid (stmt_to_insert
, gimple_uid (insert_point
));
5228 /* If the insert point is not stmt, then insert_point would be
5229 the point where operand rhs1 or rhs2 is defined. In this case,
5230 stmt_to_insert has to be inserted afterwards. This would
5231 only happen when the stmt insertion point is flexible. */
5233 gsi_insert_before (&gsi
, stmt_to_insert
, GSI_NEW_STMT
);
5235 insert_stmt_after (stmt_to_insert
, insert_point
);
5239 /* Recursively rewrite our linearized statements so that the operators
5240 match those in OPS[OPINDEX], putting the computation in rank
5241 order. Return new lhs.
5242 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5243 the current stmt and during recursive invocations.
5244 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5245 recursive invocations. */
5248 rewrite_expr_tree (gimple
*stmt
, enum tree_code rhs_code
, unsigned int opindex
,
5249 const vec
<operand_entry
*> &ops
, bool changed
,
5252 tree rhs1
= gimple_assign_rhs1 (stmt
);
5253 tree rhs2
= gimple_assign_rhs2 (stmt
);
5254 tree lhs
= gimple_assign_lhs (stmt
);
5257 /* The final recursion case for this function is that you have
5258 exactly two operations left.
5259 If we had exactly one op in the entire list to start with, we
5260 would have never called this function, and the tail recursion
5261 rewrites them one at a time. */
5262 if (opindex
+ 2 == ops
.length ())
5264 operand_entry
*oe1
, *oe2
;
5267 oe2
= ops
[opindex
+ 1];
5269 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
5271 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
5272 unsigned int uid
= gimple_uid (stmt
);
5274 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5276 fprintf (dump_file
, "Transforming ");
5277 print_gimple_stmt (dump_file
, stmt
, 0);
5280 /* If the stmt that defines operand has to be inserted, insert it
5282 if (oe1
->stmt_to_insert
)
5283 insert_stmt_before_use (stmt
, oe1
->stmt_to_insert
);
5284 if (oe2
->stmt_to_insert
)
5285 insert_stmt_before_use (stmt
, oe2
->stmt_to_insert
);
5286 /* Even when changed is false, reassociation could have e.g. removed
5287 some redundant operations, so unless we are just swapping the
5288 arguments or unless there is no change at all (then we just
5289 return lhs), force creation of a new SSA_NAME. */
5290 if (changed
|| ((rhs1
!= oe2
->op
|| rhs2
!= oe1
->op
) && opindex
))
5293 gimple
*insert_point
5294 = find_insert_point (stmt
, oe1
->op
, oe2
->op
, insert_before
);
5295 lhs
= make_ssa_name (TREE_TYPE (lhs
));
5297 = gimple_build_assign (lhs
, rhs_code
,
5299 gimple_set_uid (stmt
, uid
);
5300 gimple_set_visited (stmt
, true);
5302 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5304 insert_stmt_after (stmt
, insert_point
);
5309 gcc_checking_assert (find_insert_point (stmt
, oe1
->op
, oe2
->op
,
5312 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
5313 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
5317 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
5318 remove_visited_stmt_chain (rhs1
);
5320 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5322 fprintf (dump_file
, " into ");
5323 print_gimple_stmt (dump_file
, stmt
, 0);
5329 /* If we hit here, we should have 3 or more ops left. */
5330 gcc_assert (opindex
+ 2 < ops
.length ());
5332 /* Rewrite the next operator. */
5335 /* If the stmt that defines operand has to be inserted, insert it
5337 if (oe
->stmt_to_insert
)
5338 insert_stmt_before_use (stmt
, oe
->stmt_to_insert
);
5340 /* Recurse on the LHS of the binary operator, which is guaranteed to
5341 be the non-leaf side. */
5343 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), rhs_code
, opindex
+ 1, ops
,
5344 changed
|| oe
->op
!= rhs2
|| next_changed
,
5347 if (oe
->op
!= rhs2
|| new_rhs1
!= rhs1
)
5349 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5351 fprintf (dump_file
, "Transforming ");
5352 print_gimple_stmt (dump_file
, stmt
, 0);
5355 /* If changed is false, this is either opindex == 0
5356 or all outer rhs2's were equal to corresponding oe->op,
5357 and powi_result is NULL.
5358 That means lhs is equivalent before and after reassociation.
5359 Otherwise ensure the old lhs SSA_NAME is not reused and
5360 create a new stmt as well, so that any debug stmts will be
5361 properly adjusted. */
5364 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
5365 unsigned int uid
= gimple_uid (stmt
);
5367 gimple
*insert_point
= find_insert_point (stmt
, new_rhs1
, oe
->op
,
5370 lhs
= make_ssa_name (TREE_TYPE (lhs
));
5371 stmt
= gimple_build_assign (lhs
, rhs_code
,
5373 gimple_set_uid (stmt
, uid
);
5374 gimple_set_visited (stmt
, true);
5376 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5378 insert_stmt_after (stmt
, insert_point
);
5383 gcc_checking_assert (find_insert_point (stmt
, new_rhs1
, oe
->op
,
5386 gimple_assign_set_rhs1 (stmt
, new_rhs1
);
5387 gimple_assign_set_rhs2 (stmt
, oe
->op
);
5391 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5393 fprintf (dump_file
, " into ");
5394 print_gimple_stmt (dump_file
, stmt
, 0);
5400 /* Find out how many cycles we need to compute a statement chain.
5401 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
5402 maximum number of independent statements we may execute per cycle. */
5405 get_required_cycles (int ops_num, int cpu_width)
5411 /* While we have more than 2 * cpu_width operands
5412 we may reduce the number of operands by cpu_width
5414 res = ops_num / (2 * cpu_width);
5416 /* The remaining operand count may be reduced twice per cycle
5417 until we have only one operand. */
5418 rest = (unsigned) (ops_num - res * cpu_width);
5419 elog = exact_log2 (rest);
5423 res += floor_log2 (rest) + 1;
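/* Worked example (added for clarity): for ops_num = 6 and cpu_width = 2,
   res = 6 / 4 = 1 and rest = 6 - 1 * 2 = 4, so the tail contributes
   exact_log2 (4) = 2 and the chain needs 1 + 2 = 3 cycles.  With
   cpu_width = 1 the same chain needs 3 + floor_log2 (3) + 1 = 5 cycles.  */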
5428 /* Returns an optimal number of registers to use for the computation of
5429 the given statements. */
5432 get_reassociation_width (int ops_num, enum tree_code opc,
5435 int param_width = param_tree_reassoc_width;
5440 if (param_width > 0)
5441 width = param_width;
5443 width = targetm.sched.reassociation_width (opc, mode);
5448 /* Get the minimal time required for sequence computation. */
5449 cycles_best = get_required_cycles (ops_num, width);
5451 /* Check if we may use less width and still compute the sequence in
5452 the same time. That would allow us to reduce register usage.
5453 get_required_cycles is monotonically non-decreasing as the width
5454 decreases, so we can perform a binary search for the minimal width
5455 that still results in the optimal cycle count. */
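/* Example of the search (added for clarity): with ops_num = 6 and an
   initial width of 4, get_required_cycles gives 3 cycles; width 2 also
   gives 3 while width 1 gives 5, so the loop below narrows the width to 2
   without increasing the cycle count.  */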
5457 while (width > width_min)
5459 int width_mid = (width + width_min) / 2;
5461 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5463 else if (width_min < width_mid)
5464 width_min = width_mid;
5472 /* Recursively rewrite our linearized statements so that the operators
5473 match those in OPS[OPINDEX], putting the computation in rank
5474 order and trying to allow operations to be executed in
5478 rewrite_expr_tree_parallel (gassign
*stmt
, int width
,
5479 const vec
<operand_entry
*> &ops
)
5481 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
5482 int op_num
= ops
.length ();
5483 gcc_assert (op_num
> 0);
5484 int stmt_num
= op_num
- 1;
5485 gimple
**stmts
= XALLOCAVEC (gimple
*, stmt_num
);
5486 int op_index
= op_num
- 1;
5488 int ready_stmts_end
= 0;
5490 gimple
*stmt1
= NULL
, *stmt2
= NULL
;
5491 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
5493 /* We start expression rewriting from the top statements.
5494 So, in this loop we create a full list of statements
5495 we will work with. */
5496 stmts
[stmt_num
- 1] = stmt
;
5497 for (i
= stmt_num
- 2; i
>= 0; i
--)
5498 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
5500 for (i
= 0; i
< stmt_num
; i
++)
5504 /* Determine whether we should use results of
5505 already handled statements or not. */
5506 if (ready_stmts_end
== 0
5507 && (i
- stmt_index
>= width
|| op_index
< 1))
5508 ready_stmts_end
= i
;
5510 /* Now we choose operands for the next statement. Non zero
5511 value in ready_stmts_end means here that we should use
5512 the result of already generated statements as new operand. */
5513 if (ready_stmts_end
> 0)
5515 op1
= gimple_assign_lhs (stmts
[stmt_index
++]);
5516 if (ready_stmts_end
> stmt_index
)
5517 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
5518 else if (op_index
>= 0)
5520 operand_entry
*oe
= ops
[op_index
--];
5521 stmt2
= oe
->stmt_to_insert
;
5526 gcc_assert (stmt_index
< i
);
5527 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
5530 if (stmt_index
>= ready_stmts_end
)
5531 ready_stmts_end
= 0;
5536 swap_ops_for_binary_stmt (ops
, op_index
- 2, NULL
);
5537 operand_entry
*oe2
= ops
[op_index
--];
5538 operand_entry
*oe1
= ops
[op_index
--];
5540 stmt2
= oe2
->stmt_to_insert
;
5542 stmt1
= oe1
->stmt_to_insert
;
5545 /* If we emit the last statement then we should put
5546 operands into the last statement. It will also
5548 if (op_index
< 0 && stmt_index
== i
)
5551 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5553 fprintf (dump_file
, "Transforming ");
5554 print_gimple_stmt (dump_file
, stmts
[i
], 0);
5557 /* If the stmt that defines operand has to be inserted, insert it
5560 insert_stmt_before_use (stmts
[i
], stmt1
);
5562 insert_stmt_before_use (stmts
[i
], stmt2
);
5563 stmt1
= stmt2
= NULL
;
5565 /* We keep original statement only for the last one. All
5566 others are recreated. */
5567 if (i
== stmt_num
- 1)
5569 gimple_assign_set_rhs1 (stmts
[i
], op1
);
5570 gimple_assign_set_rhs2 (stmts
[i
], op2
);
5571 update_stmt (stmts
[i
]);
5575 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
), op1
, op2
, opcode
);
5576 gimple_set_visited (stmts
[i
], true);
5578 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5580 fprintf (dump_file
, " into ");
5581 print_gimple_stmt (dump_file
, stmts
[i
], 0);
5585 remove_visited_stmt_chain (last_rhs1
);
5588 /* Transform STMT, which is really (A + B) + (C + D), into the left
5589 linear form, ((A+B)+C)+D.
5590 Recurse on D if necessary. */
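/* Concretely (illustrative temporaries, not from this listing): for

       t1 = a + b;
       t2 = c + d;
       x  = t1 + t2;

   the statement defining t2 is folded into the chain, giving roughly

       t1 = a + b;
       t3 = t1 + d;
       x  = t3 + c;

   i.e. a left-linear chain; the relative order of c and d within the
   chain does not matter for reassociation.  */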
5593 linearize_expr (gimple *stmt)
5595 gimple_stmt_iterator gsi
;
5596 gimple
*binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
5597 gimple
*binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
5598 gimple
*oldbinrhs
= binrhs
;
5599 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
5600 gimple
*newbinrhs
= NULL
;
5601 class loop
*loop
= loop_containing_stmt (stmt
);
5602 tree lhs
= gimple_assign_lhs (stmt
);
5604 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
5605 && is_reassociable_op (binrhs
, rhscode
, loop
));
5607 gsi
= gsi_for_stmt (stmt
);
5609 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
5610 binrhs
= gimple_build_assign (make_ssa_name (TREE_TYPE (lhs
)),
5611 gimple_assign_rhs_code (binrhs
),
5612 gimple_assign_lhs (binlhs
),
5613 gimple_assign_rhs2 (binrhs
));
5614 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
5615 gsi_insert_before (&gsi
, binrhs
, GSI_SAME_STMT
);
5616 gimple_set_uid (binrhs
, gimple_uid (stmt
));
5618 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
5619 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
5621 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5623 fprintf (dump_file
, "Linearized: ");
5624 print_gimple_stmt (dump_file
, stmt
, 0);
5627 reassociate_stats
.linearized
++;
5630 gsi
= gsi_for_stmt (oldbinrhs
);
5631 reassoc_remove_stmt (&gsi
);
5632 release_defs (oldbinrhs
);
5634 gimple_set_visited (stmt
, true);
5635 gimple_set_visited (binlhs
, true);
5636 gimple_set_visited (binrhs
, true);
5638 /* Tail recurse on the new rhs if it still needs reassociation. */
5639 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
5640 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5641 want to change the algorithm while converting to tuples. */
5642 linearize_expr (stmt
);
5645 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5646 it. Otherwise, return NULL. */
5649 get_single_immediate_use (tree lhs
)
5651 use_operand_p immuse
;
5654 if (TREE_CODE (lhs
) == SSA_NAME
5655 && single_imm_use (lhs
, &immuse
, &immusestmt
)
5656 && is_gimple_assign (immusestmt
))
5662 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5663 representing the negated value. Insertions of any necessary
5664 instructions go before GSI.
5665 This function is recursive in that, if you hand it "a_5" as the
5666 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5667 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5670 negate_value (tree tonegate
, gimple_stmt_iterator
*gsip
)
5672 gimple
*negatedefstmt
= NULL
;
5673 tree resultofnegate
;
5674 gimple_stmt_iterator gsi
;
5677 /* If we are trying to negate a name, defined by an add, negate the
5678 add operands instead. */
5679 if (TREE_CODE (tonegate
) == SSA_NAME
)
5680 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
5681 if (TREE_CODE (tonegate
) == SSA_NAME
5682 && is_gimple_assign (negatedefstmt
)
5683 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
5684 && has_single_use (gimple_assign_lhs (negatedefstmt
))
5685 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
5687 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
5688 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
5689 tree lhs
= gimple_assign_lhs (negatedefstmt
);
5692 gsi
= gsi_for_stmt (negatedefstmt
);
5693 rhs1
= negate_value (rhs1
, &gsi
);
5695 gsi
= gsi_for_stmt (negatedefstmt
);
5696 rhs2
= negate_value (rhs2
, &gsi
);
5698 gsi
= gsi_for_stmt (negatedefstmt
);
5699 lhs
= make_ssa_name (TREE_TYPE (lhs
));
5700 gimple_set_visited (negatedefstmt
, true);
5701 g
= gimple_build_assign (lhs
, PLUS_EXPR
, rhs1
, rhs2
);
5702 gimple_set_uid (g
, gimple_uid (negatedefstmt
));
5703 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
5707 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
5708 resultofnegate
= force_gimple_operand_gsi (gsip
, tonegate
, true,
5709 NULL_TREE
, true, GSI_SAME_STMT
);
5711 uid
= gimple_uid (gsi_stmt (gsi
));
5712 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
5714 gimple
*stmt
= gsi_stmt (gsi
);
5715 if (gimple_uid (stmt
) != 0)
5717 gimple_set_uid (stmt
, uid
);
5719 return resultofnegate
;
5722 /* Return true if we should break up the subtract in STMT into an add
5723 with negate. This is true when the subtract operands are really
5724 adds, or the subtract itself is used in an add expression. In
5725 either case, breaking up the subtract into an add with negate
5726 exposes the adds to reassociation. */
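/* For instance (illustrative): if c = a - b is used only in e = c + d,
   rewriting it as c = a + (-b) lets the reassociation machinery treat
   a, -b and d as one flat PLUS chain.  */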
5729 should_break_up_subtract (gimple *stmt)
5731 tree lhs
= gimple_assign_lhs (stmt
);
5732 tree binlhs
= gimple_assign_rhs1 (stmt
);
5733 tree binrhs
= gimple_assign_rhs2 (stmt
);
5735 class loop
*loop
= loop_containing_stmt (stmt
);
5737 if (TREE_CODE (binlhs
) == SSA_NAME
5738 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs
), PLUS_EXPR
, loop
))
5741 if (TREE_CODE (binrhs
) == SSA_NAME
5742 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
), PLUS_EXPR
, loop
))
5745 if (TREE_CODE (lhs
) == SSA_NAME
5746 && (immusestmt
= get_single_immediate_use (lhs
))
5747 && is_gimple_assign (immusestmt
)
5748 && (gimple_assign_rhs_code (immusestmt
) == PLUS_EXPR
5749 || (gimple_assign_rhs_code (immusestmt
) == MINUS_EXPR
5750 && gimple_assign_rhs1 (immusestmt
) == lhs
)
5751 || gimple_assign_rhs_code (immusestmt
) == MULT_EXPR
))
5756 /* Transform STMT from A - B into A + -B. */
5759 break_up_subtract (gimple
*stmt
, gimple_stmt_iterator
*gsip
)
5761 tree rhs1
= gimple_assign_rhs1 (stmt
);
5762 tree rhs2
= gimple_assign_rhs2 (stmt
);
5764 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5766 fprintf (dump_file
, "Breaking up subtract ");
5767 print_gimple_stmt (dump_file
, stmt
, 0);
5770 rhs2
= negate_value (rhs2
, gsip
);
5771 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
5775 /* Determine whether STMT is a builtin call that raises an SSA name
5776 to an integer power and has only one use. If so, and this is early
5777 reassociation and unsafe math optimizations are permitted, place
5778 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5779 If any of these conditions does not hold, return FALSE. */
5782 acceptable_pow_call (gcall
*stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
5785 REAL_VALUE_TYPE c
, cint
;
5787 switch (gimple_call_combined_fn (stmt
))
5790 if (flag_errno_math
)
5793 *base
= gimple_call_arg (stmt
, 0);
5794 arg1
= gimple_call_arg (stmt
, 1);
5796 if (TREE_CODE (arg1
) != REAL_CST
)
5799 c
= TREE_REAL_CST (arg1
);
5801 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
5804 *exponent
= real_to_integer (&c
);
5805 real_from_integer (&cint
, VOIDmode
, *exponent
, SIGNED
);
5806 if (!real_identical (&c
, &cint
))
5812 *base
= gimple_call_arg (stmt
, 0);
5813 arg1
= gimple_call_arg (stmt
, 1);
5815 if (!tree_fits_shwi_p (arg1
))
5818 *exponent
= tree_to_shwi (arg1
);
5825 /* Expanding negative exponents is generally unproductive, so we don't
5826 complicate matters with those. Exponents of zero and one should
5827 have been handled by expression folding. */
5828 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
5834 /* Try to derive and add operand entry for OP to *OPS. Return false if
5838 try_special_add_to_ops (vec
<operand_entry
*> *ops
,
5839 enum tree_code code
,
5840 tree op
, gimple
* def_stmt
)
5842 tree base
= NULL_TREE
;
5843 HOST_WIDE_INT exponent
= 0;
5845 if (TREE_CODE (op
) != SSA_NAME
5846 || ! has_single_use (op
))
5849 if (code
== MULT_EXPR
5850 && reassoc_insert_powi_p
5851 && flag_unsafe_math_optimizations
5852 && is_gimple_call (def_stmt
)
5853 && acceptable_pow_call (as_a
<gcall
*> (def_stmt
), &base
, &exponent
))
5855 add_repeat_to_ops_vec (ops
, base
, exponent
);
5856 gimple_set_visited (def_stmt
, true);
5859 else if (code
== MULT_EXPR
5860 && is_gimple_assign (def_stmt
)
5861 && gimple_assign_rhs_code (def_stmt
) == NEGATE_EXPR
5862 && !HONOR_SNANS (TREE_TYPE (op
))
5863 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op
))
5864 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op
)))
5865 && (!FLOAT_TYPE_P (TREE_TYPE (op
))
5866 || !DECIMAL_FLOAT_MODE_P (element_mode (op
))))
5868 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
5869 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
5870 add_to_ops_vec (ops
, rhs1
);
5871 add_to_ops_vec (ops
, cst
);
5872 gimple_set_visited (def_stmt
, true);
5879 /* Recursively linearize a binary expression that is the RHS of STMT.
5880 Place the operands of the expression tree in the vector named OPS. */
5883 linearize_expr_tree (vec
<operand_entry
*> *ops
, gimple
*stmt
,
5884 bool is_associative
, bool set_visited
)
5886 tree binlhs
= gimple_assign_rhs1 (stmt
);
5887 tree binrhs
= gimple_assign_rhs2 (stmt
);
5888 gimple
*binlhsdef
= NULL
, *binrhsdef
= NULL
;
5889 bool binlhsisreassoc
= false;
5890 bool binrhsisreassoc
= false;
5891 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
5892 class loop
*loop
= loop_containing_stmt (stmt
);
5895 gimple_set_visited (stmt
, true);
5897 if (TREE_CODE (binlhs
) == SSA_NAME
)
5899 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
5900 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
5901 && !stmt_could_throw_p (cfun
, binlhsdef
));
5904 if (TREE_CODE (binrhs
) == SSA_NAME
)
5906 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
5907 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
5908 && !stmt_could_throw_p (cfun
, binrhsdef
));
5911 /* If the LHS is not reassociable, but the RHS is, we need to swap
5912 them. If neither is reassociable, there is nothing we can do, so
5913 just put them in the ops vector. If the LHS is reassociable,
5914 linearize it. If both are reassociable, then linearize the RHS
5917 if (!binlhsisreassoc
)
5919 /* If this is not an associative operation like division, give up. */
5920 if (!is_associative
)
5922 add_to_ops_vec (ops
, binrhs
);
5926 if (!binrhsisreassoc
)
5929 if (try_special_add_to_ops (ops
, rhscode
, binrhs
, binrhsdef
))
5930 /* If we add ops for the rhs we expect to be able to recurse
5931 to it via the lhs during expression rewrite so swap
5935 add_to_ops_vec (ops
, binrhs
);
5937 if (!try_special_add_to_ops (ops
, rhscode
, binlhs
, binlhsdef
))
5938 add_to_ops_vec (ops
, binlhs
);
5944 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5946 fprintf (dump_file
, "swapping operands of ");
5947 print_gimple_stmt (dump_file
, stmt
, 0);
5950 swap_ssa_operands (stmt
,
5951 gimple_assign_rhs1_ptr (stmt
),
5952 gimple_assign_rhs2_ptr (stmt
));
5955 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5957 fprintf (dump_file
, " is now ");
5958 print_gimple_stmt (dump_file
, stmt
, 0);
5960 if (!binrhsisreassoc
)
5963 /* We want to make it so the lhs is always the reassociative op,
5965 std::swap (binlhs
, binrhs
);
5967 else if (binrhsisreassoc
)
5969 linearize_expr (stmt
);
5970 binlhs
= gimple_assign_rhs1 (stmt
);
5971 binrhs
= gimple_assign_rhs2 (stmt
);
5974 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
5975 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
5977 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
5978 is_associative
, set_visited
);
5980 if (!try_special_add_to_ops (ops
, rhscode
, binrhs
, binrhsdef
))
5981 add_to_ops_vec (ops
, binrhs
);
5984 /* Repropagate the negates back into subtracts, since no other pass
5985 currently does it. */
5988 repropagate_negates (void)
5993 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
5995 gimple
*user
= get_single_immediate_use (negate
);
5996 if (!user
|| !is_gimple_assign (user
))
5999 tree negateop
= gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate
));
6000 if (TREE_CODE (negateop
) == SSA_NAME
6001 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop
))
6004 /* The negate operand can be either operand of a PLUS_EXPR
6005 (it can be the LHS if the RHS is a constant for example).
6007 Force the negate operand to the RHS of the PLUS_EXPR, then
6008 transform the PLUS_EXPR into a MINUS_EXPR. */
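/* E.g. (illustrative SSA names): for x_1 = -a_2; y_3 = b_4 + x_1; the
   code below rewrites the user into y_3 = b_4 - a_2, so the explicit
   negate can later be removed as dead.  */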
6009 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
6011 /* If the negated operand appears on the LHS of the
6012 PLUS_EXPR, exchange the operands of the PLUS_EXPR
6013 to force the negated operand to the RHS of the PLUS_EXPR. */
6014 if (gimple_assign_rhs1 (user
) == negate
)
6016 swap_ssa_operands (user
,
6017 gimple_assign_rhs1_ptr (user
),
6018 gimple_assign_rhs2_ptr (user
));
6021 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
6022 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
6023 if (gimple_assign_rhs2 (user
) == negate
)
6025 tree rhs1
= gimple_assign_rhs1 (user
);
6026 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
6027 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
,
6032 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
6034 if (gimple_assign_rhs1 (user
) == negate
)
6039 which we transform into
6042 This pushes down the negate which we possibly can merge
6043 into some other operation, hence insert it into the
6044 plus_negates vector. */
6045 gimple
*feed
= SSA_NAME_DEF_STMT (negate
);
6046 tree b
= gimple_assign_rhs2 (user
);
6047 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
);
6048 gimple_stmt_iterator gsi2
= gsi_for_stmt (user
);
6049 tree x
= make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed
)));
6050 gimple
*g
= gimple_build_assign (x
, PLUS_EXPR
, negateop
, b
);
6051 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
6052 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, x
);
6053 user
= gsi_stmt (gsi2
);
6055 reassoc_remove_stmt (&gsi
);
6056 release_defs (feed
);
6057 plus_negates
.safe_push (gimple_assign_lhs (user
));
6061 /* Transform "x = -negateop; y = b - x" into "y = b + negateop",
6062 getting rid of one operation. */
6063 tree rhs1
= gimple_assign_rhs1 (user
);
6064 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
6065 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, negateop
);
6066 update_stmt (gsi_stmt (gsi
));
6072 /* Break up subtract operations in block BB.
6074 We do this top down because we don't know whether the subtract is
6075 part of a possible chain of reassociation except at the top.
6084 we want to break up k = t - q, but we won't until we've transformed q
6085 = b - r, which won't be broken up until we transform b = c - d.
6087 En passant, clear the GIMPLE visited flag on every statement
6088 and set UIDs within each basic block. */
6091 break_up_subtract_bb (basic_block bb
)
6093 gimple_stmt_iterator gsi
;
6095 unsigned int uid
= 1;
6097 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
6099 gimple
*stmt
= gsi_stmt (gsi
);
6100 gimple_set_visited (stmt
, false);
6101 gimple_set_uid (stmt
, uid
++);
6103 if (!is_gimple_assign (stmt
)
6104 || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt
)))
6105 || !can_reassociate_op_p (gimple_assign_lhs (stmt
)))
6108 /* Look for simple gimple subtract operations. */
6109 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
6111 if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt
))
6112 || !can_reassociate_op_p (gimple_assign_rhs2 (stmt
)))
6115 /* Check for a subtract used only in an addition. If this
6116 is the case, transform it into add of a negate for better
6117 reassociation. IE transform C = A-B into C = A + -B if C
6118 is only used in an addition. */
6119 if (should_break_up_subtract (stmt
))
6120 break_up_subtract (stmt
, &gsi
);
6122 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
6123 && can_reassociate_op_p (gimple_assign_rhs1 (stmt
)))
6124 plus_negates
.safe_push (gimple_assign_lhs (stmt
));
6126 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
6128 son
= next_dom_son (CDI_DOMINATORS
, son
))
6129 break_up_subtract_bb (son
);
6132 /* Used for repeated factor analysis. */
6133 struct repeat_factor
6135 /* An SSA name that occurs in a multiply chain. */
6138 /* Cached rank of the factor. */
6141 /* Number of occurrences of the factor in the chain. */
6142 HOST_WIDE_INT count
;
6144 /* An SSA name representing the product of this factor and
6145 all factors appearing later in the repeated factor vector. */
6150 static vec
<repeat_factor
> repeat_factor_vec
;
6152 /* Used for sorting the repeat factor vector. Sort primarily by
6153 ascending occurrence count, secondarily by descending rank. */
6156 compare_repeat_factors (const void *x1
, const void *x2
)
6158 const repeat_factor
*rf1
= (const repeat_factor
*) x1
;
6159 const repeat_factor
*rf2
= (const repeat_factor
*) x2
;
6161 if (rf1
->count
!= rf2
->count
)
6162 return rf1
->count
- rf2
->count
;
6164 return rf2
->rank
- rf1
->rank
;
6167 /* Look for repeated operands in OPS in the multiply tree rooted at
6168 STMT. Replace them with an optimal sequence of multiplies and powi
6169 builtin calls, and remove the used operands from OPS. Return an
6170 SSA name representing the value of the replacement sequence. */
6173 attempt_builtin_powi (gimple
*stmt
, vec
<operand_entry
*> *ops
)
6175 unsigned i
, j
, vec_len
;
6178 repeat_factor
*rf1
, *rf2
;
6179 repeat_factor rfnew
;
6180 tree result
= NULL_TREE
;
6181 tree target_ssa
, iter_result
;
6182 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
6183 tree powi_fndecl
= mathfn_built_in (type
, BUILT_IN_POWI
);
6184 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
6185 gimple
*mul_stmt
, *pow_stmt
;
6187 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6188 target, unless type is integral. */
6189 if (!powi_fndecl
&& !INTEGRAL_TYPE_P (type
))
6192 /* Allocate the repeated factor vector. */
6193 repeat_factor_vec
.create (10);
6195 /* Scan the OPS vector for all SSA names in the product and build
6196 up a vector of occurrence counts for each factor. */
6197 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
6199 if (TREE_CODE (oe
->op
) == SSA_NAME
)
6201 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
6203 if (rf1
->factor
== oe
->op
)
6205 rf1
->count
+= oe
->count
;
6210 if (j
>= repeat_factor_vec
.length ())
6212 rfnew
.factor
= oe
->op
;
6213 rfnew
.rank
= oe
->rank
;
6214 rfnew
.count
= oe
->count
;
6215 rfnew
.repr
= NULL_TREE
;
6216 repeat_factor_vec
.safe_push (rfnew
);
6221 /* Sort the repeated factor vector by (a) increasing occurrence count,
6222 and (b) decreasing rank. */
6223 repeat_factor_vec
.qsort (compare_repeat_factors
);
6225 /* It is generally best to combine as many base factors as possible
6226 into a product before applying __builtin_powi to the result.
6227 However, the sort order chosen for the repeated factor vector
6228 allows us to cache partial results for the product of the base
6229 factors for subsequent use. When we already have a cached partial
6230 result from a previous iteration, it is best to make use of it
6231 before looking for another __builtin_pow opportunity.
6233 As an example, consider x * x * y * y * y * z * z * z * z.
6234 We want to first compose the product x * y * z, raise it to the
6235 second power, then multiply this by y * z, and finally multiply
6236 by z. This can be done in 5 multiplies provided we cache y * z
6237 for use in both expressions:
6245 If we instead ignored the cached y * z and first multiplied by
6246 the __builtin_pow opportunity z * z, we would get the inferior:
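/* The two multiply sequences described above, spelled out (temporaries
   t1..t5 are illustrative):

       t1 = y * z
       t2 = t1 * x
       t3 = t2 * t2
       t4 = t1 * t3
       result = t4 * z          (5 multiplies, reusing the cached y * z)

   versus

       t1 = y * z
       t2 = t1 * x
       t3 = t2 * t2
       t4 = z * z
       t5 = t3 * t4
       result = t5 * y          (6 multiplies)  */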
6255 vec_len
= repeat_factor_vec
.length ();
6257 /* Repeatedly look for opportunities to create a builtin_powi call. */
6260 HOST_WIDE_INT power
;
6262 /* First look for the largest cached product of factors from
6263 preceding iterations. If found, create a builtin_powi for
6264 it if the minimum occurrence count for its factors is at
6265 least 2, or just use this cached product as our next
6266 multiplicand if the minimum occurrence count is 1. */
6267 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
6269 if (rf1
->repr
&& rf1
->count
> 0)
6279 iter_result
= rf1
->repr
;
6281 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6285 fputs ("Multiplying by cached product ", dump_file
);
6286 for (elt
= j
; elt
< vec_len
; elt
++)
6288 rf
= &repeat_factor_vec
[elt
];
6289 print_generic_expr (dump_file
, rf
->factor
);
6290 if (elt
< vec_len
- 1)
6291 fputs (" * ", dump_file
);
6293 fputs ("\n", dump_file
);
6298 if (INTEGRAL_TYPE_P (type
))
6300 gcc_assert (power
> 1);
6301 gimple_stmt_iterator gsip
= gsi
;
6303 iter_result
= powi_as_mults (&gsi
, gimple_location (stmt
),
6305 gimple_stmt_iterator gsic
= gsi
;
6306 while (gsi_stmt (gsic
) != gsi_stmt (gsip
))
6308 gimple_set_uid (gsi_stmt (gsic
), gimple_uid (stmt
));
6309 gimple_set_visited (gsi_stmt (gsic
), true);
6315 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
6317 = gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
6318 build_int_cst (integer_type_node
,
6320 gimple_call_set_lhs (pow_stmt
, iter_result
);
6321 gimple_set_location (pow_stmt
, gimple_location (stmt
));
6322 gimple_set_uid (pow_stmt
, gimple_uid (stmt
));
6323 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
6326 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6330 fputs ("Building __builtin_pow call for cached product (",
6332 for (elt
= j
; elt
< vec_len
; elt
++)
6334 rf
= &repeat_factor_vec
[elt
];
6335 print_generic_expr (dump_file
, rf
->factor
);
6336 if (elt
< vec_len
- 1)
6337 fputs (" * ", dump_file
);
6339 fprintf (dump_file
, ")^" HOST_WIDE_INT_PRINT_DEC
"\n",
6346 /* Otherwise, find the first factor in the repeated factor
6347 vector whose occurrence count is at least 2. If no such
6348 factor exists, there are no builtin_powi opportunities
6350 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
6352 if (rf1
->count
>= 2)
6361 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6365 fputs ("Building __builtin_pow call for (", dump_file
);
6366 for (elt
= j
; elt
< vec_len
; elt
++)
6368 rf
= &repeat_factor_vec
[elt
];
6369 print_generic_expr (dump_file
, rf
->factor
);
6370 if (elt
< vec_len
- 1)
6371 fputs (" * ", dump_file
);
6373 fprintf (dump_file
, ")^" HOST_WIDE_INT_PRINT_DEC
"\n", power
);
6376 reassociate_stats
.pows_created
++;
6378 /* Visit each element of the vector in reverse order (so that
6379 high-occurrence elements are visited first, and within the
6380 same occurrence count, lower-ranked elements are visited
6381 first). Form a linear product of all elements in this order
6382 whose occurrence count is at least that of element J.
6383 Record the SSA name representing the product of each element
6384 with all subsequent elements in the vector. */
6385 if (j
== vec_len
- 1)
6386 rf1
->repr
= rf1
->factor
;
6389 for (ii
= vec_len
- 2; ii
>= (int)j
; ii
--)
6393 rf1
= &repeat_factor_vec
[ii
];
6394 rf2
= &repeat_factor_vec
[ii
+ 1];
6396 /* Init the last factor's representative to be itself. */
6398 rf2
->repr
= rf2
->factor
;
6403 target_ssa
= make_temp_ssa_name (type
, NULL
, "reassocpow");
6404 mul_stmt
= gimple_build_assign (target_ssa
, MULT_EXPR
,
6406 gimple_set_location (mul_stmt
, gimple_location (stmt
));
6407 gimple_set_uid (mul_stmt
, gimple_uid (stmt
));
6408 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
6409 rf1
->repr
= target_ssa
;
6411 /* Don't reprocess the multiply we just introduced. */
6412 gimple_set_visited (mul_stmt
, true);
6416 /* Form a call to __builtin_powi for the maximum product
6417 just formed, raised to the power obtained earlier. */
6418 rf1
= &repeat_factor_vec
[j
];
6419 if (INTEGRAL_TYPE_P (type
))
6421 gcc_assert (power
> 1);
6422 gimple_stmt_iterator gsip
= gsi
;
6424 iter_result
= powi_as_mults (&gsi
, gimple_location (stmt
),
6426 gimple_stmt_iterator gsic
= gsi
;
6427 while (gsi_stmt (gsic
) != gsi_stmt (gsip
))
6429 gimple_set_uid (gsi_stmt (gsic
), gimple_uid (stmt
));
6430 gimple_set_visited (gsi_stmt (gsic
), true);
6436 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
6437 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
6438 build_int_cst (integer_type_node
,
6440 gimple_call_set_lhs (pow_stmt
, iter_result
);
6441 gimple_set_location (pow_stmt
, gimple_location (stmt
));
6442 gimple_set_uid (pow_stmt
, gimple_uid (stmt
));
6443 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
6447 /* If we previously formed at least one other builtin_powi call,
6448 form the product of this one and those others. */
6451 tree new_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
6452 mul_stmt
= gimple_build_assign (new_result
, MULT_EXPR
,
6453 result
, iter_result
);
6454 gimple_set_location (mul_stmt
, gimple_location (stmt
));
6455 gimple_set_uid (mul_stmt
, gimple_uid (stmt
));
6456 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
6457 gimple_set_visited (mul_stmt
, true);
6458 result
= new_result
;
6461 result
= iter_result
;
6463 /* Decrement the occurrence count of each element in the product
6464 by the count found above, and remove this many copies of each
6466 for (i
= j
; i
< vec_len
; i
++)
6471 rf1
= &repeat_factor_vec
[i
];
6472 rf1
->count
-= power
;
6474 FOR_EACH_VEC_ELT_REVERSE (*ops
, n
, oe
)
6476 if (oe
->op
== rf1
->factor
)
6480 ops
->ordered_remove (n
);
6496 /* At this point all elements in the repeated factor vector have a
6497 remaining occurrence count of 0 or 1, and those with a count of 1
6498 don't have cached representatives. Re-sort the ops vector and
6500 ops
->qsort (sort_by_operand_rank
);
6501 repeat_factor_vec
.release ();
6503 /* Return the final product computed herein. Note that there may
6504 still be some elements with single occurrence count left in OPS;
6505 those will be handled by the normal reassociation logic. */
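/* Illustrative sketch only (not taken from a specific test case): for a
   linearized multiply chain such as

     t = x * x * x * y * y * y * z;

   the factors X and Y both occur three times, so the code above first
   forms the product X * Y and then emits something like

     reassocpow_1 = __builtin_powi (x * y, 3);

   (or the powi_as_mults expansion for integral types).  The leftover
   single-occurrence factor Z stays in OPS and is handled by the normal
   reassociation rewrite.  */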
/* Attempt to optimize
   CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
   CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */

static void
attempt_builtin_copysign (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  unsigned int i;
  unsigned int length = ops->length ();
  tree cst = ops->last ()->op;

  if (length == 1 || TREE_CODE (cst) != REAL_CST)
    return;

  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME
          && has_single_use (oe->op))
        {
          gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
          if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
            {
              tree arg0, arg1;
              switch (gimple_call_combined_fn (old_call))
                {
                CASE_CFN_COPYSIGN:
                CASE_CFN_COPYSIGN_FN:
                  arg0 = gimple_call_arg (old_call, 0);
                  arg1 = gimple_call_arg (old_call, 1);
                  /* The first argument of copysign must be a constant,
                     otherwise there's nothing to do.  */
                  if (TREE_CODE (arg0) == REAL_CST)
                    {
                      tree type = TREE_TYPE (arg0);
                      tree mul = const_binop (MULT_EXPR, type, cst, arg0);
                      /* If we couldn't fold to a single constant, skip it.
                         That happens e.g. for inexact multiplication when
                         -frounding-math.  */
                      if (mul == NULL_TREE)
                        break;
                      /* Instead of adjusting OLD_CALL, let's build a new
                         call to not leak the LHS and prevent keeping bogus
                         debug statements.  DCE will clean up the old call.  */
                      gcall *new_call;
                      if (gimple_call_internal_p (old_call))
                        new_call = gimple_build_call_internal
                          (IFN_COPYSIGN, 2, mul, arg1);
                      else
                        new_call = gimple_build_call
                          (gimple_call_fndecl (old_call), 2, mul, arg1);
                      tree lhs = make_ssa_name (type);
                      gimple_call_set_lhs (new_call, lhs);
                      gimple_set_location (new_call,
                                           gimple_location (old_call));
                      insert_stmt_after (new_call, old_call);
                      /* We've used the constant, get rid of it.  */
                      ops->pop ();
                      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
                      /* Handle the CST1 < 0 case by negating the result.  */
                      if (cst1_neg)
                        {
                          tree negrhs = make_ssa_name (TREE_TYPE (lhs));
                          gimple *negate_stmt
                            = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
                          insert_stmt_after (negate_stmt, new_call);
                          oe->op = negrhs;
                        }
                      else
                        oe->op = lhs;
                      if (dump_file && (dump_flags & TDF_DETAILS))
                        {
                          fprintf (dump_file, "Optimizing copysign: ");
                          print_generic_expr (dump_file, cst);
                          fprintf (dump_file, " * COPYSIGN (");
                          print_generic_expr (dump_file, arg0);
                          fprintf (dump_file, ", ");
                          print_generic_expr (dump_file, arg1);
                          fprintf (dump_file, ") into %sCOPYSIGN (",
                                   cst1_neg ? "-" : "");
                          print_generic_expr (dump_file, mul);
                          fprintf (dump_file, ", ");
                          print_generic_expr (dump_file, arg1);
                          fprintf (dump_file, ")\n");
                        }
                      return;
                    }
                  break;
                default:
                  break;
                }
            }
        }
    }
}
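/* A minimal illustration of the rule stated above:

     t = 2.0 * __builtin_copysign (3.0, y);    becomes  t = __builtin_copysign (6.0, y);
     t = -2.0 * __builtin_copysign (3.0, y);   becomes  t = -__builtin_copysign (6.0, y);

   The magnitude comes entirely from the folded constant CST1 * CST2,
   while the sign is still taken from Y, negated when CST1 < 0.  */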
/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */

static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
{
  tree rhs1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}
/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
                            tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}
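/* Illustrative use of the two helpers above (hypothetical operand lists):
   if reassociation reduces the operand list of  x = a * a * b  to the
   single operand B while attempt_builtin_powi produced POWI_RESULT for
   the repeated factor A, the statement is rewritten by
   transform_stmt_to_multiply into  x = b * POWI_RESULT.  If instead the
   list collapses to a single operand and there is no powi result,
   transform_stmt_to_copy turns the statement into a plain copy of that
   operand.  */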
/* Reassociate expressions in basic block BB and its post-dominator as
   children.

   Bubble up return status from maybe_optimize_range_tests.  */

static bool
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple *stmt = last_stmt (bb);
  bool cfg_cleanup_needed = false;

  if (stmt && !gimple_visited_p (stmt))
    cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);

  bool do_prev = false;
  for (gsi = gsi_last_bb (bb);
       !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
    {
      do_prev = true;
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
          && !stmt_could_throw_p (cfun, stmt))
        {
          tree lhs, rhs1, rhs2;
          enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

          /* If this was part of an already processed statement,
             we don't need to touch it again.  */
          if (gimple_visited_p (stmt))
            {
              /* This statement might have become dead because of previous
                 reassociations.  */
              if (has_zero_uses (gimple_get_lhs (stmt)))
                {
                  reassoc_remove_stmt (&gsi);
                  release_defs (stmt);
                  /* We might end up removing the last stmt above which
                     places the iterator to the end of the sequence.
                     Reset it to the last stmt in this case and make sure
                     we don't do gsi_prev in that case.  */
                  if (gsi_end_p (gsi))
                    {
                      gsi = gsi_last_bb (bb);
                      do_prev = false;
                    }
                }
              continue;
            }

          /* If this is not a gimple binary expression, there is
             nothing for us to do with it.  */
          if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
            continue;

          lhs = gimple_assign_lhs (stmt);
          rhs1 = gimple_assign_rhs1 (stmt);
          rhs2 = gimple_assign_rhs2 (stmt);

          /* For non-bit or min/max operations we can't associate
             all types.  Verify that here.  */
          if ((rhs_code != BIT_IOR_EXPR
               && rhs_code != BIT_AND_EXPR
               && rhs_code != BIT_XOR_EXPR
               && rhs_code != MIN_EXPR
               && rhs_code != MAX_EXPR
               && !can_reassociate_type_p (TREE_TYPE (lhs)))
              || !can_reassociate_op_p (rhs1)
              || !can_reassociate_op_p (rhs2))
            continue;

          if (associative_tree_code (rhs_code))
            {
              auto_vec<operand_entry *> ops;
              tree powi_result = NULL_TREE;
              bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));

              /* There may be no immediate uses left by the time we
                 get here because we may have eliminated them all.  */
              if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
                continue;

              gimple_set_visited (stmt, true);
              linearize_expr_tree (&ops, stmt, true, true);
              ops.qsort (sort_by_operand_rank);
              int orig_len = ops.length ();
              optimize_ops_list (rhs_code, &ops);
              if (undistribute_ops_list (rhs_code, &ops,
                                         loop_containing_stmt (stmt)))
                {
                  ops.qsort (sort_by_operand_rank);
                  optimize_ops_list (rhs_code, &ops);
                }
              if (undistribute_bitref_for_vector (rhs_code, &ops,
                                                  loop_containing_stmt (stmt)))
                {
                  ops.qsort (sort_by_operand_rank);
                  optimize_ops_list (rhs_code, &ops);
                }
              if (rhs_code == PLUS_EXPR
                  && transform_add_to_multiply (&ops))
                ops.qsort (sort_by_operand_rank);

              if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
                {
                  if (is_vector)
                    optimize_vec_cond_expr (rhs_code, &ops);
                  else
                    optimize_range_tests (rhs_code, &ops, NULL);
                }

              if (rhs_code == MULT_EXPR && !is_vector)
                {
                  attempt_builtin_copysign (&ops);

                  if (reassoc_insert_powi_p
                      && (flag_unsafe_math_optimizations
                          || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
                    powi_result = attempt_builtin_powi (stmt, &ops);
                }

              operand_entry *last;
              bool negate_result = false;
              if (ops.length () > 1
                  && rhs_code == MULT_EXPR)
                {
                  last = ops.last ();
                  if ((integer_minus_onep (last->op)
                       || real_minus_onep (last->op))
                      && !HONOR_SNANS (TREE_TYPE (lhs))
                      && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
                          || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
                    {
                      ops.pop ();
                      negate_result = true;
                    }
                }

              tree new_lhs = lhs;
              /* If the operand vector is now empty, all operands were
                 consumed by the __builtin_powi optimization.  */
              if (ops.length () == 0)
                transform_stmt_to_copy (&gsi, stmt, powi_result);
              else if (ops.length () == 1)
                {
                  tree last_op = ops.last ()->op;

                  /* If the stmt that defines operand has to be inserted,
                     insert it before the use.  */
                  if (ops.last ()->stmt_to_insert)
                    insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
                  if (powi_result)
                    transform_stmt_to_multiply (&gsi, stmt, last_op,
                                                powi_result);
                  else
                    transform_stmt_to_copy (&gsi, stmt, last_op);
                }
              else
                {
                  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
                  int ops_num = ops.length ();
                  int width;

                  /* For binary bit operations, if there are at least 3
                     operands and the last operand in OPS is a constant,
                     move it to the front.  This helps ensure that we generate
                     (X & Y) & C rather than (X & C) & Y.  The former will
                     often match a canonical bit test when we get to RTL.  */
                  if (ops.length () > 2
                      && (rhs_code == BIT_AND_EXPR
                          || rhs_code == BIT_IOR_EXPR
                          || rhs_code == BIT_XOR_EXPR)
                      && TREE_CODE (ops.last ()->op) == INTEGER_CST)
                    std::swap (*ops[0], *ops[ops_num - 1]);

                  /* Only rewrite the expression tree to parallel in the
                     last reassoc pass to avoid useless work back-and-forth
                     with initial linearization.  */
                  if (!reassoc_insert_powi_p
                      && ops.length () > 3
                      && (width = get_reassociation_width (ops_num, rhs_code,
                                                           mode)) > 1)
                    {
                      if (dump_file && (dump_flags & TDF_DETAILS))
                        fprintf (dump_file,
                                 "Width = %d was chosen for reassociation\n",
                                 width);
                      rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
                                                  width, ops);
                    }
                  else
                    {
                      /* When there are three operands left, we want
                         to make sure the ones that get the double
                         binary op are chosen wisely.  */
                      int len = ops.length ();
                      if (len >= 3)
                        swap_ops_for_binary_stmt (ops, len - 3, stmt);

                      new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
                                                   powi_result != NULL
                                                   || negate_result,
                                                   len != orig_len);
                    }

                  /* If we combined some repeated factors into a
                     __builtin_powi call, multiply that result by the
                     reassociated operands.  */
                  if (powi_result)
                    {
                      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
                      tree type = TREE_TYPE (lhs);
                      tree target_ssa = make_temp_ssa_name (type, NULL,
                                                            "reassocmul");
                      gimple_set_lhs (lhs_stmt, target_ssa);
                      update_stmt (lhs_stmt);
                      if (lhs != new_lhs)
                        {
                          target_ssa = new_lhs;
                          new_lhs = lhs;
                        }
                      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
                                                      powi_result, target_ssa);
                      gimple_set_location (mul_stmt, gimple_location (stmt));
                      gimple_set_uid (mul_stmt, gimple_uid (stmt));
                      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
                    }
                }

              if (negate_result)
                {
                  stmt = SSA_NAME_DEF_STMT (lhs);
                  tree tmp = make_ssa_name (TREE_TYPE (lhs));
                  gimple_set_lhs (stmt, tmp);
                  gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
                                                           tmp);
                  gimple_set_uid (neg_stmt, gimple_uid (stmt));
                  gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
                  update_stmt (stmt);
                }
            }
        }
    }

  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    cfg_cleanup_needed |= reassociate_bb (son);

  return cfg_cleanup_needed;
}
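/* Example of the constant-reordering step above (hypothetical input,
   not a specific test case): for  tmp = x & 0x10 & y;  the sorted
   operand list ends with the INTEGER_CST, so the swap moves it to the
   front and the rewritten tree becomes  tmp = (x & y) & 0x10;  which is
   more likely to match a single bit-test pattern once we get to RTL.  */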
/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...;  // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...;  // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI<1(l1), OTHERVAR(l2)>;  */

static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (var);
      gimple *use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      gcc_assert (ok
                  && is_gimple_assign (use_stmt)
                  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
                  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple *g = gimple_build_cond (NE_EXPR, var,
                                     build_zero_cst (TREE_TYPE (var)),
                                     NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = profile_probability::even ();
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      then_bb->count -= etrue->count ();

      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
        othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
        othervar = gimple_assign_rhs1 (use_stmt);
      else
        gcc_unreachable ();
      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}
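/* Illustrative source-level motivation (hypothetical): a condition like
     if (a < 0 || a == 3 || a == 5 || a == 7 || a == 9)
   may have its equality chain turned by optimize_range_tests into a bit
   test whose mask is computed with a shift, ORed with the result of the
   preceding range comparison.  branch_fixup then guards that shift with
   the cheap VAR != 0 test built above, so the bit-test sequence only
   runs when the range comparison did not already decide the result.  */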
void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
void debug_ops_vector (vec<operand_entry *> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry *> ops)
{
  operand_entry *oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op);
      fprintf (file, "\n");
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry *> ops)
{
  dump_ops_vector (stderr, ops);
}
/* Bubble up return status from reassociate_bb.  */

static bool
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}
/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  int64_t rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, int64_t>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
        insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB.  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}
/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
                            reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
                            reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
                            reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
                            reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
                            reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
                            reassociate_stats.pows_created);

  delete operand_rank;
  bitmap_clear (biased_names);
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}
/* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
   insertion of __builtin_powi calls.

   Returns TODO_cleanup_cfg if a CFG cleanup pass is desired due to
   optimization of a gimple conditional.  Otherwise returns zero.  */

static unsigned int
execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
{
  reassoc_insert_powi_p = insert_powi_p;
  reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;

  init_reassoc ();

  bool cfg_cleanup_needed;
  cfg_cleanup_needed = do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
}
namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () final override { return new pass_reassoc (m_ctxt); }
  void set_pass_param (unsigned int n, bool param) final override
  {
    gcc_assert (n == 0);
    insert_powi_p = param;
    bias_loop_carried_phi_ranks_p = !param;
  }
  bool gate (function *) final override { return flag_tree_reassoc != 0; }
  unsigned int execute (function *) final override
  {
    return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
  }

private:
  /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
     point 3a in the pass header comment.  */
  bool insert_powi_p;
  bool bias_loop_carried_phi_ranks_p;
}; // class pass_reassoc

} // anon namespace

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}
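/* Note: pass_reassoc is instantiated more than once in the pass pipeline;
   the single boolean accepted by set_pass_param selects whether a given
   instance may insert __builtin_powi calls and, inversely, whether
   loop-carried PHI ranks are biased.  A sketch of how a pass manager
   could configure the two variants (names illustrative only):

     pass_reassoc *early = new pass_reassoc (ctxt);
     early->set_pass_param (0, false);  // bias PHI ranks, no powi insertion
     pass_reassoc *late = new pass_reassoc (ctxt);
     late->set_pass_param (0, true);    // insert powi calls, no rank biasing
*/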