1 /* Reassociation for trees.
2 Copyright (C) 2005-2021 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
41 #include "gimple-fold.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
46 #include "tree-ssa-loop.h"
49 #include "langhooks.h"
53 #include "case-cfn-macros.h"
54 #include "tree-ssa-reassoc.h"
55 #include "tree-ssa-math-opts.h"
56 #include "gimple-range.h"
58 /* This is a simple global reassociation pass. It is, in part, based
59 on the LLVM pass of the same name (They do some things more/less
60 than we do, in different orders, etc).
62 It consists of five steps:
64 1. Breaking up subtract operations into addition + negate, where
65 it would promote the reassociation of adds.
67 2. Left linearization of the expression trees, so that (A+B)+(C+D)
68 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
69 During linearization, we place the operands of the binary
70 expressions into a vector of operand_entry_*
72 3. Optimization of the operand lists, eliminating things like a +
75 3a. Combine repeated factors with the same occurrence counts
76 into a __builtin_powi call that will later be optimized into
77 an optimal number of multiplies.
79 4. Rewrite the expression trees we linearized and optimized so
80 they are in proper rank order.
82 5. Repropagate negates, as nothing else will clean it up ATM.
84 A bit of theory on #4, since nobody seems to write anything down
85 about why it makes sense to do it the way they do it:
87 We could do this much nicer theoretically, but don't (for reasons
88 explained after how to do it theoretically nice :P).
90 In order to promote the most redundancy elimination, you want
91 binary expressions whose operands are the same rank (or
92 preferably, the same value) exposed to the redundancy eliminator,
93 for possible elimination.
95 So the way to do this if we really cared, is to build the new op
96 tree from the leaves to the roots, merging as you go, and putting the
97 new op on the end of the worklist, until you are left with one
98 thing on the worklist.
100 IE if you have to rewrite the following set of operands (listed with
101 rank in parentheses), with opcode PLUS_EXPR:
103 a (1), b (1), c (1), d (2), e (2)
106 We start with our merge worklist empty, and the ops list with all of
109 You want to first merge all leaves of the same rank, as much as
112 So first build a binary op of
114 mergetmp = a + b, and put "mergetmp" on the merge worklist.
116 Because there is no three operand form of PLUS_EXPR, c is not going to
117 be exposed to redundancy elimination as a rank 1 operand.
119 So you might as well throw it on the merge worklist (you could also
120 consider it to now be a rank two operand, and merge it with d and e,
121 but in this case, you then have evicted e from a binary op. So at
122 least in this situation, you can't win.)
124 Then build a binary op of d + e
127 and put mergetmp2 on the merge worklist.
129 so merge worklist = {mergetmp, c, mergetmp2}
131 Continue building binary ops of these operations until you have only
132 one operation left on the worklist.
137 mergetmp3 = mergetmp + c
139 worklist = {mergetmp2, mergetmp3}
141 mergetmp4 = mergetmp2 + mergetmp3
143 worklist = {mergetmp4}
145 because we have one operation left, we can now just set the original
146 statement equal to the result of that operation.
148 This will at least expose a + b and d + e to redundancy elimination
149 as binary operations.
151 For extra points, you can reuse the old statements to build the
152 mergetmps, since you shouldn't run out.
154 So why don't we do this?
156 Because it's expensive, and rarely will help. Most trees we are
157 reassociating have 3 or less ops. If they have 2 ops, they already
158 will be written into a nice single binary op. If you have 3 ops, a
159 single simple check suffices to tell you whether the first two are of the
160 same rank. If so, you know to order it
163 newstmt = mergetmp + op3
167 newstmt = mergetmp + op1
169 If all three are of the same rank, you can't expose them all in a
170 single binary operator anyway, so the above is *still* the best you
173 Thus, this is what we do. When we have three ops left, we check to see
174 what order to put them in, and call it a day. As a nod to vector sum
175 reduction, we check if any of the ops are really a phi node that is a
176 destructive update for the associating op, and keep the destructive
177 update together for vector sum reduction recognition. */
179 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
180 point 3a in the pass header comment. */
181 static bool reassoc_insert_powi_p
;
187 int constants_eliminated
;
190 int pows_encountered
;
195 static object_allocator
<operand_entry
> operand_entry_pool
196 ("operand entry pool");
198 /* This is used to assign a unique ID to each struct operand_entry
199 so that qsort results are identical on different hosts. */
200 static unsigned int next_operand_entry_id
;
202 /* Starting rank number for a given basic block, so that we can rank
203 operations using unmovable instructions in that BB based on the bb
205 static int64_t *bb_rank
;
207 /* Operand->rank hashtable. */
208 static hash_map
<tree
, int64_t> *operand_rank
;
210 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
211 all basic blocks the CFG should be adjusted - basic blocks
212 split right after that SSA_NAME's definition statement and before
213 the only use, which must be a bit ior. */
214 static vec
<tree
> reassoc_branch_fixups
;
217 static int64_t get_rank (tree
);
218 static bool reassoc_stmt_dominates_stmt_p (gimple
*, gimple
*);
220 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
221 possibly added by gsi_remove. */
224 reassoc_remove_stmt (gimple_stmt_iterator
*gsi
)
226 gimple
*stmt
= gsi_stmt (*gsi
);
228 if (!MAY_HAVE_DEBUG_BIND_STMTS
|| gimple_code (stmt
) == GIMPLE_PHI
)
229 return gsi_remove (gsi
, true);
231 gimple_stmt_iterator prev
= *gsi
;
233 unsigned uid
= gimple_uid (stmt
);
234 basic_block bb
= gimple_bb (stmt
);
235 bool ret
= gsi_remove (gsi
, true);
236 if (!gsi_end_p (prev
))
239 prev
= gsi_start_bb (bb
);
240 gimple
*end_stmt
= gsi_stmt (*gsi
);
241 while ((stmt
= gsi_stmt (prev
)) != end_stmt
)
243 gcc_assert (stmt
&& is_gimple_debug (stmt
) && gimple_uid (stmt
) == 0);
244 gimple_set_uid (stmt
, uid
);
250 /* Bias amount for loop-carried phis. We want this to be larger than
251 the depth of any reassociation tree we can see, but not larger than
252 the rank difference between two blocks. */
253 #define PHI_LOOP_BIAS (1 << 15)
255 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
256 an innermost loop, and the phi has only a single use which is inside
257 the loop, then the rank is the block rank of the loop latch plus an
258 extra bias for the loop-carried dependence. This causes expressions
259 calculated into an accumulator variable to be independent for each
260 iteration of the loop. If STMT is some other phi, the rank is the
261 block rank of its containing block. */
263 phi_rank (gimple
*stmt
)
265 basic_block bb
= gimple_bb (stmt
);
266 class loop
*father
= bb
->loop_father
;
272 /* We only care about real loops (those with a latch). */
274 return bb_rank
[bb
->index
];
276 /* Interesting phis must be in headers of innermost loops. */
277 if (bb
!= father
->header
279 return bb_rank
[bb
->index
];
281 /* Ignore virtual SSA_NAMEs. */
282 res
= gimple_phi_result (stmt
);
283 if (virtual_operand_p (res
))
284 return bb_rank
[bb
->index
];
286 /* The phi definition must have a single use, and that use must be
287 within the loop. Otherwise this isn't an accumulator pattern. */
288 if (!single_imm_use (res
, &use
, &use_stmt
)
289 || gimple_bb (use_stmt
)->loop_father
!= father
)
290 return bb_rank
[bb
->index
];
292 /* Look for phi arguments from within the loop. If found, bias this phi. */
293 for (i
= 0; i
< gimple_phi_num_args (stmt
); i
++)
295 tree arg
= gimple_phi_arg_def (stmt
, i
);
296 if (TREE_CODE (arg
) == SSA_NAME
297 && !SSA_NAME_IS_DEFAULT_DEF (arg
))
299 gimple
*def_stmt
= SSA_NAME_DEF_STMT (arg
);
300 if (gimple_bb (def_stmt
)->loop_father
== father
)
301 return bb_rank
[father
->latch
->index
] + PHI_LOOP_BIAS
;
305 /* Must be an uninteresting phi. */
306 return bb_rank
[bb
->index
];
309 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
310 loop-carried dependence of an innermost loop, return TRUE; else
313 loop_carried_phi (tree exp
)
318 if (TREE_CODE (exp
) != SSA_NAME
319 || SSA_NAME_IS_DEFAULT_DEF (exp
))
322 phi_stmt
= SSA_NAME_DEF_STMT (exp
);
324 if (gimple_code (SSA_NAME_DEF_STMT (exp
)) != GIMPLE_PHI
)
327 /* Non-loop-carried phis have block rank. Loop-carried phis have
328 an additional bias added in. If this phi doesn't have block rank,
329 it's biased and should not be propagated. */
330 block_rank
= bb_rank
[gimple_bb (phi_stmt
)->index
];
332 if (phi_rank (phi_stmt
) != block_rank
)
338 /* Return the maximum of RANK and the rank that should be propagated
339 from expression OP. For most operands, this is just the rank of OP.
340 For loop-carried phis, the value is zero to avoid undoing the bias
341 in favor of the phi. */
343 propagate_rank (int64_t rank
, tree op
)
347 if (loop_carried_phi (op
))
350 op_rank
= get_rank (op
);
352 return MAX (rank
, op_rank
);
355 /* Look up the operand rank structure for expression E. */
357 static inline int64_t
358 find_operand_rank (tree e
)
360 int64_t *slot
= operand_rank
->get (e
);
361 return slot
? *slot
: -1;
364 /* Insert {E,RANK} into the operand rank hashtable. */
367 insert_operand_rank (tree e
, int64_t rank
)
369 gcc_assert (rank
> 0);
370 gcc_assert (!operand_rank
->put (e
, rank
));
373 /* Given an expression E, return the rank of the expression. */
378 /* SSA_NAME's have the rank of the expression they are the result
380 For globals and uninitialized values, the rank is 0.
381 For function arguments, use the pre-setup rank.
382 For PHI nodes, stores, asm statements, etc, we use the rank of
384 For simple operations, the rank is the maximum rank of any of
385 its operands, or the bb_rank, whichever is less.
386 I make no claims that this is optimal, however, it gives good
389 /* We make an exception to the normal ranking system to break
390 dependences of accumulator variables in loops. Suppose we
391 have a simple one-block loop containing:
398 As shown, each iteration of the calculation into x is fully
399 dependent upon the iteration before it. We would prefer to
400 see this in the form:
407 If the loop is unrolled, the calculations of b and c from
408 different iterations can be interleaved.
410 To obtain this result during reassociation, we bias the rank
411 of the phi definition x_1 upward, when it is recognized as an
412 accumulator pattern. The artificial rank causes it to be
413 added last, providing the desired independence. */
415 if (TREE_CODE (e
) == SSA_NAME
)
422 /* If we already have a rank for this expression, use that. */
423 rank
= find_operand_rank (e
);
427 stmt
= SSA_NAME_DEF_STMT (e
);
428 if (gimple_code (stmt
) == GIMPLE_PHI
)
429 rank
= phi_rank (stmt
);
431 else if (!is_gimple_assign (stmt
))
432 rank
= bb_rank
[gimple_bb (stmt
)->index
];
436 /* Otherwise, find the maximum rank for the operands. As an
437 exception, remove the bias from loop-carried phis when propagating
438 the rank so that dependent operations are not also biased. */
439 /* Simply walk over all SSA uses - this takes advatage of the
440 fact that non-SSA operands are is_gimple_min_invariant and
443 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
444 rank
= propagate_rank (rank
, op
);
449 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
451 fprintf (dump_file
, "Rank for ");
452 print_generic_expr (dump_file
, e
);
453 fprintf (dump_file
, " is %" PRId64
"\n", rank
);
456 /* Note the rank in the hashtable so we don't recompute it. */
457 insert_operand_rank (e
, rank
);
461 /* Constants, globals, etc., are rank 0 */
466 /* We want integer ones to end up last no matter what, since they are
467 the ones we can do the most with. */
468 #define INTEGER_CONST_TYPE 1 << 4
469 #define FLOAT_ONE_CONST_TYPE 1 << 3
470 #define FLOAT_CONST_TYPE 1 << 2
471 #define OTHER_CONST_TYPE 1 << 1
473 /* Classify an invariant tree into integer, float, or other, so that
474 we can sort them to be near other constants of the same type. */
476 constant_type (tree t
)
478 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
479 return INTEGER_CONST_TYPE
;
480 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
482 /* Sort -1.0 and 1.0 constants last, while in some cases
483 const_binop can't optimize some inexact operations, multiplication
484 by -1.0 or 1.0 can be always merged with others. */
485 if (real_onep (t
) || real_minus_onep (t
))
486 return FLOAT_ONE_CONST_TYPE
;
487 return FLOAT_CONST_TYPE
;
490 return OTHER_CONST_TYPE
;
493 /* qsort comparison function to sort operand entries PA and PB by rank
494 so that the sorted array is ordered by rank in decreasing order. */
496 sort_by_operand_rank (const void *pa
, const void *pb
)
498 const operand_entry
*oea
= *(const operand_entry
*const *)pa
;
499 const operand_entry
*oeb
= *(const operand_entry
*const *)pb
;
501 if (oeb
->rank
!= oea
->rank
)
502 return oeb
->rank
> oea
->rank
? 1 : -1;
504 /* It's nicer for optimize_expression if constants that are likely
505 to fold when added/multiplied/whatever are put next to each
506 other. Since all constants have rank 0, order them by type. */
509 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
510 return constant_type (oea
->op
) - constant_type (oeb
->op
);
512 /* To make sorting result stable, we use unique IDs to determine
514 return oeb
->id
> oea
->id
? 1 : -1;
517 if (TREE_CODE (oea
->op
) != SSA_NAME
)
519 if (TREE_CODE (oeb
->op
) != SSA_NAME
)
520 return oeb
->id
> oea
->id
? 1 : -1;
524 else if (TREE_CODE (oeb
->op
) != SSA_NAME
)
527 /* Lastly, make sure the versions that are the same go next to each
529 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
531 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
532 versions of removed SSA_NAMEs, so if possible, prefer to sort
533 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
535 gimple
*stmta
= SSA_NAME_DEF_STMT (oea
->op
);
536 gimple
*stmtb
= SSA_NAME_DEF_STMT (oeb
->op
);
537 basic_block bba
= gimple_bb (stmta
);
538 basic_block bbb
= gimple_bb (stmtb
);
541 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
542 but the other might not. */
547 /* If neither is, compare bb_rank. */
548 if (bb_rank
[bbb
->index
] != bb_rank
[bba
->index
])
549 return (bb_rank
[bbb
->index
] >> 16) - (bb_rank
[bba
->index
] >> 16);
552 bool da
= reassoc_stmt_dominates_stmt_p (stmta
, stmtb
);
553 bool db
= reassoc_stmt_dominates_stmt_p (stmtb
, stmta
);
557 return SSA_NAME_VERSION (oeb
->op
) > SSA_NAME_VERSION (oea
->op
) ? 1 : -1;
560 return oeb
->id
> oea
->id
? 1 : -1;
563 /* Add an operand entry to *OPS for the tree operand OP. */
566 add_to_ops_vec (vec
<operand_entry
*> *ops
, tree op
, gimple
*stmt_to_insert
= NULL
)
568 operand_entry
*oe
= operand_entry_pool
.allocate ();
571 oe
->rank
= get_rank (op
);
572 oe
->id
= next_operand_entry_id
++;
574 oe
->stmt_to_insert
= stmt_to_insert
;
578 /* Add an operand entry to *OPS for the tree operand OP with repeat
582 add_repeat_to_ops_vec (vec
<operand_entry
*> *ops
, tree op
,
583 HOST_WIDE_INT repeat
)
585 operand_entry
*oe
= operand_entry_pool
.allocate ();
588 oe
->rank
= get_rank (op
);
589 oe
->id
= next_operand_entry_id
++;
591 oe
->stmt_to_insert
= NULL
;
594 reassociate_stats
.pows_encountered
++;
597 /* Returns true if we can associate the SSA def OP. */
600 can_reassociate_op_p (tree op
)
602 if (TREE_CODE (op
) == SSA_NAME
&& SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op
))
604 /* Make sure asm goto outputs do not participate in reassociation since
605 we have no way to find an insertion place after asm goto. */
606 if (TREE_CODE (op
) == SSA_NAME
607 && gimple_code (SSA_NAME_DEF_STMT (op
)) == GIMPLE_ASM
608 && gimple_asm_nlabels (as_a
<gasm
*> (SSA_NAME_DEF_STMT (op
))) != 0)
613 /* Returns true if we can reassociate operations of TYPE.
614 That is for integral or non-saturating fixed-point types, and for
615 floating point type when associative-math is enabled. */
618 can_reassociate_type_p (tree type
)
620 if ((ANY_INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_WRAPS (type
))
621 || NON_SAT_FIXED_POINT_TYPE_P (type
)
622 || (flag_associative_math
&& FLOAT_TYPE_P (type
)))
627 /* Return true if STMT is reassociable operation containing a binary
628 operation with tree code CODE, and is inside LOOP. */
631 is_reassociable_op (gimple
*stmt
, enum tree_code code
, class loop
*loop
)
633 basic_block bb
= gimple_bb (stmt
);
635 if (gimple_bb (stmt
) == NULL
)
638 if (!flow_bb_inside_loop_p (loop
, bb
))
641 if (is_gimple_assign (stmt
)
642 && gimple_assign_rhs_code (stmt
) == code
643 && has_single_use (gimple_assign_lhs (stmt
)))
645 tree rhs1
= gimple_assign_rhs1 (stmt
);
646 tree rhs2
= gimple_assign_rhs2 (stmt
);
647 if (!can_reassociate_op_p (rhs1
)
648 || (rhs2
&& !can_reassociate_op_p (rhs2
)))
657 /* Return true if STMT is a nop-conversion. */
660 gimple_nop_conversion_p (gimple
*stmt
)
662 if (gassign
*ass
= dyn_cast
<gassign
*> (stmt
))
664 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass
))
665 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass
)),
666 TREE_TYPE (gimple_assign_rhs1 (ass
))))
672 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
673 operand of the negate operation. Otherwise, return NULL. */
676 get_unary_op (tree name
, enum tree_code opcode
)
678 gimple
*stmt
= SSA_NAME_DEF_STMT (name
);
680 /* Look through nop conversions (sign changes). */
681 if (gimple_nop_conversion_p (stmt
)
682 && TREE_CODE (gimple_assign_rhs1 (stmt
)) == SSA_NAME
)
683 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
685 if (!is_gimple_assign (stmt
))
688 if (gimple_assign_rhs_code (stmt
) == opcode
)
689 return gimple_assign_rhs1 (stmt
);
693 /* Return true if OP1 and OP2 have the same value if casted to either type. */
696 ops_equal_values_p (tree op1
, tree op2
)
702 if (TREE_CODE (op1
) == SSA_NAME
)
704 gimple
*stmt
= SSA_NAME_DEF_STMT (op1
);
705 if (gimple_nop_conversion_p (stmt
))
707 op1
= gimple_assign_rhs1 (stmt
);
713 if (TREE_CODE (op2
) == SSA_NAME
)
715 gimple
*stmt
= SSA_NAME_DEF_STMT (op2
);
716 if (gimple_nop_conversion_p (stmt
))
718 op2
= gimple_assign_rhs1 (stmt
);
729 /* If CURR and LAST are a pair of ops that OPCODE allows us to
730 eliminate through equivalences, do so, remove them from OPS, and
731 return true. Otherwise, return false. */
734 eliminate_duplicate_pair (enum tree_code opcode
,
735 vec
<operand_entry
*> *ops
,
742 /* If we have two of the same op, and the opcode is & |, min, or max,
743 we can eliminate one of them.
744 If we have two of the same op, and the opcode is ^, we can
745 eliminate both of them. */
747 if (last
&& last
->op
== curr
->op
)
755 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
757 fprintf (dump_file
, "Equivalence: ");
758 print_generic_expr (dump_file
, curr
->op
);
759 fprintf (dump_file
, " [&|minmax] ");
760 print_generic_expr (dump_file
, last
->op
);
761 fprintf (dump_file
, " -> ");
762 print_generic_stmt (dump_file
, last
->op
);
765 ops
->ordered_remove (i
);
766 reassociate_stats
.ops_eliminated
++;
771 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
773 fprintf (dump_file
, "Equivalence: ");
774 print_generic_expr (dump_file
, curr
->op
);
775 fprintf (dump_file
, " ^ ");
776 print_generic_expr (dump_file
, last
->op
);
777 fprintf (dump_file
, " -> nothing\n");
780 reassociate_stats
.ops_eliminated
+= 2;
782 if (ops
->length () == 2)
785 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
790 ops
->ordered_remove (i
-1);
791 ops
->ordered_remove (i
-1);
803 static vec
<tree
> plus_negates
;
805 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
806 expression, look in OPS for a corresponding positive operation to cancel
807 it out. If we find one, remove the other from OPS, replace
808 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
812 eliminate_plus_minus_pair (enum tree_code opcode
,
813 vec
<operand_entry
*> *ops
,
814 unsigned int currindex
,
822 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
825 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
826 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
827 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
830 /* Any non-negated version will have a rank that is one less than
831 the current rank. So once we hit those ranks, if we don't find
834 for (i
= currindex
+ 1;
835 ops
->iterate (i
, &oe
)
836 && oe
->rank
>= curr
->rank
- 1 ;
840 && ops_equal_values_p (oe
->op
, negateop
))
842 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
844 fprintf (dump_file
, "Equivalence: ");
845 print_generic_expr (dump_file
, negateop
);
846 fprintf (dump_file
, " + -");
847 print_generic_expr (dump_file
, oe
->op
);
848 fprintf (dump_file
, " -> 0\n");
851 ops
->ordered_remove (i
);
852 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
853 ops
->ordered_remove (currindex
);
854 reassociate_stats
.ops_eliminated
++;
859 && ops_equal_values_p (oe
->op
, notop
))
861 tree op_type
= TREE_TYPE (oe
->op
);
863 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
865 fprintf (dump_file
, "Equivalence: ");
866 print_generic_expr (dump_file
, notop
);
867 fprintf (dump_file
, " + ~");
868 print_generic_expr (dump_file
, oe
->op
);
869 fprintf (dump_file
, " -> -1\n");
872 ops
->ordered_remove (i
);
873 add_to_ops_vec (ops
, build_all_ones_cst (op_type
));
874 ops
->ordered_remove (currindex
);
875 reassociate_stats
.ops_eliminated
++;
881 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
882 save it for later inspection in repropagate_negates(). */
883 if (negateop
!= NULL_TREE
884 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr
->op
)) == NEGATE_EXPR
)
885 plus_negates
.safe_push (curr
->op
);
890 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
891 bitwise not expression, look in OPS for a corresponding operand to
892 cancel it out. If we find one, remove the other from OPS, replace
893 OPS[CURRINDEX] with 0, and return true. Otherwise, return
897 eliminate_not_pairs (enum tree_code opcode
,
898 vec
<operand_entry
*> *ops
,
899 unsigned int currindex
,
906 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
907 || TREE_CODE (curr
->op
) != SSA_NAME
)
910 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
911 if (notop
== NULL_TREE
)
914 /* Any non-not version will have a rank that is one less than
915 the current rank. So once we hit those ranks, if we don't find
918 for (i
= currindex
+ 1;
919 ops
->iterate (i
, &oe
)
920 && oe
->rank
>= curr
->rank
- 1;
925 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
927 fprintf (dump_file
, "Equivalence: ");
928 print_generic_expr (dump_file
, notop
);
929 if (opcode
== BIT_AND_EXPR
)
930 fprintf (dump_file
, " & ~");
931 else if (opcode
== BIT_IOR_EXPR
)
932 fprintf (dump_file
, " | ~");
933 print_generic_expr (dump_file
, oe
->op
);
934 if (opcode
== BIT_AND_EXPR
)
935 fprintf (dump_file
, " -> 0\n");
936 else if (opcode
== BIT_IOR_EXPR
)
937 fprintf (dump_file
, " -> -1\n");
940 if (opcode
== BIT_AND_EXPR
)
941 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
942 else if (opcode
== BIT_IOR_EXPR
)
943 oe
->op
= build_all_ones_cst (TREE_TYPE (oe
->op
));
945 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
947 ops
->quick_push (oe
);
955 /* Use constant value that may be present in OPS to try to eliminate
956 operands. Note that this function is only really used when we've
957 eliminated ops for other reasons, or merged constants. Across
958 single statements, fold already does all of this, plus more. There
959 is little point in duplicating logic, so I've only included the
960 identities that I could ever construct testcases to trigger. */
963 eliminate_using_constants (enum tree_code opcode
,
964 vec
<operand_entry
*> *ops
)
966 operand_entry
*oelast
= ops
->last ();
967 tree type
= TREE_TYPE (oelast
->op
);
969 if (oelast
->rank
== 0
970 && (ANY_INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
975 if (integer_zerop (oelast
->op
))
977 if (ops
->length () != 1)
979 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
980 fprintf (dump_file
, "Found & 0, removing all other ops\n");
982 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
985 ops
->quick_push (oelast
);
989 else if (integer_all_onesp (oelast
->op
))
991 if (ops
->length () != 1)
993 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
994 fprintf (dump_file
, "Found & -1, removing\n");
996 reassociate_stats
.ops_eliminated
++;
1001 if (integer_all_onesp (oelast
->op
))
1003 if (ops
->length () != 1)
1005 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1006 fprintf (dump_file
, "Found | -1, removing all other ops\n");
1008 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
1011 ops
->quick_push (oelast
);
1015 else if (integer_zerop (oelast
->op
))
1017 if (ops
->length () != 1)
1019 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1020 fprintf (dump_file
, "Found | 0, removing\n");
1022 reassociate_stats
.ops_eliminated
++;
1027 if (integer_zerop (oelast
->op
)
1028 || (FLOAT_TYPE_P (type
)
1029 && !HONOR_NANS (type
)
1030 && !HONOR_SIGNED_ZEROS (type
)
1031 && real_zerop (oelast
->op
)))
1033 if (ops
->length () != 1)
1035 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1036 fprintf (dump_file
, "Found * 0, removing all other ops\n");
1038 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
1040 ops
->quick_push (oelast
);
1044 else if (integer_onep (oelast
->op
)
1045 || (FLOAT_TYPE_P (type
)
1046 && !HONOR_SNANS (type
)
1047 && real_onep (oelast
->op
)))
1049 if (ops
->length () != 1)
1051 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1052 fprintf (dump_file
, "Found * 1, removing\n");
1054 reassociate_stats
.ops_eliminated
++;
1062 if (integer_zerop (oelast
->op
)
1063 || (FLOAT_TYPE_P (type
)
1064 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
1065 && fold_real_zero_addition_p (type
, 0, oelast
->op
,
1066 opcode
== MINUS_EXPR
)))
1068 if (ops
->length () != 1)
1070 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1071 fprintf (dump_file
, "Found [|^+] 0, removing\n");
1073 reassociate_stats
.ops_eliminated
++;
1085 static void linearize_expr_tree (vec
<operand_entry
*> *, gimple
*,
1088 /* Structure for tracking and counting operands. */
1092 enum tree_code oecode
;
1097 /* The heap for the oecount hashtable and the sorted list of operands. */
1098 static vec
<oecount
> cvec
;
1101 /* Oecount hashtable helpers. */
1103 struct oecount_hasher
: int_hash
<int, 0, 1>
1105 static inline hashval_t
hash (int);
1106 static inline bool equal (int, int);
1109 /* Hash function for oecount. */
1112 oecount_hasher::hash (int p
)
1114 const oecount
*c
= &cvec
[p
- 42];
1115 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
1118 /* Comparison function for oecount. */
1121 oecount_hasher::equal (int p1
, int p2
)
1123 const oecount
*c1
= &cvec
[p1
- 42];
1124 const oecount
*c2
= &cvec
[p2
- 42];
1125 return c1
->oecode
== c2
->oecode
&& c1
->op
== c2
->op
;
1128 /* Comparison function for qsort sorting oecount elements by count. */
1131 oecount_cmp (const void *p1
, const void *p2
)
1133 const oecount
*c1
= (const oecount
*)p1
;
1134 const oecount
*c2
= (const oecount
*)p2
;
1135 if (c1
->cnt
!= c2
->cnt
)
1136 return c1
->cnt
> c2
->cnt
? 1 : -1;
1138 /* If counts are identical, use unique IDs to stabilize qsort. */
1139 return c1
->id
> c2
->id
? 1 : -1;
1142 /* Return TRUE iff STMT represents a builtin call that raises OP
1143 to some exponent. */
1146 stmt_is_power_of_op (gimple
*stmt
, tree op
)
1148 if (!is_gimple_call (stmt
))
1151 switch (gimple_call_combined_fn (stmt
))
1155 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1162 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1163 in place and return the result. Assumes that stmt_is_power_of_op
1164 was previously called for STMT and returned TRUE. */
1166 static HOST_WIDE_INT
1167 decrement_power (gimple
*stmt
)
1169 REAL_VALUE_TYPE c
, cint
;
1170 HOST_WIDE_INT power
;
1173 switch (gimple_call_combined_fn (stmt
))
1176 arg1
= gimple_call_arg (stmt
, 1);
1177 c
= TREE_REAL_CST (arg1
);
1178 power
= real_to_integer (&c
) - 1;
1179 real_from_integer (&cint
, VOIDmode
, power
, SIGNED
);
1180 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1184 arg1
= gimple_call_arg (stmt
, 1);
1185 power
= TREE_INT_CST_LOW (arg1
) - 1;
1186 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1194 /* Replace SSA defined by STMT and replace all its uses with new
1195 SSA. Also return the new SSA. */
1198 make_new_ssa_for_def (gimple
*stmt
, enum tree_code opcode
, tree op
)
1202 imm_use_iterator iter
;
1203 tree new_lhs
, new_debug_lhs
= NULL_TREE
;
1204 tree lhs
= gimple_get_lhs (stmt
);
1206 new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
1207 gimple_set_lhs (stmt
, new_lhs
);
1209 /* Also need to update GIMPLE_DEBUGs. */
1210 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
1212 tree repl
= new_lhs
;
1213 if (is_gimple_debug (use_stmt
))
1215 if (new_debug_lhs
== NULL_TREE
)
1217 new_debug_lhs
= make_node (DEBUG_EXPR_DECL
);
1219 = gimple_build_debug_bind (new_debug_lhs
,
1220 build2 (opcode
, TREE_TYPE (lhs
),
1223 DECL_ARTIFICIAL (new_debug_lhs
) = 1;
1224 TREE_TYPE (new_debug_lhs
) = TREE_TYPE (lhs
);
1225 SET_DECL_MODE (new_debug_lhs
, TYPE_MODE (TREE_TYPE (lhs
)));
1226 gimple_set_uid (def_temp
, gimple_uid (stmt
));
1227 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1228 gsi_insert_after (&gsi
, def_temp
, GSI_SAME_STMT
);
1230 repl
= new_debug_lhs
;
1232 FOR_EACH_IMM_USE_ON_STMT (use
, iter
)
1233 SET_USE (use
, repl
);
1234 update_stmt (use_stmt
);
1239 /* Replace all SSAs defined in STMTS_TO_FIX and replace its
1240 uses with new SSAs. Also do this for the stmt that defines DEF
1241 if *DEF is not OP. */
1244 make_new_ssa_for_all_defs (tree
*def
, enum tree_code opcode
, tree op
,
1245 vec
<gimple
*> &stmts_to_fix
)
1251 && TREE_CODE (*def
) == SSA_NAME
1252 && (stmt
= SSA_NAME_DEF_STMT (*def
))
1253 && gimple_code (stmt
) != GIMPLE_NOP
)
1254 *def
= make_new_ssa_for_def (stmt
, opcode
, op
);
1256 FOR_EACH_VEC_ELT (stmts_to_fix
, i
, stmt
)
1257 make_new_ssa_for_def (stmt
, opcode
, op
);
/* NOTE(review): damaged extraction -- interior lines elided; code kept
   byte-identical, comments only.  */
1260 /* Find the single immediate use of STMT's LHS, and replace it
1261 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1262 replace *DEF with OP as well. */
1265 propagate_op_to_single_use (tree op
, gimple
*stmt
, tree
*def
)
1270 gimple_stmt_iterator gsi
;
/* Pick the LHS from either a call or a plain assignment.  */
1272 if (is_gimple_call (stmt
))
1273 lhs
= gimple_call_lhs (stmt
);
1275 lhs
= gimple_assign_lhs (stmt
);
/* The LHS must have exactly one immediate use; locate it so it can be
   rewritten to OP (rewrite lines elided by the extraction).  */
1277 gcc_assert (has_single_use (lhs
));
1278 single_imm_use (lhs
, &use
, &use_stmt
);
/* Non-SSA replacement operands require re-validating the use stmt.  */
1282 if (TREE_CODE (op
) != SSA_NAME
)
1283 update_stmt (use_stmt
);
/* Detach STMT's virtual def and remove STMT itself.  */
1284 gsi
= gsi_for_stmt (stmt
);
1285 unlink_stmt_vdef (stmt
);
1286 reassoc_remove_stmt (&gsi
);
1287 release_defs (stmt
);
/* NOTE(review): damaged extraction -- the enclosing walk loop, braces and
   return/exit statements are elided; code kept byte-identical, comments
   only.  */
1290 /* Walks the linear chain with result *DEF searching for an operation
1291 with operand OP and code OPCODE removing that from the chain. *DEF
1292 is updated if there is only one operand but no operation left. */
1295 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
/* Remember the original *DEF so we can tell below whether the chain head
   was replaced during the walk.  */
1297 tree orig_def
= *def
;
1298 gimple
*stmt
= SSA_NAME_DEF_STMT (*def
);
1299 /* PR72835 - Record the stmt chain that has to be updated such that
1300 we dont use the same LHS when the values computed are different. */
1301 auto_vec
<gimple
*, 64> stmts_to_fix
;
/* For multiplication chains, handle __builtin_powi results and negations
   specially: a power of OP can be decremented in place, and a NEGATE of OP
   (or OP being -1) folds into the chain.  */
1307 if (opcode
== MULT_EXPR
)
1309 if (stmt_is_power_of_op (stmt
, op
))
1311 if (decrement_power (stmt
) == 1)
1313 if (stmts_to_fix
.length () > 0)
1314 stmts_to_fix
.pop ();
1315 propagate_op_to_single_use (op
, stmt
, def
);
1319 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
)
1321 if (gimple_assign_rhs1 (stmt
) == op
)
1323 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
1324 if (stmts_to_fix
.length () > 0)
1325 stmts_to_fix
.pop ();
1326 propagate_op_to_single_use (cst
, stmt
, def
);
1329 else if (integer_minus_onep (op
)
1330 || real_minus_onep (op
))
1332 gimple_assign_set_rhs_code
1333 (stmt
, TREE_CODE (gimple_assign_rhs1 (stmt
)));
1339 name
= gimple_assign_rhs1 (stmt
);
1341 /* If this is the operation we look for and one of the operands
1342 is ours simply propagate the other operand into the stmts
1344 if (gimple_assign_rhs_code (stmt
) == opcode
1346 || gimple_assign_rhs2 (stmt
) == op
))
1349 name
= gimple_assign_rhs2 (stmt
);
1350 if (stmts_to_fix
.length () > 0)
1351 stmts_to_fix
.pop ();
1352 propagate_op_to_single_use (name
, stmt
, def
);
1356 /* We might have a multiply of two __builtin_pow* calls, and
1357 the operand might be hiding in the rightmost one. Likewise
1358 this can happen for a negate. */
1359 if (opcode
== MULT_EXPR
1360 && gimple_assign_rhs_code (stmt
) == opcode
1361 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1362 && has_single_use (gimple_assign_rhs2 (stmt
)))
1364 gimple
*stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1365 if (stmt_is_power_of_op (stmt2
, op
))
1367 if (decrement_power (stmt2
) == 1)
1368 propagate_op_to_single_use (op
, stmt2
, def
);
1370 stmts_to_fix
.safe_push (stmt2
);
1373 else if (is_gimple_assign (stmt2
)
1374 && gimple_assign_rhs_code (stmt2
) == NEGATE_EXPR
)
1376 if (gimple_assign_rhs1 (stmt2
) == op
)
1378 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
1379 propagate_op_to_single_use (cst
, stmt2
, def
);
1382 else if (integer_minus_onep (op
)
1383 || real_minus_onep (op
))
1385 stmts_to_fix
.safe_push (stmt2
);
1386 gimple_assign_set_rhs_code
1387 (stmt2
, TREE_CODE (gimple_assign_rhs1 (stmt2
)));
1393 /* Continue walking the chain. */
1394 gcc_assert (name
!= op
1395 && TREE_CODE (name
) == SSA_NAME
);
1396 stmt
= SSA_NAME_DEF_STMT (name
);
1397 stmts_to_fix
.safe_push (stmt
);
/* After the walk: if any statements were recorded, or the chain head was
   not replaced, give all affected defs fresh SSA names (PR72835).  */
1401 if (stmts_to_fix
.length () > 0 || *def
== orig_def
)
1402 make_new_ssa_for_all_defs (def
, opcode
, op
, stmts_to_fix
);
/* NOTE(review): damaged extraction -- return statements and braces elided;
   code kept byte-identical, comments only.  */
1405 /* Returns true if statement S1 dominates statement S2. Like
1406 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1409 reassoc_stmt_dominates_stmt_p (gimple
*s1
, gimple
*s2
)
1411 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1413 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1414 SSA_NAME. Assume it lives at the beginning of function and
1415 thus dominates everything. */
1416 if (!bb1
|| s1
== s2
)
1419 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1425 /* PHIs in the same basic block are assumed to be
1426 executed all in parallel, if only one stmt is a PHI,
1427 it dominates the other stmt in the same basic block. */
1428 if (gimple_code (s1
) == GIMPLE_PHI
)
1431 if (gimple_code (s2
) == GIMPLE_PHI
)
/* Same-block non-PHIs: compare the precomputed stmt UIDs first.  */
1434 gcc_assert (gimple_uid (s1
) && gimple_uid (s2
));
1436 if (gimple_uid (s1
) < gimple_uid (s2
))
1439 if (gimple_uid (s1
) > gimple_uid (s2
))
/* Equal UIDs (stmts inserted between UID renumberings): walk forward from
   S1 within the same UID group to see whether S2 follows it.  The
   return statements of this walk are elided by the extraction.  */
1442 gimple_stmt_iterator gsi
= gsi_for_stmt (s1
);
1443 unsigned int uid
= gimple_uid (s1
);
1444 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1446 gimple
*s
= gsi_stmt (gsi
);
1447 if (gimple_uid (s
) != uid
)
/* Different blocks: fall back to the CFG dominator query.  */
1456 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
/* NOTE(review): damaged extraction -- interior lines elided; code kept
   byte-identical, comments only.  */
1459 /* Insert STMT after INSERT_POINT. */
1462 insert_stmt_after (gimple
*stmt
, gimple
*insert_point
)
1464 gimple_stmt_iterator gsi
;
/* Case split on INSERT_POINT: after a PHI we only note its block here (the
   actual insertion uses gsi_after_labels below); after a non-block-ending
   stmt we insert directly after it; a GIMPLE_ASM that ends a block gives no
   usable insertion point.  */
1467 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1468 bb
= gimple_bb (insert_point
);
1469 else if (!stmt_ends_bb_p (insert_point
))
1471 gsi
= gsi_for_stmt (insert_point
);
1472 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1473 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1476 else if (gimple_code (insert_point
) == GIMPLE_ASM
)
1477 /* We have no idea where to insert - it depends on where the
1478 uses will be placed. */
1481 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1482 thus if it must end a basic block, it should be a call that can
1483 throw, or some assignment that can throw. If it throws, the LHS
1484 of it will not be initialized though, so only valid places using
1485 the SSA_NAME should be dominated by the fallthru edge. */
1486 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
/* Insert at the start of BB (after labels).  If the block is empty, take
   the UID from its last stmt (or 1) so UID ordering stays monotone.  */
1487 gsi
= gsi_after_labels (bb
);
1488 if (gsi_end_p (gsi
))
1490 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1491 gimple_set_uid (stmt
,
1492 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1495 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1496 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
/* NOTE(review): damaged extraction -- interior lines elided; code kept
   byte-identical, comments only.  */
1499 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1500 the result. Places the statement after the definition of either
1501 OP1 or OP2. Returns the new statement. */
1504 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
1506 gimple
*op1def
= NULL
, *op2def
= NULL
;
1507 gimple_stmt_iterator gsi
;
1511 /* Create the addition statement. */
1512 op
= make_ssa_name (type
);
1513 sum
= gimple_build_assign (op
, opcode
, op1
, op2
);
1515 /* Find an insertion place and insert. */
1516 if (TREE_CODE (op1
) == SSA_NAME
)
1517 op1def
= SSA_NAME_DEF_STMT (op1
);
1518 if (TREE_CODE (op2
) == SSA_NAME
)
1519 op2def
= SSA_NAME_DEF_STMT (op2
);
/* Neither operand has a real defining statement: insert at the start of
   the function's first block (the entry block's single successor).  */
1520 if ((!op1def
|| gimple_nop_p (op1def
))
1521 && (!op2def
|| gimple_nop_p (op2def
)))
1523 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1524 if (gsi_end_p (gsi
))
1526 gimple_stmt_iterator gsi2
1527 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1528 gimple_set_uid (sum
,
1529 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1532 gimple_set_uid (sum
, gimple_uid (gsi_stmt (gsi
)));
1533 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
/* Otherwise insert after whichever operand definition comes later
   (the one dominated by the other).  */
1537 gimple
*insert_point
;
1538 if ((!op1def
|| gimple_nop_p (op1def
))
1539 || (op2def
&& !gimple_nop_p (op2def
)
1540 && reassoc_stmt_dominates_stmt_p (op1def
, op2def
)))
1541 insert_point
= op2def
;
1543 insert_point
= op1def
;
1544 insert_stmt_after (sum
, insert_point
);
/* NOTE(review): damaged extraction -- braces, returns, several declarations
   and some comment terminators are elided from this function; code kept
   byte-identical, comments only.  */
1551 /* Perform un-distribution of divisions and multiplications.
1552 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1553 to (A + B) / X for real X.
1555 The algorithm is organized as follows.
1557 - First we walk the addition chain *OPS looking for summands that
1558 are defined by a multiplication or a real division. This results
1559 in the candidates bitmap with relevant indices into *OPS.
1561 - Second we build the chains of multiplications or divisions for
1562 these candidates, counting the number of occurrences of (operand, code)
1563 pairs in all of the candidates chains.
1565 - Third we sort the (operand, code) pairs by number of occurrence and
1566 process them starting with the pair with the most uses.
1568 * For each such pair we walk the candidates again to build a
1569 second candidate bitmap noting all multiplication/division chains
1570 that have at least one occurrence of (operand, code).
1572 * We build an alternate addition chain only covering these
1573 candidates with one (operand, code) operation removed from their
1574 multiplication/division chain.
1576 * The first candidate gets replaced by the alternate addition chain
1577 multiplied/divided by the operand.
1579 * All candidate chains get disabled for further processing and
1580 processing of (operand, code) pairs continues.
1582 The alternate addition chains built are re-processed by the main
1583 reassociation algorithm which allows optimizing a * x * y + b * y * x
1584 to (a + b ) * x * y in one invocation of the reassociation pass. */
1587 undistribute_ops_list (enum tree_code opcode
,
1588 vec
<operand_entry
*> *ops
, class loop
*loop
)
1590 unsigned int length
= ops
->length ();
1593 unsigned nr_candidates
, nr_candidates2
;
1594 sbitmap_iterator sbi0
;
1595 vec
<operand_entry
*> *subops
;
1596 bool changed
= false;
1597 unsigned int next_oecount_id
= 0;
/* Early-out guard; its first clause (presumably a minimum-length check on
   the operand list) is elided by the extraction -- TODO confirm.  */
1600 || opcode
!= PLUS_EXPR
)
1603 /* Build a list of candidates to process. */
1604 auto_sbitmap
candidates (length
);
1605 bitmap_clear (candidates
);
1607 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1609 enum tree_code dcode
;
1612 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1614 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1615 if (!is_gimple_assign (oe1def
))
1617 dcode
= gimple_assign_rhs_code (oe1def
);
1618 if ((dcode
!= MULT_EXPR
1619 && dcode
!= RDIV_EXPR
)
1620 || !is_reassociable_op (oe1def
, dcode
, loop
))
1623 bitmap_set_bit (candidates
, i
);
1627 if (nr_candidates
< 2)
1630 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1632 fprintf (dump_file
, "searching for un-distribute opportunities ");
1633 print_generic_expr (dump_file
,
1634 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, TDF_NONE
);
1635 fprintf (dump_file
, " %d\n", nr_candidates
);
1638 /* Build linearized sub-operand lists and the counting table. */
1641 hash_table
<oecount_hasher
> ctable (15);
1643 /* ??? Macro arguments cannot have multi-argument template types in
1644 them. This typedef is needed to workaround that limitation. */
1645 typedef vec
<operand_entry
*> vec_operand_entry_t_heap
;
1646 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1647 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1650 enum tree_code oecode
;
1653 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1654 oecode
= gimple_assign_rhs_code (oedef
);
1655 linearize_expr_tree (&subops
[i
], oedef
,
1656 associative_tree_code (oecode
), false);
1658 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
/* Count each (operand, code) pair; ids offset by 42 so bucket numbers in
   the hash table stay nonzero (slot arithmetic below undoes the bias).  */
1665 c
.id
= next_oecount_id
++;
1668 idx
= cvec
.length () + 41;
1669 slot
= ctable
.find_slot (idx
, INSERT
);
1677 cvec
[*slot
- 42].cnt
++;
1682 /* Sort the counting table. */
1683 cvec
.qsort (oecount_cmp
);
1685 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1688 fprintf (dump_file
, "Candidates:\n");
1689 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1691 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1692 c
->oecode
== MULT_EXPR
1693 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1694 print_generic_expr (dump_file
, c
->op
);
1695 fprintf (dump_file
, "\n");
1699 /* Process the (operand, code) pairs in order of most occurrence. */
1700 auto_sbitmap
candidates2 (length
);
1701 while (!cvec
.is_empty ())
1703 oecount
*c
= &cvec
.last ();
1707 /* Now collect the operands in the outer chain that contain
1708 the common operand in their inner chain. */
1709 bitmap_clear (candidates2
);
1711 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1714 enum tree_code oecode
;
1716 tree op
= (*ops
)[i
]->op
;
1718 /* If we undistributed in this chain already this may be
1720 if (TREE_CODE (op
) != SSA_NAME
)
1723 oedef
= SSA_NAME_DEF_STMT (op
);
1724 oecode
= gimple_assign_rhs_code (oedef
);
1725 if (oecode
!= c
->oecode
)
1728 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1730 if (oe1
->op
== c
->op
)
1732 bitmap_set_bit (candidates2
, i
);
1739 if (nr_candidates2
>= 2)
1741 operand_entry
*oe1
, *oe2
;
1743 int first
= bitmap_first_set_bit (candidates2
);
1745 /* Build the new addition chain. */
1746 oe1
= (*ops
)[first
];
1747 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1749 fprintf (dump_file
, "Building (");
1750 print_generic_expr (dump_file
, oe1
->op
);
1752 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1753 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1757 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1759 fprintf (dump_file
, " + ");
1760 print_generic_expr (dump_file
, oe2
->op
);
/* Strip the common factor from this chain and fold its value into the
   running sum; the consumed entry becomes a neutral zero.  */
1762 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1763 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1764 oe1
->op
, oe2
->op
, opcode
);
1765 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1767 oe1
->op
= gimple_get_lhs (sum
);
1770 /* Apply the multiplication/division. */
1771 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1772 oe1
->op
, c
->op
, c
->oecode
);
1773 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1775 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1776 print_generic_expr (dump_file
, c
->op
);
1777 fprintf (dump_file
, "\n");
1780 /* Record it in the addition chain and disable further
1781 undistribution with this op. */
1782 oe1
->op
= gimple_assign_lhs (prod
);
1783 oe1
->rank
= get_rank (oe1
->op
);
1784 subops
[first
].release ();
/* Release all remaining per-candidate sub-operand vectors.  */
1792 for (i
= 0; i
< ops
->length (); ++i
)
1793 subops
[i
].release ();
/* NOTE(review): damaged extraction -- the `struct v_info` header and its
   vec_type field line are elided here; the auto_vec member below belongs to
   that struct.  Code kept byte-identical, comments only.  */
1800 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1801 first: element index for each relevant BIT_FIELD_REF.
1802 second: the index of vec ops* for each relevant BIT_FIELD_REF. */
1803 typedef std::pair
<unsigned, unsigned> v_info_elem
;
1806 auto_vec
<v_info_elem
, 32> vec
;
1808 typedef v_info
*v_info_ptr
;
/* NOTE(review): damaged extraction -- the return statements of this qsort
   comparator are elided; code kept byte-identical, comments only.  */
1810 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1812 sort_by_mach_mode (const void *p_i
, const void *p_j
)
1814 const tree tr1
= *((const tree
*) p_i
);
1815 const tree tr2
= *((const tree
*) p_j
);
/* Primary key: the machine mode of each SSA_NAME's vector type.  */
1816 unsigned int mode1
= TYPE_MODE (TREE_TYPE (tr1
));
1817 unsigned int mode2
= TYPE_MODE (TREE_TYPE (tr2
));
1820 else if (mode1
< mode2
)
/* Tie-break on SSA_NAME_VERSION to make the ordering deterministic.  */
1822 if (SSA_NAME_VERSION (tr1
) < SSA_NAME_VERSION (tr2
))
1824 else if (SSA_NAME_VERSION (tr1
) > SSA_NAME_VERSION (tr2
))
/* NOTE(review): damaged extraction -- the statement that deallocates each
   entry (between the two lines touching (*it).second) is elided; code kept
   byte-identical, comments only.  */
1829 /* Cleanup hash map for VECTOR information. */
1831 cleanup_vinfo_map (hash_map
<tree
, v_info_ptr
> &info_map
)
/* Walk every map entry, free its v_info payload and null the slot so the
   map holds no dangling pointers afterwards.  */
1833 for (hash_map
<tree
, v_info_ptr
>::iterator it
= info_map
.begin ();
1834 it
!= info_map
.end (); ++it
)
1836 v_info_ptr info
= (*it
).second
;
1838 (*it
).second
= NULL
;
1842 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1843 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1845 Vs = (V1 + V2 + ... + Vn)
1846 Vs[0] + Vs[1] + ... + Vs[k]
1848 The basic steps are listed below:
1850 1) Check the addition chain *OPS by looking those summands coming from
1851 VECTOR bit_field_ref on VECTOR type. Put the information into
1852 v_info_map for each satisfied summand, using VECTOR SSA_NAME as key.
1854 2) For each key (VECTOR SSA_NAME), validate all its BIT_FIELD_REFs are
1855 continuous, they can cover the whole VECTOR perfectly without any holes.
1856 Obtain one VECTOR list which contain candidates to be transformed.
1858 3) Sort the VECTOR list by machine mode of VECTOR type, for each group of
1859 candidates with same mode, build the addition statements for them and
1860 generate BIT_FIELD_REFs accordingly.
1863 The current implementation requires the whole VECTORs should be fully
1864 covered, but it can be extended to support partial, checking adjacent
1865 but not fill the whole, it may need some cost model to define the
1866 boundary to do or not.
1869 undistribute_bitref_for_vector (enum tree_code opcode
,
1870 vec
<operand_entry
*> *ops
, struct loop
*loop
)
1872 if (ops
->length () <= 1)
1875 if (opcode
!= PLUS_EXPR
1876 && opcode
!= MULT_EXPR
1877 && opcode
!= BIT_XOR_EXPR
1878 && opcode
!= BIT_IOR_EXPR
1879 && opcode
!= BIT_AND_EXPR
)
1882 hash_map
<tree
, v_info_ptr
> v_info_map
;
1886 /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
1887 information into map. */
1888 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1890 enum tree_code dcode
;
1893 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1895 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1896 if (!is_gimple_assign (oe1def
))
1898 dcode
= gimple_assign_rhs_code (oe1def
);
1899 if (dcode
!= BIT_FIELD_REF
|| !is_reassociable_op (oe1def
, dcode
, loop
))
1902 tree rhs
= gimple_assign_rhs1 (oe1def
);
1903 tree vec
= TREE_OPERAND (rhs
, 0);
1904 tree vec_type
= TREE_TYPE (vec
);
1906 if (TREE_CODE (vec
) != SSA_NAME
|| !VECTOR_TYPE_P (vec_type
))
1909 /* Ignore it if target machine can't support this VECTOR type. */
1910 if (!VECTOR_MODE_P (TYPE_MODE (vec_type
)))
1913 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1914 if (!TYPE_VECTOR_SUBPARTS (vec_type
).is_constant ())
1917 if (VECTOR_TYPE_P (TREE_TYPE (rhs
))
1918 || !is_a
<scalar_mode
> (TYPE_MODE (TREE_TYPE (rhs
))))
1921 /* The type of BIT_FIELD_REF might not be equal to the element type of
1922 the vector. We want to use a vector type with element type the
1923 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1924 if (!useless_type_conversion_p (TREE_TYPE (rhs
), TREE_TYPE (vec_type
)))
1926 machine_mode simd_mode
;
1927 unsigned HOST_WIDE_INT size
, nunits
;
1928 unsigned HOST_WIDE_INT elem_size
1929 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs
)));
1930 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type
)).is_constant (&size
))
1932 if (size
<= elem_size
|| (size
% elem_size
) != 0)
1934 nunits
= size
/ elem_size
;
1935 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs
)),
1936 nunits
).exists (&simd_mode
))
1938 vec_type
= build_vector_type_for_mode (TREE_TYPE (rhs
), simd_mode
);
1940 /* Ignore it if target machine can't support this VECTOR type. */
1941 if (!VECTOR_MODE_P (TYPE_MODE (vec_type
)))
1944 /* Check const vector type, constrain BIT_FIELD_REF offset and
1946 if (!TYPE_VECTOR_SUBPARTS (vec_type
).is_constant ())
1949 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type
)),
1950 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec
)))))
1954 tree elem_type
= TREE_TYPE (vec_type
);
1955 unsigned HOST_WIDE_INT elem_size
= tree_to_uhwi (TYPE_SIZE (elem_type
));
1956 if (maybe_ne (bit_field_size (rhs
), elem_size
))
1960 if (!constant_multiple_p (bit_field_offset (rhs
), elem_size
, &idx
))
1963 /* Ignore it if target machine can't support this type of VECTOR
1965 optab op_tab
= optab_for_tree_code (opcode
, vec_type
, optab_vector
);
1966 if (optab_handler (op_tab
, TYPE_MODE (vec_type
)) == CODE_FOR_nothing
)
1970 v_info_ptr
&info
= v_info_map
.get_or_insert (vec
, &existed
);
1974 info
->vec_type
= vec_type
;
1976 else if (!types_compatible_p (vec_type
, info
->vec_type
))
1978 info
->vec
.safe_push (std::make_pair (idx
, i
));
1981 /* At least two VECTOR to combine. */
1982 if (v_info_map
.elements () <= 1)
1984 cleanup_vinfo_map (v_info_map
);
1988 /* Verify all VECTOR candidates by checking two conditions:
1989 1) sorted offsets are adjacent, no holes.
1990 2) can fill the whole VECTOR perfectly.
1991 And add the valid candidates to a vector for further handling. */
1992 auto_vec
<tree
> valid_vecs (v_info_map
.elements ());
1993 for (hash_map
<tree
, v_info_ptr
>::iterator it
= v_info_map
.begin ();
1994 it
!= v_info_map
.end (); ++it
)
1996 tree cand_vec
= (*it
).first
;
1997 v_info_ptr cand_info
= (*it
).second
;
1998 unsigned int num_elems
1999 = TYPE_VECTOR_SUBPARTS (cand_info
->vec_type
).to_constant ();
2000 if (cand_info
->vec
.length () != num_elems
)
2002 sbitmap holes
= sbitmap_alloc (num_elems
);
2003 bitmap_ones (holes
);
2006 FOR_EACH_VEC_ELT (cand_info
->vec
, i
, curr
)
2008 if (!bitmap_bit_p (holes
, curr
->first
))
2014 bitmap_clear_bit (holes
, curr
->first
);
2016 if (valid
&& bitmap_empty_p (holes
))
2017 valid_vecs
.quick_push (cand_vec
);
2018 sbitmap_free (holes
);
2021 /* At least two VECTOR to combine. */
2022 if (valid_vecs
.length () <= 1)
2024 cleanup_vinfo_map (v_info_map
);
2028 valid_vecs
.qsort (sort_by_mach_mode
);
2029 /* Go through all candidates by machine mode order, query the mode_to_total
2030 to get the total number for each mode and skip the single one. */
2031 for (unsigned i
= 0; i
< valid_vecs
.length () - 1; ++i
)
2033 tree tvec
= valid_vecs
[i
];
2034 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (tvec
));
2036 /* Skip modes with only a single candidate. */
2037 if (TYPE_MODE (TREE_TYPE (valid_vecs
[i
+ 1])) != mode
)
2040 unsigned int idx
, j
;
2042 tree sum_vec
= tvec
;
2043 v_info_ptr info_ptr
= *(v_info_map
.get (tvec
));
2045 tree vec_type
= info_ptr
->vec_type
;
2047 /* Build the sum for all candidates with same mode. */
2050 sum
= build_and_add_sum (vec_type
, sum_vec
,
2051 valid_vecs
[i
+ 1], opcode
);
2052 if (!useless_type_conversion_p (vec_type
,
2053 TREE_TYPE (valid_vecs
[i
+ 1])))
2055 /* Update the operands only after build_and_add_sum,
2056 so that we don't have to repeat the placement algorithm
2057 of build_and_add_sum. */
2058 gimple_stmt_iterator gsi
= gsi_for_stmt (sum
);
2059 tree vce
= build1 (VIEW_CONVERT_EXPR
, vec_type
,
2061 tree lhs
= make_ssa_name (vec_type
);
2062 gimple
*g
= gimple_build_assign (lhs
, VIEW_CONVERT_EXPR
, vce
);
2063 gimple_set_uid (g
, gimple_uid (sum
));
2064 gsi_insert_before (&gsi
, g
, GSI_NEW_STMT
);
2065 gimple_assign_set_rhs2 (sum
, lhs
);
2066 if (sum_vec
== tvec
)
2068 vce
= build1 (VIEW_CONVERT_EXPR
, vec_type
, sum_vec
);
2069 lhs
= make_ssa_name (vec_type
);
2070 g
= gimple_build_assign (lhs
, VIEW_CONVERT_EXPR
, vce
);
2071 gimple_set_uid (g
, gimple_uid (sum
));
2072 gsi_insert_before (&gsi
, g
, GSI_NEW_STMT
);
2073 gimple_assign_set_rhs1 (sum
, lhs
);
2077 sum_vec
= gimple_get_lhs (sum
);
2078 info_ptr
= *(v_info_map
.get (valid_vecs
[i
+ 1]));
2079 gcc_assert (types_compatible_p (vec_type
, info_ptr
->vec_type
));
2080 /* Update those related ops of current candidate VECTOR. */
2081 FOR_EACH_VEC_ELT (info_ptr
->vec
, j
, elem
)
2084 gimple
*def
= SSA_NAME_DEF_STMT ((*ops
)[idx
]->op
);
2085 /* Set this then op definition will get DCEd later. */
2086 gimple_set_visited (def
, true);
2087 if (opcode
== PLUS_EXPR
2088 || opcode
== BIT_XOR_EXPR
2089 || opcode
== BIT_IOR_EXPR
)
2090 (*ops
)[idx
]->op
= build_zero_cst (TREE_TYPE ((*ops
)[idx
]->op
));
2091 else if (opcode
== MULT_EXPR
)
2092 (*ops
)[idx
]->op
= build_one_cst (TREE_TYPE ((*ops
)[idx
]->op
));
2095 gcc_assert (opcode
== BIT_AND_EXPR
);
2097 = build_all_ones_cst (TREE_TYPE ((*ops
)[idx
]->op
));
2099 (*ops
)[idx
]->rank
= 0;
2101 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2103 fprintf (dump_file
, "Generating addition -> ");
2104 print_gimple_stmt (dump_file
, sum
, 0);
2108 while ((i
< valid_vecs
.length () - 1)
2109 && TYPE_MODE (TREE_TYPE (valid_vecs
[i
+ 1])) == mode
);
2111 /* Referring to first valid VECTOR with this mode, generate the
2112 BIT_FIELD_REF statements accordingly. */
2113 info_ptr
= *(v_info_map
.get (tvec
));
2115 tree elem_type
= TREE_TYPE (vec_type
);
2116 FOR_EACH_VEC_ELT (info_ptr
->vec
, j
, elem
)
2119 tree dst
= make_ssa_name (elem_type
);
2120 tree pos
= bitsize_int (elem
->first
2121 * tree_to_uhwi (TYPE_SIZE (elem_type
)));
2122 tree bfr
= build3 (BIT_FIELD_REF
, elem_type
, sum_vec
,
2123 TYPE_SIZE (elem_type
), pos
);
2124 gimple
*gs
= gimple_build_assign (dst
, BIT_FIELD_REF
, bfr
);
2125 insert_stmt_after (gs
, sum
);
2126 gimple
*def
= SSA_NAME_DEF_STMT ((*ops
)[idx
]->op
);
2127 /* Set this then op definition will get DCEd later. */
2128 gimple_set_visited (def
, true);
2129 (*ops
)[idx
]->op
= gimple_assign_lhs (gs
);
2130 (*ops
)[idx
]->rank
= get_rank ((*ops
)[idx
]->op
);
2131 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2133 fprintf (dump_file
, "Generating bit_field_ref -> ");
2134 print_gimple_stmt (dump_file
, gs
, 0);
2139 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2140 fprintf (dump_file
, "undistributiong bit_field_ref for vector done.\n");
2142 cleanup_vinfo_map (v_info_map
);
/* NOTE(review): damaged extraction -- braces, returns, several declarations
   and some comment terminators are elided from this function; code kept
   byte-identical, comments only.  */
2147 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2148 expression, examine the other OPS to see if any of them are comparisons
2149 of the same values, which we may be able to combine or eliminate.
2150 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
2153 eliminate_redundant_comparison (enum tree_code opcode
,
2154 vec
<operand_entry
*> *ops
,
2155 unsigned int currindex
,
2156 operand_entry
*curr
)
2159 enum tree_code lcode
, rcode
;
2160 gimple
*def1
, *def2
;
2164 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
2167 /* Check that CURR is a comparison. */
2168 if (TREE_CODE (curr
->op
) != SSA_NAME
)
2170 def1
= SSA_NAME_DEF_STMT (curr
->op
);
2171 if (!is_gimple_assign (def1
))
2173 lcode
= gimple_assign_rhs_code (def1
);
2174 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
2176 op1
= gimple_assign_rhs1 (def1
);
2177 op2
= gimple_assign_rhs2 (def1
);
2179 /* Now look for a similar comparison in the remaining OPS. */
2180 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
2184 if (TREE_CODE (oe
->op
) != SSA_NAME
)
2186 def2
= SSA_NAME_DEF_STMT (oe
->op
);
2187 if (!is_gimple_assign (def2
))
2189 rcode
= gimple_assign_rhs_code (def2
);
2190 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
2193 /* If we got here, we have a match. See if we can combine the
2195 tree type
= TREE_TYPE (gimple_assign_lhs (def1
));
2196 if (opcode
== BIT_IOR_EXPR
)
2197 t
= maybe_fold_or_comparisons (type
,
2199 rcode
, gimple_assign_rhs1 (def2
),
2200 gimple_assign_rhs2 (def2
));
2202 t
= maybe_fold_and_comparisons (type
,
2204 rcode
, gimple_assign_rhs1 (def2
),
2205 gimple_assign_rhs2 (def2
));
2209 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2210 always give us a boolean_type_node value back. If the original
2211 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2212 we need to convert. */
2213 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
2214 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
/* The folded result must be a gimple value (or a comparison of two
   gimple values) before we can commit to using it.  */
2216 if (TREE_CODE (t
) != INTEGER_CST
2217 && !operand_equal_p (t
, curr
->op
, 0))
2219 enum tree_code subcode
;
2220 tree newop1
, newop2
;
2221 if (!COMPARISON_CLASS_P (t
))
2223 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
2224 STRIP_USELESS_TYPE_CONVERSION (newop1
);
2225 STRIP_USELESS_TYPE_CONVERSION (newop2
);
2226 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
2230 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2232 fprintf (dump_file
, "Equivalence: ");
2233 print_generic_expr (dump_file
, curr
->op
);
2234 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
2235 print_generic_expr (dump_file
, oe
->op
);
2236 fprintf (dump_file
, " -> ");
2237 print_generic_expr (dump_file
, t
);
2238 fprintf (dump_file
, "\n");
2241 /* Now we can delete oe, as it has been subsumed by the new combined
2243 ops
->ordered_remove (i
);
2244 reassociate_stats
.ops_eliminated
++;
2246 /* If t is the same as curr->op, we're done. Otherwise we must
2247 replace curr->op with t. Special case is if we got a constant
2248 back, in which case we add it to the end instead of in place of
2249 the current entry. */
2250 if (TREE_CODE (t
) == INTEGER_CST
)
2252 ops
->ordered_remove (currindex
);
2253 add_to_ops_vec (ops
, t
);
2255 else if (!operand_equal_p (t
, curr
->op
, 0))
2258 enum tree_code subcode
;
/* Materialize the combined comparison as a new statement and let CURR
   refer to its result.  */
2261 gcc_assert (COMPARISON_CLASS_P (t
));
2262 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
2263 STRIP_USELESS_TYPE_CONVERSION (newop1
);
2264 STRIP_USELESS_TYPE_CONVERSION (newop2
);
2265 gcc_checking_assert (is_gimple_val (newop1
)
2266 && is_gimple_val (newop2
));
2267 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
2268 curr
->op
= gimple_get_lhs (sum
);
/* NOTE(review): damaged extraction -- braces, returns and parts of the
   repeated-operand scan are elided (including the terminator of the header
   comment just below); code kept byte-identical, comments only.  */
2277 /* Transform repeated addition of same values into multiply with
2280 transform_add_to_multiply (vec
<operand_entry
*> *ops
)
2283 tree op
= NULL_TREE
;
2285 int i
, start
= -1, end
= 0, count
= 0;
2286 auto_vec
<std::pair
<int, int> > indxs
;
2287 bool changed
= false;
2289 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops
)[0]->op
))
2290 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops
)[0]->op
))
2291 || !flag_unsafe_math_optimizations
))
2294 /* Look for repeated operands. */
2295 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
2303 else if (operand_equal_p (oe
->op
, op
, 0))
2311 indxs
.safe_push (std::make_pair (start
, end
))
;
2319 indxs
.safe_push (std::make_pair (start
, end
));
/* Rewrite each recorded [start, end] run (back to front so the removals
   don't shift later indices) as op * count.  */
2321 for (j
= indxs
.length () - 1; j
>= 0; --j
)
2323 /* Convert repeated operand addition to multiplication. */
2324 start
= indxs
[j
].first
;
2325 end
= indxs
[j
].second
;
2326 op
= (*ops
)[start
]->op
;
2327 count
= end
- start
+ 1;
2328 for (i
= end
; i
>= start
; --i
)
2329 ops
->unordered_remove (i
);
2330 tree tmp
= make_ssa_name (TREE_TYPE (op
));
2331 tree cst
= build_int_cst (integer_type_node
, count
);
2333 = gimple_build_assign (tmp
, MULT_EXPR
,
2334 op
, fold_convert (TREE_TYPE (op
), cst
));
2335 gimple_set_visited (mul_stmt
, true);
2336 add_to_ops_vec (ops
, tmp
, mul_stmt
);
/* NOTE(review): damaged extraction -- braces, returns and several
   declarations are elided; code kept byte-identical, comments only.  */
2344 /* Perform various identities and other optimizations on the list of
2345 operand entries, stored in OPS. The tree code for the binary
2346 operation between all the operands is OPCODE. */
2349 optimize_ops_list (enum tree_code opcode
,
2350 vec
<operand_entry
*> *ops
)
2352 unsigned int length
= ops
->length ();
2355 operand_entry
*oelast
= NULL
;
2356 bool iterate
= false;
2361 oelast
= ops
->last ();
2363 /* If the last two are constants, pop the constants off, merge them
2364 and try the next two. */
2365 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
2367 operand_entry
*oelm1
= (*ops
)[length
- 2];
2369 if (oelm1
->rank
== 0
2370 && is_gimple_min_invariant (oelm1
->op
)
2371 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
2372 TREE_TYPE (oelast
->op
)))
2374 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
2375 oelm1
->op
, oelast
->op
);
2377 if (folded
&& is_gimple_min_invariant (folded
))
2379 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2380 fprintf (dump_file
, "Merging constants\n");
/* Replace the two trailing constants with their fold and recurse to try
   merging the next pair as well.  */
2385 add_to_ops_vec (ops
, folded
);
2386 reassociate_stats
.constants_eliminated
++;
2388 optimize_ops_list (opcode
, ops
);
/* Then run the identity-based eliminations over the list.  */
2394 eliminate_using_constants (opcode
, ops
);
2397 for (i
= 0; ops
->iterate (i
, &oe
);)
2401 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
2403 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
2404 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
2405 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
/* If anything was eliminated, iterate the whole optimization.  */
2418 optimize_ops_list (opcode
, ops
);
2421 /* The following functions are subroutines to optimize_range_tests and allow
2422 it to try to change a logical combination of comparisons into a range
2426 X == 2 || X == 5 || X == 3 || X == 4
2430 (unsigned) (X - 2) <= 3
2432 For more information see comments above fold_test_range in fold-const.c,
2433 this implementation is for GIMPLE. */
/* NOTE(review): damaged extraction -- interior lines elided (including the
   fprintf separating low from high and the closing bracket, presumably);
   code kept byte-identical, comments only.  */
2437 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2440 dump_range_entry (FILE *file
, struct range_entry
*r
, bool skip_exp
)
2443 print_generic_expr (file
, r
->exp
);
/* '+' marks an in-range test, '-' an out-of-range one.  */
2444 fprintf (file
, " %c[", r
->in_p
? '+' : '-');
2445 print_generic_expr (file
, r
->low
);
2447 print_generic_expr (file
, r
->high
);
/* NOTE(review): damaged extraction -- code kept byte-identical, comments
   only.  Convenience wrapper around dump_range_entry for use from a
   debugger: prints R (expression included) to stderr with a newline.  */
2451 /* Dump the range entry R to STDERR. */
2454 debug_range_entry (struct range_entry
*r
)
2456 dump_range_entry (stderr
, r
, false);
2457 fputc ('\n', stderr
);
2460 /* This is similar to make_range in fold-const.c, but on top of
2461 GIMPLE instead of trees. If EXP is non-NULL, it should be
2462 an SSA_NAME and STMT argument is ignored, otherwise STMT
2463 argument should be a GIMPLE_COND. */
2466 init_range_entry (struct range_entry
*r
, tree exp
, gimple
*stmt
)
2470 bool is_bool
, strict_overflow_p
;
2474 r
->strict_overflow_p
= false;
2476 r
->high
= NULL_TREE
;
2477 if (exp
!= NULL_TREE
2478 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
2481 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2482 and see if we can refine the range. Some of the cases below may not
2483 happen, but it doesn't seem worth worrying about this. We "continue"
2484 the outer loop when we've changed something; otherwise we "break"
2485 the switch, which will "break" the while. */
2486 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
2489 strict_overflow_p
= false;
2491 if (exp
== NULL_TREE
)
2493 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
2495 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
2500 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
2505 enum tree_code code
;
2506 tree arg0
, arg1
, exp_type
;
2510 if (exp
!= NULL_TREE
)
2512 if (TREE_CODE (exp
) != SSA_NAME
2513 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
))
2516 stmt
= SSA_NAME_DEF_STMT (exp
);
2517 if (!is_gimple_assign (stmt
))
2520 code
= gimple_assign_rhs_code (stmt
);
2521 arg0
= gimple_assign_rhs1 (stmt
);
2522 arg1
= gimple_assign_rhs2 (stmt
);
2523 exp_type
= TREE_TYPE (exp
);
2527 code
= gimple_cond_code (stmt
);
2528 arg0
= gimple_cond_lhs (stmt
);
2529 arg1
= gimple_cond_rhs (stmt
);
2530 exp_type
= boolean_type_node
;
2533 if (TREE_CODE (arg0
) != SSA_NAME
2534 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0
))
2536 loc
= gimple_location (stmt
);
2540 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
2541 /* Ensure the range is either +[-,0], +[0,0],
2542 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2543 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2544 or similar expression of unconditional true or
2545 false, it should not be negated. */
2546 && ((high
&& integer_zerop (high
))
2547 || (low
&& integer_onep (low
))))
2560 if ((TYPE_PRECISION (exp_type
) == 1
2561 || TREE_CODE (exp_type
) == BOOLEAN_TYPE
)
2562 && TYPE_PRECISION (TREE_TYPE (arg0
)) > 1)
2565 else if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
2567 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
2572 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
2587 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
2589 &strict_overflow_p
);
2590 if (nexp
!= NULL_TREE
)
2593 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2606 r
->strict_overflow_p
= strict_overflow_p
;
2610 /* Comparison function for qsort. Sort entries
2611 without SSA_NAME exp first, then with SSA_NAMEs sorted
2612 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2613 by increasing ->low and if ->low is the same, by increasing
2614 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2618 range_entry_cmp (const void *a
, const void *b
)
2620 const struct range_entry
*p
= (const struct range_entry
*) a
;
2621 const struct range_entry
*q
= (const struct range_entry
*) b
;
2623 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
2625 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2627 /* Group range_entries for the same SSA_NAME together. */
2628 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
2630 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
2632 /* If ->low is different, NULL low goes first, then by
2634 if (p
->low
!= NULL_TREE
)
2636 if (q
->low
!= NULL_TREE
)
2638 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2640 if (tem
&& integer_onep (tem
))
2642 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2644 if (tem
&& integer_onep (tem
))
2650 else if (q
->low
!= NULL_TREE
)
2652 /* If ->high is different, NULL high goes last, before that by
2654 if (p
->high
!= NULL_TREE
)
2656 if (q
->high
!= NULL_TREE
)
2658 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2660 if (tem
&& integer_onep (tem
))
2662 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2664 if (tem
&& integer_onep (tem
))
2670 else if (q
->high
!= NULL_TREE
)
2672 /* If both ranges are the same, sort below by ascending idx. */
2677 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2680 if (p
->idx
< q
->idx
)
2684 gcc_checking_assert (p
->idx
> q
->idx
);
2689 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2690 insert needed statements BEFORE or after GSI. */
2693 force_into_ssa_name (gimple_stmt_iterator
*gsi
, tree expr
, bool before
)
2695 enum gsi_iterator_update m
= before
? GSI_SAME_STMT
: GSI_CONTINUE_LINKING
;
2696 tree ret
= force_gimple_operand_gsi (gsi
, expr
, true, NULL_TREE
, before
, m
);
2697 if (TREE_CODE (ret
) != SSA_NAME
)
2699 gimple
*g
= gimple_build_assign (make_ssa_name (TREE_TYPE (ret
)), ret
);
2701 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
2703 gsi_insert_after (gsi
, g
, GSI_CONTINUE_LINKING
);
2704 ret
= gimple_assign_lhs (g
);
2709 /* Helper routine of optimize_range_test.
2710 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2711 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2712 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2713 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2714 an array of COUNT pointers to other ranges. Return
2715 true if the range merge has been successful.
2716 If OPCODE is ERROR_MARK, this is called from within
2717 maybe_optimize_range_tests and is performing inter-bb range optimization.
2718 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2722 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2723 struct range_entry
**otherrangep
,
2724 unsigned int count
, enum tree_code opcode
,
2725 vec
<operand_entry
*> *ops
, tree exp
, gimple_seq seq
,
2726 bool in_p
, tree low
, tree high
, bool strict_overflow_p
)
2728 operand_entry
*oe
= (*ops
)[range
->idx
];
2730 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
2731 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2732 location_t loc
= gimple_location (stmt
);
2733 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2734 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2736 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2737 gimple_stmt_iterator gsi
;
2738 unsigned int i
, uid
;
2740 if (tem
== NULL_TREE
)
2743 /* If op is default def SSA_NAME, there is no place to insert the
2744 new comparison. Give up, unless we can use OP itself as the
2746 if (op
&& SSA_NAME_IS_DEFAULT_DEF (op
))
2748 if (op
== range
->exp
2749 && ((TYPE_PRECISION (optype
) == 1 && TYPE_UNSIGNED (optype
))
2750 || TREE_CODE (optype
) == BOOLEAN_TYPE
)
2752 || (TREE_CODE (tem
) == EQ_EXPR
2753 && TREE_OPERAND (tem
, 0) == op
2754 && integer_onep (TREE_OPERAND (tem
, 1))))
2755 && opcode
!= BIT_IOR_EXPR
2756 && (opcode
!= ERROR_MARK
|| oe
->rank
!= BIT_IOR_EXPR
))
2765 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2766 warning_at (loc
, OPT_Wstrict_overflow
,
2767 "assuming signed overflow does not occur "
2768 "when simplifying range test");
2770 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2772 struct range_entry
*r
;
2773 fprintf (dump_file
, "Optimizing range tests ");
2774 dump_range_entry (dump_file
, range
, false);
2775 for (i
= 0; i
< count
; i
++)
2782 && r
->exp
!= range
->exp
2783 && TREE_CODE (r
->exp
) == SSA_NAME
)
2785 fprintf (dump_file
, " and ");
2786 dump_range_entry (dump_file
, r
, false);
2790 fprintf (dump_file
, " and");
2791 dump_range_entry (dump_file
, r
, true);
2794 fprintf (dump_file
, "\n into ");
2795 print_generic_expr (dump_file
, tem
);
2796 fprintf (dump_file
, "\n");
2799 if (opcode
== BIT_IOR_EXPR
2800 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2801 tem
= invert_truthvalue_loc (loc
, tem
);
2803 tem
= fold_convert_loc (loc
, optype
, tem
);
2806 gsi
= gsi_for_stmt (stmt
);
2807 uid
= gimple_uid (stmt
);
2815 gcc_checking_assert (tem
== op
);
2816 /* In rare cases range->exp can be equal to lhs of stmt.
2817 In that case we have to insert after the stmt rather then before
2818 it. If stmt is a PHI, insert it at the start of the basic block. */
2819 else if (op
!= range
->exp
)
2821 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2822 tem
= force_into_ssa_name (&gsi
, tem
, true);
2825 else if (gimple_code (stmt
) != GIMPLE_PHI
)
2827 gsi_insert_seq_after (&gsi
, seq
, GSI_CONTINUE_LINKING
);
2828 tem
= force_into_ssa_name (&gsi
, tem
, false);
2832 gsi
= gsi_after_labels (gimple_bb (stmt
));
2833 if (!gsi_end_p (gsi
))
2834 uid
= gimple_uid (gsi_stmt (gsi
));
2837 gsi
= gsi_start_bb (gimple_bb (stmt
));
2839 while (!gsi_end_p (gsi
))
2841 uid
= gimple_uid (gsi_stmt (gsi
));
2845 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2846 tem
= force_into_ssa_name (&gsi
, tem
, true);
2847 if (gsi_end_p (gsi
))
2848 gsi
= gsi_last_bb (gimple_bb (stmt
));
2852 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2853 if (gimple_uid (gsi_stmt (gsi
)))
2856 gimple_set_uid (gsi_stmt (gsi
), uid
);
2863 range
->strict_overflow_p
= false;
2865 for (i
= 0; i
< count
; i
++)
2868 range
= otherrange
+ i
;
2870 range
= otherrangep
[i
];
2871 oe
= (*ops
)[range
->idx
];
2872 /* Now change all the other range test immediate uses, so that
2873 those tests will be optimized away. */
2874 if (opcode
== ERROR_MARK
)
2877 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
2878 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
2880 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
2881 ? boolean_false_node
: boolean_true_node
);
2884 oe
->op
= error_mark_node
;
2885 range
->exp
= NULL_TREE
;
2886 range
->low
= NULL_TREE
;
2887 range
->high
= NULL_TREE
;
2892 /* Optimize X == CST1 || X == CST2
2893 if popcount (CST1 ^ CST2) == 1 into
2894 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2895 Similarly for ranges. E.g.
2896 X != 2 && X != 3 && X != 10 && X != 11
2897 will be transformed by the previous optimization into
2898 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2899 and this loop can transform that into
2900 !(((X & ~8) - 2U) <= 1U). */
2903 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
2904 tree lowi
, tree lowj
, tree highi
, tree highj
,
2905 vec
<operand_entry
*> *ops
,
2906 struct range_entry
*rangei
,
2907 struct range_entry
*rangej
)
2909 tree lowxor
, highxor
, tem
, exp
;
2910 /* Check lowi ^ lowj == highi ^ highj and
2911 popcount (lowi ^ lowj) == 1. */
2912 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
2913 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
2915 if (!integer_pow2p (lowxor
))
2917 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
2918 if (!tree_int_cst_equal (lowxor
, highxor
))
2922 scalar_int_mode mode
= as_a
<scalar_int_mode
> (TYPE_MODE (type
));
2923 int prec
= GET_MODE_PRECISION (mode
);
2924 if (TYPE_PRECISION (type
) < prec
2925 || (wi::to_wide (TYPE_MIN_VALUE (type
))
2926 != wi::min_value (prec
, TYPE_SIGN (type
)))
2927 || (wi::to_wide (TYPE_MAX_VALUE (type
))
2928 != wi::max_value (prec
, TYPE_SIGN (type
))))
2930 type
= build_nonstandard_integer_type (prec
, TYPE_UNSIGNED (type
));
2931 exp
= fold_convert (type
, exp
);
2932 lowxor
= fold_convert (type
, lowxor
);
2933 lowi
= fold_convert (type
, lowi
);
2934 highi
= fold_convert (type
, highi
);
2936 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
2937 exp
= fold_build2 (BIT_AND_EXPR
, type
, exp
, tem
);
2938 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
2939 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
2940 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, exp
,
2941 NULL
, rangei
->in_p
, lowj
, highj
,
2942 rangei
->strict_overflow_p
2943 || rangej
->strict_overflow_p
))
2948 /* Optimize X == CST1 || X == CST2
2949 if popcount (CST2 - CST1) == 1 into
2950 ((X - CST1) & ~(CST2 - CST1)) == 0.
2951 Similarly for ranges. E.g.
2952 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2953 || X == 75 || X == 45
2954 will be transformed by the previous optimization into
2955 (X - 43U) <= 3U || (X - 75U) <= 3U
2956 and this loop can transform that into
2957 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2959 optimize_range_tests_diff (enum tree_code opcode
, tree type
,
2960 tree lowi
, tree lowj
, tree highi
, tree highj
,
2961 vec
<operand_entry
*> *ops
,
2962 struct range_entry
*rangei
,
2963 struct range_entry
*rangej
)
2965 tree tem1
, tem2
, mask
;
2966 /* Check highi - lowi == highj - lowj. */
2967 tem1
= fold_binary (MINUS_EXPR
, type
, highi
, lowi
);
2968 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2970 tem2
= fold_binary (MINUS_EXPR
, type
, highj
, lowj
);
2971 if (!tree_int_cst_equal (tem1
, tem2
))
2973 /* Check popcount (lowj - lowi) == 1. */
2974 tem1
= fold_binary (MINUS_EXPR
, type
, lowj
, lowi
);
2975 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2977 if (!integer_pow2p (tem1
))
2980 scalar_int_mode mode
= as_a
<scalar_int_mode
> (TYPE_MODE (type
));
2981 int prec
= GET_MODE_PRECISION (mode
);
2982 if (TYPE_PRECISION (type
) < prec
2983 || (wi::to_wide (TYPE_MIN_VALUE (type
))
2984 != wi::min_value (prec
, TYPE_SIGN (type
)))
2985 || (wi::to_wide (TYPE_MAX_VALUE (type
))
2986 != wi::max_value (prec
, TYPE_SIGN (type
))))
2987 type
= build_nonstandard_integer_type (prec
, 1);
2989 type
= unsigned_type_for (type
);
2990 tem1
= fold_convert (type
, tem1
);
2991 tem2
= fold_convert (type
, tem2
);
2992 lowi
= fold_convert (type
, lowi
);
2993 mask
= fold_build1 (BIT_NOT_EXPR
, type
, tem1
);
2994 tem1
= fold_build2 (MINUS_EXPR
, type
,
2995 fold_convert (type
, rangei
->exp
), lowi
);
2996 tem1
= fold_build2 (BIT_AND_EXPR
, type
, tem1
, mask
);
2997 lowj
= build_int_cst (type
, 0);
2998 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, tem1
,
2999 NULL
, rangei
->in_p
, lowj
, tem2
,
3000 rangei
->strict_overflow_p
3001 || rangej
->strict_overflow_p
))
3006 /* It does some common checks for function optimize_range_tests_xor and
3007 optimize_range_tests_diff.
3008 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
3009 Else it calls optimize_range_tests_diff. */
3012 optimize_range_tests_1 (enum tree_code opcode
, int first
, int length
,
3013 bool optimize_xor
, vec
<operand_entry
*> *ops
,
3014 struct range_entry
*ranges
)
3017 bool any_changes
= false;
3018 for (i
= first
; i
< length
; i
++)
3020 tree lowi
, highi
, lowj
, highj
, type
, tem
;
3022 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
3024 type
= TREE_TYPE (ranges
[i
].exp
);
3025 if (!INTEGRAL_TYPE_P (type
))
3027 lowi
= ranges
[i
].low
;
3028 if (lowi
== NULL_TREE
)
3029 lowi
= TYPE_MIN_VALUE (type
);
3030 highi
= ranges
[i
].high
;
3031 if (highi
== NULL_TREE
)
3033 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
3036 if (ranges
[i
].exp
!= ranges
[j
].exp
|| ranges
[j
].in_p
)
3038 lowj
= ranges
[j
].low
;
3039 if (lowj
== NULL_TREE
)
3041 highj
= ranges
[j
].high
;
3042 if (highj
== NULL_TREE
)
3043 highj
= TYPE_MAX_VALUE (type
);
3044 /* Check lowj > highi. */
3045 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
3047 if (tem
== NULL_TREE
|| !integer_onep (tem
))
3050 changes
= optimize_range_tests_xor (opcode
, type
, lowi
, lowj
,
3052 ranges
+ i
, ranges
+ j
);
3054 changes
= optimize_range_tests_diff (opcode
, type
, lowi
, lowj
,
3056 ranges
+ i
, ranges
+ j
);
3067 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3068 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3069 EXP on success, NULL otherwise. */
3072 extract_bit_test_mask (tree exp
, int prec
, tree totallow
, tree low
, tree high
,
3073 wide_int
*mask
, tree
*totallowp
)
3075 tree tem
= int_const_binop (MINUS_EXPR
, high
, low
);
3076 if (tem
== NULL_TREE
3077 || TREE_CODE (tem
) != INTEGER_CST
3078 || TREE_OVERFLOW (tem
)
3079 || tree_int_cst_sgn (tem
) == -1
3080 || compare_tree_int (tem
, prec
) != -1)
3083 unsigned HOST_WIDE_INT max
= tree_to_uhwi (tem
) + 1;
3084 *mask
= wi::shifted_mask (0, max
, false, prec
);
3085 if (TREE_CODE (exp
) == BIT_AND_EXPR
3086 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
3088 widest_int msk
= wi::to_widest (TREE_OPERAND (exp
, 1));
3089 msk
= wi::zext (~msk
, TYPE_PRECISION (TREE_TYPE (exp
)));
3090 if (wi::popcount (msk
) == 1
3091 && wi::ltu_p (msk
, prec
- max
))
3093 *mask
|= wi::shifted_mask (msk
.to_uhwi (), max
, false, prec
);
3094 max
+= msk
.to_uhwi ();
3095 exp
= TREE_OPERAND (exp
, 0);
3096 if (integer_zerop (low
)
3097 && TREE_CODE (exp
) == PLUS_EXPR
3098 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
3100 tree ret
= TREE_OPERAND (exp
, 0);
3103 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp
, 1)),
3104 TYPE_PRECISION (TREE_TYPE (low
))));
3105 tree tbias
= wide_int_to_tree (TREE_TYPE (ret
), bias
);
3111 else if (!tree_int_cst_lt (totallow
, tbias
))
3113 bias
= wi::to_widest (tbias
);
3114 bias
-= wi::to_widest (totallow
);
3115 if (bias
>= 0 && bias
< prec
- max
)
3117 *mask
= wi::lshift (*mask
, bias
);
3125 if (!tree_int_cst_lt (totallow
, low
))
3127 tem
= int_const_binop (MINUS_EXPR
, low
, totallow
);
3128 if (tem
== NULL_TREE
3129 || TREE_CODE (tem
) != INTEGER_CST
3130 || TREE_OVERFLOW (tem
)
3131 || compare_tree_int (tem
, prec
- max
) == 1)
3134 *mask
= wi::lshift (*mask
, wi::to_widest (tem
));
3138 /* Attempt to optimize small range tests using bit test.
3140 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3141 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3142 has been by earlier optimizations optimized into:
3143 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3144 As all the 43 through 82 range is less than 64 numbers,
3145 for 64-bit word targets optimize that into:
3146 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
3149 optimize_range_tests_to_bit_test (enum tree_code opcode
, int first
, int length
,
3150 vec
<operand_entry
*> *ops
,
3151 struct range_entry
*ranges
)
3154 bool any_changes
= false;
3155 int prec
= GET_MODE_BITSIZE (word_mode
);
3156 auto_vec
<struct range_entry
*, 64> candidates
;
3158 for (i
= first
; i
< length
- 1; i
++)
3160 tree lowi
, highi
, lowj
, highj
, type
;
3162 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
3164 type
= TREE_TYPE (ranges
[i
].exp
);
3165 if (!INTEGRAL_TYPE_P (type
))
3167 lowi
= ranges
[i
].low
;
3168 if (lowi
== NULL_TREE
)
3169 lowi
= TYPE_MIN_VALUE (type
);
3170 highi
= ranges
[i
].high
;
3171 if (highi
== NULL_TREE
)
3174 tree exp
= extract_bit_test_mask (ranges
[i
].exp
, prec
, lowi
, lowi
,
3175 highi
, &mask
, &lowi
);
3176 if (exp
== NULL_TREE
)
3178 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
3179 candidates
.truncate (0);
3180 int end
= MIN (i
+ 64, length
);
3181 for (j
= i
+ 1; j
< end
; j
++)
3184 if (ranges
[j
].exp
== NULL_TREE
|| ranges
[j
].in_p
)
3186 if (ranges
[j
].exp
== exp
)
3188 else if (TREE_CODE (ranges
[j
].exp
) == BIT_AND_EXPR
)
3190 exp2
= TREE_OPERAND (ranges
[j
].exp
, 0);
3193 else if (TREE_CODE (exp2
) == PLUS_EXPR
)
3195 exp2
= TREE_OPERAND (exp2
, 0);
3205 lowj
= ranges
[j
].low
;
3206 if (lowj
== NULL_TREE
)
3208 highj
= ranges
[j
].high
;
3209 if (highj
== NULL_TREE
)
3210 highj
= TYPE_MAX_VALUE (type
);
3212 exp2
= extract_bit_test_mask (ranges
[j
].exp
, prec
, lowi
, lowj
,
3213 highj
, &mask2
, NULL
);
3217 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
3218 candidates
.safe_push (&ranges
[j
]);
3221 /* If every possible relative value of the expression is a valid shift
3222 amount, then we can merge the entry test in the bit test. In this
3223 case, if we would need otherwise 2 or more comparisons, then use
3224 the bit test; in the other cases, the threshold is 3 comparisons. */
3225 bool entry_test_needed
;
3227 if (TREE_CODE (exp
) == SSA_NAME
3228 && get_range_query (cfun
)->range_of_expr (r
, exp
)
3229 && r
.kind () == VR_RANGE
3230 && wi::leu_p (r
.upper_bound () - r
.lower_bound (), prec
- 1))
3232 wide_int min
= r
.lower_bound ();
3233 wide_int ilowi
= wi::to_wide (lowi
);
3234 if (wi::lt_p (min
, ilowi
, TYPE_SIGN (TREE_TYPE (lowi
))))
3236 lowi
= wide_int_to_tree (TREE_TYPE (lowi
), min
);
3237 mask
= wi::lshift (mask
, ilowi
- min
);
3239 else if (wi::gt_p (min
, ilowi
, TYPE_SIGN (TREE_TYPE (lowi
))))
3241 lowi
= wide_int_to_tree (TREE_TYPE (lowi
), min
);
3242 mask
= wi::lrshift (mask
, min
- ilowi
);
3244 entry_test_needed
= false;
3247 entry_test_needed
= true;
3248 if (candidates
.length () >= (entry_test_needed
? 2 : 1))
3250 tree high
= wide_int_to_tree (TREE_TYPE (lowi
),
3251 wi::to_widest (lowi
)
3252 + prec
- 1 - wi::clz (mask
));
3253 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3255 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
3256 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
3257 location_t loc
= gimple_location (stmt
);
3258 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
3260 /* See if it isn't cheaper to pretend the minimum value of the
3261 range is 0, if maximum value is small enough.
3262 We can avoid then subtraction of the minimum value, but the
3263 mask constant could be perhaps more expensive. */
3264 if (compare_tree_int (lowi
, 0) > 0
3265 && compare_tree_int (high
, prec
) < 0)
3268 HOST_WIDE_INT m
= tree_to_uhwi (lowi
);
3269 rtx reg
= gen_raw_REG (word_mode
, 10000);
3270 bool speed_p
= optimize_bb_for_speed_p (gimple_bb (stmt
));
3271 cost_diff
= set_src_cost (gen_rtx_PLUS (word_mode
, reg
,
3273 word_mode
, speed_p
);
3274 rtx r
= immed_wide_int_const (mask
, word_mode
);
3275 cost_diff
+= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
3276 word_mode
, speed_p
);
3277 r
= immed_wide_int_const (wi::lshift (mask
, m
), word_mode
);
3278 cost_diff
-= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
3279 word_mode
, speed_p
);
3282 mask
= wi::lshift (mask
, m
);
3283 lowi
= build_zero_cst (TREE_TYPE (lowi
));
3288 if (entry_test_needed
)
3290 tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
3292 if (tem
== NULL_TREE
|| is_gimple_val (tem
))
3297 tree etype
= unsigned_type_for (TREE_TYPE (exp
));
3298 exp
= fold_build2_loc (loc
, MINUS_EXPR
, etype
,
3299 fold_convert_loc (loc
, etype
, exp
),
3300 fold_convert_loc (loc
, etype
, lowi
));
3301 exp
= fold_convert_loc (loc
, integer_type_node
, exp
);
3302 tree word_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
3303 exp
= fold_build2_loc (loc
, LSHIFT_EXPR
, word_type
,
3304 build_int_cst (word_type
, 1), exp
);
3305 exp
= fold_build2_loc (loc
, BIT_AND_EXPR
, word_type
, exp
,
3306 wide_int_to_tree (word_type
, mask
));
3307 exp
= fold_build2_loc (loc
, EQ_EXPR
, optype
, exp
,
3308 build_zero_cst (word_type
));
3309 if (is_gimple_val (exp
))
3312 /* The shift might have undefined behavior if TEM is true,
3313 but reassociate_bb isn't prepared to have basic blocks
3314 split when it is running. So, temporarily emit a code
3315 with BIT_IOR_EXPR instead of &&, and fix it up in
3317 gimple_seq seq
= NULL
;
3320 tem
= force_gimple_operand (tem
, &seq
, true, NULL_TREE
);
3321 gcc_assert (TREE_CODE (tem
) == SSA_NAME
);
3322 gimple_set_visited (SSA_NAME_DEF_STMT (tem
), true);
3325 exp
= force_gimple_operand (exp
, &seq2
, true, NULL_TREE
);
3326 gimple_seq_add_seq_without_update (&seq
, seq2
);
3327 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
3328 gimple_set_visited (SSA_NAME_DEF_STMT (exp
), true);
3331 gimple
*g
= gimple_build_assign (make_ssa_name (optype
),
3332 BIT_IOR_EXPR
, tem
, exp
);
3333 gimple_set_location (g
, loc
);
3334 gimple_seq_add_stmt_without_update (&seq
, g
);
3335 exp
= gimple_assign_lhs (g
);
3337 tree val
= build_zero_cst (optype
);
3338 if (update_range_test (&ranges
[i
], NULL
, candidates
.address (),
3339 candidates
.length (), opcode
, ops
, exp
,
3340 seq
, false, val
, val
, strict_overflow_p
))
3344 reassoc_branch_fixups
.safe_push (tem
);
3347 gimple_seq_discard (seq
);
3353 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3354 and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1.
3355 Also, handle x < C && y < C && z < C where C is power of two as
3356 (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0
3357 as (x | y | z) < 0. */
3360 optimize_range_tests_cmp_bitwise (enum tree_code opcode
, int first
, int length
,
3361 vec
<operand_entry
*> *ops
,
3362 struct range_entry
*ranges
)
3366 bool any_changes
= false;
3367 auto_vec
<int, 128> buckets
;
3368 auto_vec
<int, 32> chains
;
3369 auto_vec
<struct range_entry
*, 32> candidates
;
3371 for (i
= first
; i
< length
; i
++)
3375 if (ranges
[i
].exp
== NULL_TREE
3376 || TREE_CODE (ranges
[i
].exp
) != SSA_NAME
3377 || TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) <= 1
3378 || TREE_CODE (TREE_TYPE (ranges
[i
].exp
)) == BOOLEAN_TYPE
)
3381 if (ranges
[i
].low
!= NULL_TREE
3382 && ranges
[i
].high
!= NULL_TREE
3384 && tree_int_cst_equal (ranges
[i
].low
, ranges
[i
].high
))
3386 idx
= !integer_zerop (ranges
[i
].low
);
3387 if (idx
&& !integer_all_onesp (ranges
[i
].low
))
3390 else if (ranges
[i
].high
!= NULL_TREE
3391 && TREE_CODE (ranges
[i
].high
) == INTEGER_CST
3394 wide_int w
= wi::to_wide (ranges
[i
].high
);
3395 int prec
= TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
));
3396 int l
= wi::clz (w
);
3400 || w
!= wi::mask (prec
- l
, false, prec
))
3402 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3403 && ranges
[i
].low
== NULL_TREE
)
3405 && integer_zerop (ranges
[i
].low
))))
3408 else if (ranges
[i
].high
== NULL_TREE
3409 && ranges
[i
].low
!= NULL_TREE
3410 /* Perform this optimization only in the last
3411 reassoc pass, as it interferes with the reassociation
3412 itself or could also with VRP etc. which might not
3413 be able to virtually undo the optimization. */
3414 && !reassoc_insert_powi_p
3415 && !TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3416 && integer_zerop (ranges
[i
].low
))
3421 b
= TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) * 4 + idx
;
3422 if (buckets
.length () <= b
)
3423 buckets
.safe_grow_cleared (b
+ 1, true);
3424 if (chains
.length () <= (unsigned) i
)
3425 chains
.safe_grow (i
+ 1, true);
3426 chains
[i
] = buckets
[b
];
3430 FOR_EACH_VEC_ELT (buckets
, b
, i
)
3431 if (i
&& chains
[i
- 1])
3436 /* When ranges[X - 1].high + 1 is a power of two,
3437 we need to process the same bucket up to
3438 precision - 1 times, each time split the entries
3439 with the same high bound into one chain and the
3440 rest into another one to be processed later. */
3443 for (j
= chains
[i
- 1]; j
; j
= chains
[j
- 1])
3445 if (tree_int_cst_equal (ranges
[i
- 1].high
,
3446 ranges
[j
- 1].high
))
3448 chains
[this_prev
- 1] = j
;
3451 else if (other_prev
== 0)
3458 chains
[other_prev
- 1] = j
;
3462 chains
[this_prev
- 1] = 0;
3464 chains
[other_prev
- 1] = 0;
3465 if (chains
[i
- 1] == 0)
3472 for (j
= chains
[i
- 1]; j
; j
= chains
[j
- 1])
3474 gimple
*gk
= SSA_NAME_DEF_STMT (ranges
[k
- 1].exp
);
3475 gimple
*gj
= SSA_NAME_DEF_STMT (ranges
[j
- 1].exp
);
3476 if (reassoc_stmt_dominates_stmt_p (gk
, gj
))
3479 tree type1
= TREE_TYPE (ranges
[k
- 1].exp
);
3480 tree type2
= NULL_TREE
;
3481 bool strict_overflow_p
= false;
3482 candidates
.truncate (0);
3483 for (j
= i
; j
; j
= chains
[j
- 1])
3485 tree type
= TREE_TYPE (ranges
[j
- 1].exp
);
3486 strict_overflow_p
|= ranges
[j
- 1].strict_overflow_p
;
3489 /* For the signed < 0 cases, the types should be
3490 really compatible (all signed with the same precision,
3491 instead put ranges that have different in_p from
3493 if (!useless_type_conversion_p (type1
, type
))
3495 if (ranges
[j
- 1].in_p
!= ranges
[k
- 1].in_p
)
3496 candidates
.safe_push (&ranges
[j
- 1]);
3501 || useless_type_conversion_p (type1
, type
))
3503 else if (type2
== NULL_TREE
3504 || useless_type_conversion_p (type2
, type
))
3506 if (type2
== NULL_TREE
)
3508 candidates
.safe_push (&ranges
[j
- 1]);
3511 unsigned l
= candidates
.length ();
3512 for (j
= i
; j
; j
= chains
[j
- 1])
3514 tree type
= TREE_TYPE (ranges
[j
- 1].exp
);
3519 if (!useless_type_conversion_p (type1
, type
))
3521 if (ranges
[j
- 1].in_p
== ranges
[k
- 1].in_p
)
3522 candidates
.safe_push (&ranges
[j
- 1]);
3525 if (useless_type_conversion_p (type1
, type
))
3527 else if (type2
== NULL_TREE
3528 || useless_type_conversion_p (type2
, type
))
3530 candidates
.safe_push (&ranges
[j
- 1]);
3532 gimple_seq seq
= NULL
;
3533 tree op
= NULL_TREE
;
3535 struct range_entry
*r
;
3536 candidates
.safe_push (&ranges
[k
- 1]);
3537 FOR_EACH_VEC_ELT (candidates
, id
, r
)
3540 enum tree_code code
;
3548 code
= (b
% 4) == 3 ? BIT_NOT_EXPR
: NOP_EXPR
;
3549 g
= gimple_build_assign (make_ssa_name (type1
), code
, op
);
3550 gimple_seq_add_stmt_without_update (&seq
, g
);
3551 op
= gimple_assign_lhs (g
);
3553 tree type
= TREE_TYPE (r
->exp
);
3555 if (id
>= l
&& !useless_type_conversion_p (type1
, type
))
3557 g
= gimple_build_assign (make_ssa_name (type1
), NOP_EXPR
, exp
);
3558 gimple_seq_add_stmt_without_update (&seq
, g
);
3559 exp
= gimple_assign_lhs (g
);
3562 code
= r
->in_p
? BIT_IOR_EXPR
: BIT_AND_EXPR
;
3564 code
= (b
% 4) == 1 ? BIT_AND_EXPR
: BIT_IOR_EXPR
;
3565 g
= gimple_build_assign (make_ssa_name (id
>= l
? type1
: type2
),
3567 gimple_seq_add_stmt_without_update (&seq
, g
);
3568 op
= gimple_assign_lhs (g
);
3571 if (update_range_test (&ranges
[k
- 1], NULL
, candidates
.address (),
3572 candidates
.length (), opcode
, ops
, op
,
3573 seq
, ranges
[k
- 1].in_p
, ranges
[k
- 1].low
,
3574 ranges
[k
- 1].high
, strict_overflow_p
))
3577 gimple_seq_discard (seq
);
3578 if ((b
% 4) == 2 && buckets
[b
] != i
)
3579 /* There is more work to do for this bucket. */
3586 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3587 a >= 0 && a < b into (unsigned) a < (unsigned) b
3588 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
3591 optimize_range_tests_var_bound (enum tree_code opcode
, int first
, int length
,
3592 vec
<operand_entry
*> *ops
,
3593 struct range_entry
*ranges
,
3594 basic_block first_bb
)
3597 bool any_changes
= false;
3598 hash_map
<tree
, int> *map
= NULL
;
3600 for (i
= first
; i
< length
; i
++)
3602 if (ranges
[i
].exp
== NULL_TREE
3603 || TREE_CODE (ranges
[i
].exp
) != SSA_NAME
3607 tree type
= TREE_TYPE (ranges
[i
].exp
);
3608 if (!INTEGRAL_TYPE_P (type
)
3609 || TYPE_UNSIGNED (type
)
3610 || ranges
[i
].low
== NULL_TREE
3611 || !integer_zerop (ranges
[i
].low
)
3612 || ranges
[i
].high
!= NULL_TREE
)
3614 /* EXP >= 0 here. */
3616 map
= new hash_map
<tree
, int>;
3617 map
->put (ranges
[i
].exp
, i
);
3623 for (i
= 0; i
< length
; i
++)
3625 bool in_p
= ranges
[i
].in_p
;
3626 if (ranges
[i
].low
== NULL_TREE
3627 || ranges
[i
].high
== NULL_TREE
)
3629 if (!integer_zerop (ranges
[i
].low
)
3630 || !integer_zerop (ranges
[i
].high
))
3633 && TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) == 1
3634 && TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3635 && integer_onep (ranges
[i
].low
)
3636 && integer_onep (ranges
[i
].high
))
3647 if (TREE_CODE (ranges
[i
].exp
) != SSA_NAME
)
3649 stmt
= SSA_NAME_DEF_STMT (ranges
[i
].exp
);
3650 if (!is_gimple_assign (stmt
))
3652 ccode
= gimple_assign_rhs_code (stmt
);
3653 rhs1
= gimple_assign_rhs1 (stmt
);
3654 rhs2
= gimple_assign_rhs2 (stmt
);
3658 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3659 stmt
= last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
3660 if (gimple_code (stmt
) != GIMPLE_COND
)
3662 ccode
= gimple_cond_code (stmt
);
3663 rhs1
= gimple_cond_lhs (stmt
);
3664 rhs2
= gimple_cond_rhs (stmt
);
3667 if (TREE_CODE (rhs1
) != SSA_NAME
3668 || rhs2
== NULL_TREE
3669 || TREE_CODE (rhs2
) != SSA_NAME
)
3683 ccode
= invert_tree_comparison (ccode
, false);
3688 std::swap (rhs1
, rhs2
);
3689 ccode
= swap_tree_comparison (ccode
);
3698 int *idx
= map
->get (rhs1
);
3702 /* maybe_optimize_range_tests allows statements without side-effects
3703 in the basic blocks as long as they are consumed in the same bb.
3704 Make sure rhs2's def stmt is not among them, otherwise we can't
3705 use safely get_nonzero_bits on it. E.g. in:
3706 # RANGE [-83, 1] NONZERO 173
3707 # k_32 = PHI <k_47(13), k_12(9)>
3710 goto <bb 5>; [26.46%]
3712 goto <bb 9>; [73.54%]
3714 <bb 5> [local count: 140323371]:
3715 # RANGE [0, 1] NONZERO 1
3717 # RANGE [0, 4] NONZERO 4
3719 # RANGE [0, 4] NONZERO 4
3720 iftmp.0_44 = (char) _21;
3721 if (k_32 < iftmp.0_44)
3722 goto <bb 6>; [84.48%]
3724 goto <bb 9>; [15.52%]
3725 the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
3726 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3727 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3728 those stmts even for negative k_32 and the value ranges would be no
3729 longer guaranteed and so the optimization would be invalid. */
3730 while (opcode
== ERROR_MARK
)
3732 gimple
*g
= SSA_NAME_DEF_STMT (rhs2
);
3733 basic_block bb2
= gimple_bb (g
);
3736 && dominated_by_p (CDI_DOMINATORS
, bb2
, first_bb
))
3738 /* As an exception, handle a few common cases. */
3739 if (gimple_assign_cast_p (g
)
3740 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g
))))
3742 tree op0
= gimple_assign_rhs1 (g
);
3743 if (TYPE_UNSIGNED (TREE_TYPE (op0
))
3744 && (TYPE_PRECISION (TREE_TYPE (rhs2
))
3745 > TYPE_PRECISION (TREE_TYPE (op0
))))
3746 /* Zero-extension is always ok. */
3748 else if (TYPE_PRECISION (TREE_TYPE (rhs2
))
3749 == TYPE_PRECISION (TREE_TYPE (op0
))
3750 && TREE_CODE (op0
) == SSA_NAME
)
3752 /* Cast from signed to unsigned or vice versa. Retry
3753 with the op0 as new rhs2. */
3758 else if (is_gimple_assign (g
)
3759 && gimple_assign_rhs_code (g
) == BIT_AND_EXPR
3760 && TREE_CODE (gimple_assign_rhs2 (g
)) == INTEGER_CST
3761 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g
))))
3762 /* Masking with INTEGER_CST with MSB clear is always ok
3769 if (rhs2
== NULL_TREE
)
3772 wide_int nz
= get_nonzero_bits (rhs2
);
3776 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3777 and RHS2 is known to be RHS2 >= 0. */
3778 tree utype
= unsigned_type_for (TREE_TYPE (rhs1
));
3780 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
3781 if ((ranges
[*idx
].strict_overflow_p
3782 || ranges
[i
].strict_overflow_p
)
3783 && issue_strict_overflow_warning (wc
))
3784 warning_at (gimple_location (stmt
), OPT_Wstrict_overflow
,
3785 "assuming signed overflow does not occur "
3786 "when simplifying range test");
3788 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3790 struct range_entry
*r
= &ranges
[*idx
];
3791 fprintf (dump_file
, "Optimizing range test ");
3792 print_generic_expr (dump_file
, r
->exp
);
3793 fprintf (dump_file
, " +[");
3794 print_generic_expr (dump_file
, r
->low
);
3795 fprintf (dump_file
, ", ");
3796 print_generic_expr (dump_file
, r
->high
);
3797 fprintf (dump_file
, "] and comparison ");
3798 print_generic_expr (dump_file
, rhs1
);
3799 fprintf (dump_file
, " %s ", op_symbol_code (ccode
));
3800 print_generic_expr (dump_file
, rhs2
);
3801 fprintf (dump_file
, "\n into (");
3802 print_generic_expr (dump_file
, utype
);
3803 fprintf (dump_file
, ") ");
3804 print_generic_expr (dump_file
, rhs1
);
3805 fprintf (dump_file
, " %s (", op_symbol_code (ccode
));
3806 print_generic_expr (dump_file
, utype
);
3807 fprintf (dump_file
, ") ");
3808 print_generic_expr (dump_file
, rhs2
);
3809 fprintf (dump_file
, "\n");
3812 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3814 if (opcode
== BIT_IOR_EXPR
3815 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
3818 ccode
= invert_tree_comparison (ccode
, false);
3821 unsigned int uid
= gimple_uid (stmt
);
3822 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3823 gimple
*g
= gimple_build_assign (make_ssa_name (utype
), NOP_EXPR
, rhs1
);
3824 gimple_set_uid (g
, uid
);
3825 rhs1
= gimple_assign_lhs (g
);
3826 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3827 if (!useless_type_conversion_p (utype
, TREE_TYPE (rhs2
)))
3829 g
= gimple_build_assign (make_ssa_name (utype
), NOP_EXPR
, rhs2
);
3830 gimple_set_uid (g
, uid
);
3831 rhs2
= gimple_assign_lhs (g
);
3832 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3834 if (tree_swap_operands_p (rhs1
, rhs2
))
3836 std::swap (rhs1
, rhs2
);
3837 ccode
= swap_tree_comparison (ccode
);
3839 if (gimple_code (stmt
) == GIMPLE_COND
)
3841 gcond
*c
= as_a
<gcond
*> (stmt
);
3842 gimple_cond_set_code (c
, ccode
);
3843 gimple_cond_set_lhs (c
, rhs1
);
3844 gimple_cond_set_rhs (c
, rhs2
);
3849 tree ctype
= oe
->op
? TREE_TYPE (oe
->op
) : boolean_type_node
;
3850 if (!INTEGRAL_TYPE_P (ctype
)
3851 || (TREE_CODE (ctype
) != BOOLEAN_TYPE
3852 && TYPE_PRECISION (ctype
) != 1))
3853 ctype
= boolean_type_node
;
3854 g
= gimple_build_assign (make_ssa_name (ctype
), ccode
, rhs1
, rhs2
);
3855 gimple_set_uid (g
, uid
);
3856 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3857 if (oe
->op
&& ctype
!= TREE_TYPE (oe
->op
))
3859 g
= gimple_build_assign (make_ssa_name (TREE_TYPE (oe
->op
)),
3860 NOP_EXPR
, gimple_assign_lhs (g
));
3861 gimple_set_uid (g
, uid
);
3862 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3864 ranges
[i
].exp
= gimple_assign_lhs (g
);
3865 oe
->op
= ranges
[i
].exp
;
3866 ranges
[i
].low
= build_zero_cst (TREE_TYPE (ranges
[i
].exp
));
3867 ranges
[i
].high
= ranges
[i
].low
;
3869 ranges
[i
].strict_overflow_p
= false;
3870 oe
= (*ops
)[ranges
[*idx
].idx
];
3871 /* Now change all the other range test immediate uses, so that
3872 those tests will be optimized away. */
3873 if (opcode
== ERROR_MARK
)
3876 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
3877 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
3879 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
3880 ? boolean_false_node
: boolean_true_node
);
3883 oe
->op
= error_mark_node
;
3884 ranges
[*idx
].exp
= NULL_TREE
;
3885 ranges
[*idx
].low
= NULL_TREE
;
3886 ranges
[*idx
].high
= NULL_TREE
;
3894 /* Optimize range tests, similarly how fold_range_test optimizes
3895 it on trees. The tree code for the binary
3896 operation between all the operands is OPCODE.
3897 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3898 maybe_optimize_range_tests for inter-bb range optimization.
3899 In that case if oe->op is NULL, oe->id is bb->index whose
3900 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3902 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
3905 optimize_range_tests (enum tree_code opcode
,
3906 vec
<operand_entry
*> *ops
, basic_block first_bb
)
3908 unsigned int length
= ops
->length (), i
, j
, first
;
3910 struct range_entry
*ranges
;
3911 bool any_changes
= false;
3916 ranges
= XNEWVEC (struct range_entry
, length
);
3917 for (i
= 0; i
< length
; i
++)
3921 init_range_entry (ranges
+ i
, oe
->op
,
3924 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
)));
3925 /* For | invert it now, we will invert it again before emitting
3926 the optimized expression. */
3927 if (opcode
== BIT_IOR_EXPR
3928 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
3929 ranges
[i
].in_p
= !ranges
[i
].in_p
;
3932 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
3933 for (i
= 0; i
< length
; i
++)
3934 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
3937 /* Try to merge ranges. */
3938 for (first
= i
; i
< length
; i
++)
3940 tree low
= ranges
[i
].low
;
3941 tree high
= ranges
[i
].high
;
3942 int in_p
= ranges
[i
].in_p
;
3943 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
3944 int update_fail_count
= 0;
3946 for (j
= i
+ 1; j
< length
; j
++)
3948 if (ranges
[i
].exp
!= ranges
[j
].exp
)
3950 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
3951 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
3953 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
3959 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, NULL
, j
- i
- 1,
3960 opcode
, ops
, ranges
[i
].exp
, NULL
, in_p
,
3961 low
, high
, strict_overflow_p
))
3966 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3967 while update_range_test would fail. */
3968 else if (update_fail_count
== 64)
3971 ++update_fail_count
;
3974 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
3977 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
3978 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
3980 if (lshift_cheap_p (optimize_function_for_speed_p (cfun
)))
3981 any_changes
|= optimize_range_tests_to_bit_test (opcode
, first
, length
,
3983 any_changes
|= optimize_range_tests_var_bound (opcode
, first
, length
, ops
,
3985 any_changes
|= optimize_range_tests_cmp_bitwise (opcode
, first
, length
,
3988 if (any_changes
&& opcode
!= ERROR_MARK
)
3991 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
3993 if (oe
->op
== error_mark_node
)
4002 XDELETEVEC (ranges
);
4006 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4007 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
4008 otherwise the comparison code. TYPE is a return value that is set
4009 to type of comparison. */
4012 ovce_extract_ops (tree var
, gassign
**rets
, bool *reti
, tree
*type
,
4013 tree
*lhs
, tree
*rhs
, gassign
**vcond
)
4015 if (TREE_CODE (var
) != SSA_NAME
)
4018 gassign
*stmt
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (var
));
4024 /* ??? If we start creating more COND_EXPR, we could perform
4025 this same optimization with them. For now, simplify. */
4026 if (gimple_assign_rhs_code (stmt
) != VEC_COND_EXPR
)
4029 tree cond
= gimple_assign_rhs1 (stmt
);
4030 tree_code cmp
= TREE_CODE (cond
);
4031 if (cmp
!= SSA_NAME
)
4034 gassign
*assign
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (cond
));
4036 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign
)) != tcc_comparison
)
4039 cmp
= gimple_assign_rhs_code (assign
);
4041 *lhs
= gimple_assign_rhs1 (assign
);
4043 *rhs
= gimple_assign_rhs2 (assign
);
4045 /* ??? For now, allow only canonical true and false result vectors.
4046 We could expand this to other constants should the need arise,
4047 but at the moment we don't create them. */
4048 tree t
= gimple_assign_rhs2 (stmt
);
4049 tree f
= gimple_assign_rhs3 (stmt
);
4051 if (integer_all_onesp (t
))
4053 else if (integer_all_onesp (f
))
4055 cmp
= invert_tree_comparison (cmp
, false);
4060 if (!integer_zerop (f
))
4069 *type
= TREE_TYPE (cond
);
4073 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4074 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
4077 optimize_vec_cond_expr (tree_code opcode
, vec
<operand_entry
*> *ops
)
4079 unsigned int length
= ops
->length (), i
, j
;
4080 bool any_changes
= false;
4085 for (i
= 0; i
< length
; ++i
)
4087 tree elt0
= (*ops
)[i
]->op
;
4089 gassign
*stmt0
, *vcond0
;
4091 tree type
, lhs0
, rhs0
;
4092 tree_code cmp0
= ovce_extract_ops (elt0
, &stmt0
, &invert
, &type
, &lhs0
,
4094 if (cmp0
== ERROR_MARK
)
4097 for (j
= i
+ 1; j
< length
; ++j
)
4099 tree
&elt1
= (*ops
)[j
]->op
;
4101 gassign
*stmt1
, *vcond1
;
4103 tree_code cmp1
= ovce_extract_ops (elt1
, &stmt1
, NULL
, NULL
, &lhs1
,
4105 if (cmp1
== ERROR_MARK
)
4109 if (opcode
== BIT_AND_EXPR
)
4110 comb
= maybe_fold_and_comparisons (type
, cmp0
, lhs0
, rhs0
,
4112 else if (opcode
== BIT_IOR_EXPR
)
4113 comb
= maybe_fold_or_comparisons (type
, cmp0
, lhs0
, rhs0
,
4121 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4123 fprintf (dump_file
, "Transforming ");
4124 print_generic_expr (dump_file
, gimple_assign_lhs (stmt0
));
4125 fprintf (dump_file
, " %c ", opcode
== BIT_AND_EXPR
? '&' : '|');
4126 print_generic_expr (dump_file
, gimple_assign_lhs (stmt1
));
4127 fprintf (dump_file
, " into ");
4128 print_generic_expr (dump_file
, comb
);
4129 fputc ('\n', dump_file
);
4132 gimple_stmt_iterator gsi
= gsi_for_stmt (vcond0
);
4133 tree exp
= force_gimple_operand_gsi (&gsi
, comb
, true, NULL_TREE
,
4134 true, GSI_SAME_STMT
);
4136 swap_ssa_operands (vcond0
, gimple_assign_rhs2_ptr (vcond0
),
4137 gimple_assign_rhs3_ptr (vcond0
));
4138 gimple_assign_set_rhs1 (vcond0
, exp
);
4139 update_stmt (vcond0
);
4141 elt1
= error_mark_node
;
4150 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
4152 if (oe
->op
== error_mark_node
)
4164 /* Return true if STMT is a cast like:
4170 # _345 = PHI <_123(N), 1(...), 1(...)>
4171 where _234 has bool type, _123 has single use and
4172 bb N has a single successor M. This is commonly used in
4173 the last block of a range test.
4175 Also Return true if STMT is tcc_compare like:
4181 # _345 = PHI <_234(N), 1(...), 1(...)>
4183 where _234 has booltype, single use and
4184 bb N has a single successor M. This is commonly used in
4185 the last block of a range test. */
4188 final_range_test_p (gimple
*stmt
)
4190 basic_block bb
, rhs_bb
, lhs_bb
;
4193 use_operand_p use_p
;
4196 if (!gimple_assign_cast_p (stmt
)
4197 && (!is_gimple_assign (stmt
)
4198 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4199 != tcc_comparison
)))
4201 bb
= gimple_bb (stmt
);
4202 if (!single_succ_p (bb
))
4204 e
= single_succ_edge (bb
);
4205 if (e
->flags
& EDGE_COMPLEX
)
4208 lhs
= gimple_assign_lhs (stmt
);
4209 rhs
= gimple_assign_rhs1 (stmt
);
4210 if (gimple_assign_cast_p (stmt
)
4211 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
4212 || TREE_CODE (rhs
) != SSA_NAME
4213 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
))
4216 if (!gimple_assign_cast_p (stmt
)
4217 && (TREE_CODE (TREE_TYPE (lhs
)) != BOOLEAN_TYPE
))
4220 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4221 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
4224 if (gimple_code (use_stmt
) != GIMPLE_PHI
4225 || gimple_bb (use_stmt
) != e
->dest
)
4228 /* And that the rhs is defined in the same loop. */
4229 if (gimple_assign_cast_p (stmt
))
4231 if (TREE_CODE (rhs
) != SSA_NAME
4232 || !(rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
)))
4233 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
4238 if (TREE_CODE (lhs
) != SSA_NAME
4239 || !(lhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (lhs
)))
4240 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), lhs_bb
))
/* NOTE(review): this region is a line-mangled extraction of GCC's
   suitable_cond_bb; the tokens below are kept byte-identical, only
   comments are added.  The numeric prefixes are original file line
   numbers left behind by the extraction; many original lines
   (braces, short statements) are missing from this view.  */
4247 /* Return true if BB is suitable basic block for inter-bb range test
4248 optimization. If BACKWARD is true, BB should be the only predecessor
4249 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
4250 or compared with to find a common basic block to which all conditions
4251 branch to if true resp. false. If BACKWARD is false, TEST_BB should
4252 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4253 TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
4254 block points to an empty block that falls through into *OTHER_BB and
4255 the phi args match that path. */
/* Signature: bb and test_bb are the pair of condition blocks being
   checked; *other_bb is the common join block (found or verified).  */
4258 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
4259 bool *test_swapped_p
, bool backward
)
4261 edge_iterator ei
, ei2
;
4265 bool other_edge_seen
= false;
4270 /* Check last stmt first. */
4271 stmt
= last_stmt (bb
);
/* Reject blocks whose last stmt is not a GIMPLE_COND (nor, for the
   forward direction, a final_range_test_p stmt), was already visited,
   or could throw.  */
4273 || (gimple_code (stmt
) != GIMPLE_COND
4274 && (backward
|| !final_range_test_p (stmt
)))
4275 || gimple_visited_p (stmt
)
4276 || stmt_could_throw_p (cfun
, stmt
)
4279 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
4282 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4283 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4284 to *OTHER_BB (if not set yet, try to find it out). */
4285 if (EDGE_COUNT (bb
->succs
) != 2)
4287 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
/* Both outgoing edges must be plain true/false edges.  */
4289 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
4291 if (e
->dest
== test_bb
)
/* Lazily discover *other_bb as a successor shared with test_bb.  */
4300 if (*other_bb
== NULL
)
4302 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
4303 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
4305 else if (e
->dest
== e2
->dest
)
4306 *other_bb
= e
->dest
;
4307 if (*other_bb
== NULL
)
4310 if (e
->dest
== *other_bb
)
4311 other_edge_seen
= true;
4315 if (*other_bb
== NULL
|| !other_edge_seen
)
/* Non-GIMPLE_COND terminator: bb must fall straight into *other_bb.  */
4318 else if (single_succ (bb
) != *other_bb
)
4321 /* Now check all PHIs of *OTHER_BB. */
4322 e
= find_edge (bb
, *other_bb
)
;
4323 e2
= find_edge (test_bb
, *other_bb
);
4325 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4327 gphi
*phi
= gsi
.phi ();
4328 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4329 corresponding to BB and TEST_BB predecessor must be the same. */
4330 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
4331 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
4333 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4334 one of the PHIs should have the lhs of the last stmt in
4335 that block as PHI arg and that PHI should have 0 or 1
4336 corresponding to it in all other range test basic blocks
/* considered.  The 0/1 encodes && vs || of the range test.  */
4340 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
4341 == gimple_assign_lhs (stmt
)
4342 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
4343 || integer_onep (gimple_phi_arg_def (phi
,
4349 gimple
*test_last
= last_stmt (test_bb
);
4350 if (gimple_code (test_last
) == GIMPLE_COND
)
4352 if (backward
? e2
->src
!= test_bb
: e
->src
!= bb
)
4355 /* For last_bb, handle also:
4357 goto <bb 6>; [34.00%]
4359 goto <bb 7>; [66.00%]
4361 <bb 6> [local count: 79512730]:
4363 <bb 7> [local count: 1073741824]:
4364 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4365 where bb 7 is *OTHER_BB, but the PHI values from the
4366 earlier bbs match the path through the empty bb
/* e3 is the other (non-*other_bb) successor edge of the block whose
   condition is being matched; presumably it targets the empty
   forwarder block described above — TODO confirm against upstream.  */
4370 e3
= EDGE_SUCC (test_bb
,
4371 e2
== EDGE_SUCC (test_bb
, 0) ? 1 : 0);
4374 e
== EDGE_SUCC (bb
, 0) ? 1 : 0);
4375 if (empty_block_p (e3
->dest
)
4376 && single_succ_p (e3
->dest
)
4377 && single_succ (e3
->dest
) == *other_bb
4378 && single_pred_p (e3
->dest
)
4379 && single_succ_edge (e3
->dest
)->flags
== EDGE_FALLTHRU
)
/* Redirect the PHI-argument lookup through the forwarder block and
   record that the test edge was swapped.  */
4382 e2
= single_succ_edge (e3
->dest
);
4384 e
= single_succ_edge (e3
->dest
);
4386 *test_swapped_p
= true;
/* Non-COND test_bb terminator: its lhs must feed the PHI with a
   constant 0/1 on the other incoming edge.  */
4390 else if (gimple_phi_arg_def (phi
, e2
->dest_idx
)
4391 == gimple_assign_lhs (test_last
)
4392 && (integer_zerop (gimple_phi_arg_def (phi
,
4394 || integer_onep (gimple_phi_arg_def (phi
,
4405 /* Return true if BB doesn't have side-effects that would disallow
4406 range test optimization, all SSA_NAMEs set in the bb are consumed
4407 in the bb and there are no PHIs. */
4410 no_side_effect_bb (basic_block bb
)
4412 gimple_stmt_iterator gsi
;
4415 if (!gimple_seq_empty_p (phi_nodes (bb
)))
4417 last
= last_stmt (bb
);
4418 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4420 gimple
*stmt
= gsi_stmt (gsi
);
4422 imm_use_iterator imm_iter
;
4423 use_operand_p use_p
;
4425 if (is_gimple_debug (stmt
))
4427 if (gimple_has_side_effects (stmt
))
4431 if (!is_gimple_assign (stmt
))
4433 lhs
= gimple_assign_lhs (stmt
);
4434 if (TREE_CODE (lhs
) != SSA_NAME
)
4436 if (gimple_assign_rhs_could_trap_p (stmt
))
4438 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
4440 gimple
*use_stmt
= USE_STMT (use_p
);
4441 if (is_gimple_debug (use_stmt
))
4443 if (gimple_bb (use_stmt
) != bb
)
4450 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4451 return true and fill in *OPS recursively. */
4454 get_ops (tree var
, enum tree_code code
, vec
<operand_entry
*> *ops
,
4457 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
4461 if (!is_reassociable_op (stmt
, code
, loop
))
4464 rhs
[0] = gimple_assign_rhs1 (stmt
);
4465 rhs
[1] = gimple_assign_rhs2 (stmt
);
4466 gimple_set_visited (stmt
, true);
4467 for (i
= 0; i
< 2; i
++)
4468 if (TREE_CODE (rhs
[i
]) == SSA_NAME
4469 && !get_ops (rhs
[i
], code
, ops
, loop
)
4470 && has_single_use (rhs
[i
]))
4472 operand_entry
*oe
= operand_entry_pool
.allocate ();
4478 oe
->stmt_to_insert
= NULL
;
4479 ops
->safe_push (oe
);
4484 /* Find the ops that were added by get_ops starting from VAR, see if
4485 they were changed during update_range_test and if yes, create new
4489 update_ops (tree var
, enum tree_code code
, const vec
<operand_entry
*> &ops
,
4490 unsigned int *pidx
, class loop
*loop
)
4492 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
4496 if (!is_reassociable_op (stmt
, code
, loop
))
4499 rhs
[0] = gimple_assign_rhs1 (stmt
);
4500 rhs
[1] = gimple_assign_rhs2 (stmt
);
4503 for (i
= 0; i
< 2; i
++)
4504 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
4506 rhs
[2 + i
] = update_ops (rhs
[i
], code
, ops
, pidx
, loop
);
4507 if (rhs
[2 + i
] == NULL_TREE
)
4509 if (has_single_use (rhs
[i
]))
4510 rhs
[2 + i
] = ops
[(*pidx
)++]->op
;
4512 rhs
[2 + i
] = rhs
[i
];
4515 if ((rhs
[2] != rhs
[0] || rhs
[3] != rhs
[1])
4516 && (rhs
[2] != rhs
[1] || rhs
[3] != rhs
[0]))
4518 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4519 var
= make_ssa_name (TREE_TYPE (var
));
4520 gassign
*g
= gimple_build_assign (var
, gimple_assign_rhs_code (stmt
),
4522 gimple_set_uid (g
, gimple_uid (stmt
));
4523 gimple_set_visited (g
, true);
4524 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4529 /* Structure to track the initial value passed to get_ops and
4530 the range in the ops vector for each basic block. */
4532 struct inter_bb_range_test_entry
4535 unsigned int first_idx
, last_idx
;
4538 /* Inter-bb range test optimization.
4540 Returns TRUE if a gimple conditional is optimized to a true/false,
4541 otherwise return FALSE.
4543 This indicates to the caller that it should run a CFG cleanup pass
4544 once reassociation is completed. */
4547 maybe_optimize_range_tests (gimple
*stmt
)
4549 basic_block first_bb
= gimple_bb (stmt
);
4550 basic_block last_bb
= first_bb
;
4551 basic_block other_bb
= NULL
;
4555 auto_vec
<operand_entry
*> ops
;
4556 auto_vec
<inter_bb_range_test_entry
> bbinfo
;
4557 bool any_changes
= false;
4558 bool cfg_cleanup_needed
= false;
4560 /* Consider only basic blocks that end with GIMPLE_COND or
4561 a cast statement satisfying final_range_test_p. All
4562 but the last bb in the first_bb .. last_bb range
4563 should end with GIMPLE_COND. */
4564 if (gimple_code (stmt
) == GIMPLE_COND
)
4566 if (EDGE_COUNT (first_bb
->succs
) != 2)
4567 return cfg_cleanup_needed
;
4569 else if (final_range_test_p (stmt
))
4570 other_bb
= single_succ (first_bb
);
4572 return cfg_cleanup_needed
;
4574 if (stmt_could_throw_p (cfun
, stmt
))
4575 return cfg_cleanup_needed
;
4577 /* As relative ordering of post-dominator sons isn't fixed,
4578 maybe_optimize_range_tests can be called first on any
4579 bb in the range we want to optimize. So, start searching
4580 backwards, if first_bb can be set to a predecessor. */
4581 while (single_pred_p (first_bb
))
4583 basic_block pred_bb
= single_pred (first_bb
);
4584 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, NULL
, true))
4586 if (!no_side_effect_bb (first_bb
))
4590 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4591 Before starting forward search in last_bb successors, find
4592 out the other_bb. */
4593 if (first_bb
== last_bb
)
4596 /* As non-GIMPLE_COND last stmt always terminates the range,
4597 if forward search didn't discover anything, just give up. */
4598 if (gimple_code (stmt
) != GIMPLE_COND
)
4599 return cfg_cleanup_needed
;
4600 /* Look at both successors. Either it ends with a GIMPLE_COND
4601 and satisfies suitable_cond_bb, or ends with a cast and
4602 other_bb is that cast's successor. */
4603 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
4604 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
4605 || e
->dest
== first_bb
)
4606 return cfg_cleanup_needed
;
4607 else if (single_pred_p (e
->dest
))
4609 stmt
= last_stmt (e
->dest
);
4611 && gimple_code (stmt
) == GIMPLE_COND
4612 && EDGE_COUNT (e
->dest
->succs
) == 2)
4614 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
,
4621 && final_range_test_p (stmt
)
4622 && find_edge (first_bb
, single_succ (e
->dest
)))
4624 other_bb
= single_succ (e
->dest
);
4625 if (other_bb
== first_bb
)
4629 if (other_bb
== NULL
)
4630 return cfg_cleanup_needed
;
4632 /* Now do the forward search, moving last_bb to successor bbs
4633 that aren't other_bb. */
4634 while (EDGE_COUNT (last_bb
->succs
) == 2)
4636 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
4637 if (e
->dest
!= other_bb
)
4641 if (!single_pred_p (e
->dest
))
4643 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, NULL
, false))
4645 if (!no_side_effect_bb (e
->dest
))
4649 if (first_bb
== last_bb
)
4650 return cfg_cleanup_needed
;
4651 /* Here basic blocks first_bb through last_bb's predecessor
4652 end with GIMPLE_COND, all of them have one of the edges to
4653 other_bb and another to another block in the range,
4654 all blocks except first_bb don't have side-effects and
4655 last_bb ends with either GIMPLE_COND, or cast satisfying
4656 final_range_test_p. */
4657 for (bb
= last_bb
; ; bb
= single_pred (bb
))
4659 enum tree_code code
;
4661 inter_bb_range_test_entry bb_ent
;
4663 bb_ent
.op
= NULL_TREE
;
4664 bb_ent
.first_idx
= ops
.length ();
4665 bb_ent
.last_idx
= bb_ent
.first_idx
;
4666 e
= find_edge (bb
, other_bb
);
4667 stmt
= last_stmt (bb
);
4668 gimple_set_visited (stmt
, true);
4669 if (gimple_code (stmt
) != GIMPLE_COND
)
4671 use_operand_p use_p
;
4676 lhs
= gimple_assign_lhs (stmt
);
4677 rhs
= gimple_assign_rhs1 (stmt
);
4678 gcc_assert (bb
== last_bb
);
4687 # _345 = PHI <_123(N), 1(...), 1(...)>
4689 or 0 instead of 1. If it is 0, the _234
4690 range test is anded together with all the
4691 other range tests, if it is 1, it is ored with
4693 single_imm_use (lhs
, &use_p
, &phi
);
4694 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
4695 e2
= find_edge (first_bb
, other_bb
);
4697 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
4698 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
4699 code
= BIT_AND_EXPR
;
4702 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
4703 code
= BIT_IOR_EXPR
;
4706 /* If _234 SSA_NAME_DEF_STMT is
4708 (or &, corresponding to 1/0 in the phi arguments,
4709 push into ops the individual range test arguments
4710 of the bitwise or resp. and, recursively. */
4711 if (TREE_CODE (rhs
) == SSA_NAME
4712 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4714 && !get_ops (rhs
, code
, &ops
,
4715 loop_containing_stmt (stmt
))
4716 && has_single_use (rhs
))
4718 /* Otherwise, push the _234 range test itself. */
4719 operand_entry
*oe
= operand_entry_pool
.allocate ();
4725 oe
->stmt_to_insert
= NULL
;
4730 else if (is_gimple_assign (stmt
)
4731 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4733 && !get_ops (lhs
, code
, &ops
,
4734 loop_containing_stmt (stmt
))
4735 && has_single_use (lhs
))
4737 operand_entry
*oe
= operand_entry_pool
.allocate ();
4748 bb_ent
.last_idx
= ops
.length ();
4751 bbinfo
.safe_push (bb_ent
);
4754 else if (bb
== last_bb
)
4756 /* For last_bb, handle also:
4758 goto <bb 6>; [34.00%]
4760 goto <bb 7>; [66.00%]
4762 <bb 6> [local count: 79512730]:
4764 <bb 7> [local count: 1073741824]:
4765 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4766 where bb 7 is OTHER_BB, but the PHI values from the
4767 earlier bbs match the path through the empty bb
4769 bool test_swapped_p
= false;
4770 bool ok
= suitable_cond_bb (single_pred (last_bb
), last_bb
,
4771 &other_bb
, &test_swapped_p
, true);
4774 e
= EDGE_SUCC (bb
, e
== EDGE_SUCC (bb
, 0) ? 1 : 0);
4776 /* Otherwise stmt is GIMPLE_COND. */
4777 code
= gimple_cond_code (stmt
);
4778 lhs
= gimple_cond_lhs (stmt
);
4779 rhs
= gimple_cond_rhs (stmt
);
4780 if (TREE_CODE (lhs
) == SSA_NAME
4781 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
4782 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
4783 || rhs
!= boolean_false_node
4784 /* Either push into ops the individual bitwise
4785 or resp. and operands, depending on which
4786 edge is other_bb. */
4787 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
4788 ^ (code
== EQ_EXPR
))
4789 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
4790 loop_containing_stmt (stmt
))))
4792 /* Or push the GIMPLE_COND stmt itself. */
4793 operand_entry
*oe
= operand_entry_pool
.allocate ();
4796 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
4797 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
4798 /* oe->op = NULL signs that there is no SSA_NAME
4799 for the range test, and oe->id instead is the
4800 basic block number, at which's end the GIMPLE_COND
4804 oe
->stmt_to_insert
= NULL
;
4809 else if (ops
.length () > bb_ent
.first_idx
)
4812 bb_ent
.last_idx
= ops
.length ();
4814 bbinfo
.safe_push (bb_ent
);
4818 if (ops
.length () > 1)
4819 any_changes
= optimize_range_tests (ERROR_MARK
, &ops
, first_bb
);
4822 unsigned int idx
, max_idx
= 0;
4823 /* update_ops relies on has_single_use predicates returning the
4824 same values as it did during get_ops earlier. Additionally it
4825 never removes statements, only adds new ones and it should walk
4826 from the single imm use and check the predicate already before
4827 making those changes.
4828 On the other side, the handling of GIMPLE_COND directly can turn
4829 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4830 it needs to be done in a separate loop afterwards. */
4831 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
4833 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
4834 && bbinfo
[idx
].op
!= NULL_TREE
)
4839 stmt
= last_stmt (bb
);
4840 new_op
= update_ops (bbinfo
[idx
].op
,
4842 ops
[bbinfo
[idx
].first_idx
]->rank
,
4843 ops
, &bbinfo
[idx
].first_idx
,
4844 loop_containing_stmt (stmt
));
4845 if (new_op
== NULL_TREE
)
4847 gcc_assert (bb
== last_bb
);
4848 new_op
= ops
[bbinfo
[idx
].first_idx
++]->op
;
4850 if (bbinfo
[idx
].op
!= new_op
)
4852 imm_use_iterator iter
;
4853 use_operand_p use_p
;
4854 gimple
*use_stmt
, *cast_or_tcc_cmp_stmt
= NULL
;
4856 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, bbinfo
[idx
].op
)
4857 if (is_gimple_debug (use_stmt
))
4859 else if (gimple_code (use_stmt
) == GIMPLE_COND
4860 || gimple_code (use_stmt
) == GIMPLE_PHI
)
4861 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
4862 SET_USE (use_p
, new_op
);
4863 else if ((is_gimple_assign (use_stmt
)
4865 (gimple_assign_rhs_code (use_stmt
))
4866 == tcc_comparison
)))
4867 cast_or_tcc_cmp_stmt
= use_stmt
;
4868 else if (gimple_assign_cast_p (use_stmt
))
4869 cast_or_tcc_cmp_stmt
= use_stmt
;
4873 if (cast_or_tcc_cmp_stmt
)
4875 gcc_assert (bb
== last_bb
);
4876 tree lhs
= gimple_assign_lhs (cast_or_tcc_cmp_stmt
);
4877 tree new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
4878 enum tree_code rhs_code
4879 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt
)
4880 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt
)
4883 if (is_gimple_min_invariant (new_op
))
4885 new_op
= fold_convert (TREE_TYPE (lhs
), new_op
);
4886 g
= gimple_build_assign (new_lhs
, new_op
);
4889 g
= gimple_build_assign (new_lhs
, rhs_code
, new_op
);
4890 gimple_stmt_iterator gsi
4891 = gsi_for_stmt (cast_or_tcc_cmp_stmt
);
4892 gimple_set_uid (g
, gimple_uid (cast_or_tcc_cmp_stmt
));
4893 gimple_set_visited (g
, true);
4894 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4895 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
4896 if (is_gimple_debug (use_stmt
))
4898 else if (gimple_code (use_stmt
) == GIMPLE_COND
4899 || gimple_code (use_stmt
) == GIMPLE_PHI
)
4900 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
4901 SET_USE (use_p
, new_lhs
);
4910 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
4912 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
4913 && bbinfo
[idx
].op
== NULL_TREE
4914 && ops
[bbinfo
[idx
].first_idx
]->op
!= NULL_TREE
)
4916 gcond
*cond_stmt
= as_a
<gcond
*> (last_stmt (bb
));
4921 /* If we collapse the conditional to a true/false
4922 condition, then bubble that knowledge up to our caller. */
4923 if (integer_zerop (ops
[bbinfo
[idx
].first_idx
]->op
))
4925 gimple_cond_make_false (cond_stmt
);
4926 cfg_cleanup_needed
= true;
4928 else if (integer_onep (ops
[bbinfo
[idx
].first_idx
]->op
))
4930 gimple_cond_make_true (cond_stmt
);
4931 cfg_cleanup_needed
= true;
4935 gimple_cond_set_code (cond_stmt
, NE_EXPR
);
4936 gimple_cond_set_lhs (cond_stmt
,
4937 ops
[bbinfo
[idx
].first_idx
]->op
);
4938 gimple_cond_set_rhs (cond_stmt
, boolean_false_node
);
4940 update_stmt (cond_stmt
);
4946 /* The above changes could result in basic blocks after the first
4947 modified one, up to and including last_bb, to be executed even if
4948 they would not be in the original program. If the value ranges of
4949 assignment lhs' in those bbs were dependent on the conditions
4950 guarding those basic blocks which now can change, the VRs might
4951 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4952 are only used within the same bb, it should be not a big deal if
4953 we just reset all the VRs in those bbs. See PR68671. */
4954 for (bb
= last_bb
, idx
= 0; idx
< max_idx
; bb
= single_pred (bb
), idx
++)
4955 reset_flow_sensitive_info_in_bb (bb
);
4957 return cfg_cleanup_needed
;
4960 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4961 of STMT in its operands.  This is also known as a "destructive
4962 update" operation. */
4965 is_phi_for_stmt (gimple
*stmt
, tree operand
)
4970 use_operand_p arg_p
;
/* Only an SSA_NAME can have a PHI defining statement.  */
4973 if (TREE_CODE (operand
) != SSA_NAME
)
4976 lhs
= gimple_assign_lhs (stmt
);
4978 def_stmt
= SSA_NAME_DEF_STMT (operand
);
/* dyn_cast yields NULL when the defining statement is not a PHI.  */
4979 def_phi
= dyn_cast
<gphi
*> (def_stmt
);
/* Scan the PHI arguments for a use of STMT's lhs.  */
4983 FOR_EACH_PHI_ARG (arg_p
, def_phi
, i
, SSA_OP_USE
)
4984 if (lhs
== USE_FROM_PTR (arg_p
))
4989 /* Remove def stmt of VAR if VAR has zero uses and recurse
4990 on rhs1 operand if so. */
4993 remove_visited_stmt_chain (tree var
)
4996 gimple_stmt_iterator gsi
;
/* Stop walking once VAR is not an SSA name or is still used.  */
5000 if (TREE_CODE (var
) != SSA_NAME
|| !has_zero_uses (var
))
5002 stmt
= SSA_NAME_DEF_STMT (var
);
/* Only delete assignments this pass has already visited; then
   continue the walk through their rhs1 chain.  */
5003 if (is_gimple_assign (stmt
) && gimple_visited_p (stmt
))
5005 var
= gimple_assign_rhs1 (stmt
);
5006 gsi
= gsi_for_stmt (stmt
);
5007 reassoc_remove_stmt (&gsi
);
5008 release_defs (stmt
);
5015 /* This function checks three consecutive operands in
5016 passed operands vector OPS starting from OPINDEX and
5017 swaps two operands if it is profitable for binary operation
5018 consuming OPINDEX + 1 and OPINDEX + 2 operands.
5020 We pair ops with the same rank if possible.
5022 The alternative we try is to see if STMT is a destructive
5023 update style statement, which is like:
5026 In that case, we want to use the destructive update form to
5027 expose the possible vectorizer sum reduction opportunity.
5028 In that case, the third operand will be the phi node. This
5029 check is not performed if STMT is null.
5031 We could, of course, try to be better as noted above, and do a
5032 lot of work to try to find these opportunities in >3 operand
5033 cases, but it is unlikely to be worth it. */
5036 swap_ops_for_binary_stmt (const vec
<operand_entry
*> &ops
,
5037 unsigned int opindex
, gimple
*stmt
)
5039 operand_entry
*oe1
, *oe2
, *oe3
;
5042 oe2
= ops
[opindex
+ 1];
5043 oe3
= ops
[opindex
+ 2];
/* Case 1: ops 1 and 2 share a rank while op 3 differs, or op 3 feeds
   a destructive-update PHI of STMT while ops 1 and 2 do not: bring
   op 3 to the front.  */
5045 if ((oe1
->rank
== oe2
->rank
5046 && oe2
->rank
!= oe3
->rank
)
5047 || (stmt
&& is_phi_for_stmt (stmt
, oe3
->op
)
5048 && !is_phi_for_stmt (stmt
, oe1
->op
)
5049 && !is_phi_for_stmt (stmt
, oe2
->op
)))
5050 std::swap (*oe1
, *oe3
);
/* Case 2: symmetric situation with op 2 instead of op 3.  */
5051 else if ((oe1
->rank
== oe3
->rank
5052 && oe2
->rank
!= oe3
->rank
)
5053 || (stmt
&& is_phi_for_stmt (stmt
, oe2
->op
)
5054 && !is_phi_for_stmt (stmt
, oe1
->op
)
5055 && !is_phi_for_stmt (stmt
, oe3
->op
)))
5056 std::swap (*oe1
, *oe2
);
5059 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5060 two definitions, otherwise return STMT. */
5062 static inline gimple
*
5063 find_insert_point (gimple
*stmt
, tree rhs1
, tree rhs2
)
/* Successively lower STMT to the defining statement of whichever
   operand it dominates; the result is the latest of the three.  */
5065 if (TREE_CODE (rhs1
) == SSA_NAME
5066 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs1
)))
5067 stmt
= SSA_NAME_DEF_STMT (rhs1
);
5068 if (TREE_CODE (rhs2
) == SSA_NAME
5069 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs2
)))
5070 stmt
= SSA_NAME_DEF_STMT (rhs2
);
5074 /* If the stmt that defines operand has to be inserted, insert it
5077 insert_stmt_before_use (gimple
*stmt
, gimple
*stmt_to_insert
)
5079 gcc_assert (is_gimple_assign (stmt_to_insert
));
5080 tree rhs1
= gimple_assign_rhs1 (stmt_to_insert
);
5081 tree rhs2
= gimple_assign_rhs2 (stmt_to_insert
);
/* Pick the latest point dominated by the operands' definitions.  */
5082 gimple
*insert_point
= find_insert_point (stmt
, rhs1
, rhs2
);
5083 gimple_stmt_iterator gsi
= gsi_for_stmt (insert_point
);
/* Inherit the UID so later dominance queries within the bb work.  */
5084 gimple_set_uid (stmt_to_insert
, gimple_uid (insert_point
));
5086 /* If the insert point is not stmt, then insert_point would be
5087 the point where operand rhs1 or rhs2 is defined. In this case,
5088 stmt_to_insert has to be inserted afterwards. This would
5089 only happen when the stmt insertion point is flexible. */
5090 if (stmt
== insert_point
)
5091 gsi_insert_before (&gsi
, stmt_to_insert
, GSI_NEW_STMT
);
5093 insert_stmt_after (stmt_to_insert
, insert_point
);
5097 /* Recursively rewrite our linearized statements so that the operators
5098 match those in OPS[OPINDEX], putting the computation in rank
5099 order. Return new lhs.
5100 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5101 the current stmt and during recursive invocations.
5102 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5103 recursive invocations. */
5106 rewrite_expr_tree (gimple
*stmt
, enum tree_code rhs_code
, unsigned int opindex
,
5107 const vec
<operand_entry
*> &ops
, bool changed
,
5110 tree rhs1
= gimple_assign_rhs1 (stmt
);
5111 tree rhs2
= gimple_assign_rhs2 (stmt
);
5112 tree lhs
= gimple_assign_lhs (stmt
);
5115 /* The final recursion case for this function is that you have
5116 exactly two operations left.
5117 If we had exactly one op in the entire list to start with, we
5118 would have never called this function, and the tail recursion
5119 rewrites them one at a time. */
5120 if (opindex
+ 2 == ops
.length ())
5122 operand_entry
*oe1
, *oe2
;
5125 oe2
= ops
[opindex
+ 1];
/* Nothing to do if the statement already matches the final pair.  */
5127 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
5129 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
5130 unsigned int uid
= gimple_uid (stmt
);
5132 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5134 fprintf (dump_file
, "Transforming ");
5135 print_gimple_stmt (dump_file
, stmt
, 0);
5138 /* If the stmt that defines operand has to be inserted, insert it
5140 if (oe1
->stmt_to_insert
)
5141 insert_stmt_before_use (stmt
, oe1
->stmt_to_insert
);
5142 if (oe2
->stmt_to_insert
)
5143 insert_stmt_before_use (stmt
, oe2
->stmt_to_insert
);
5144 /* Even when changed is false, reassociation could have e.g. removed
5145 some redundant operations, so unless we are just swapping the
5146 arguments or unless there is no change at all (then we just
5147 return lhs), force creation of a new SSA_NAME. */
5148 if (changed
|| ((rhs1
!= oe2
->op
|| rhs2
!= oe1
->op
) && opindex
))
5150 gimple
*insert_point
5151 = find_insert_point (stmt
, oe1
->op
, oe2
->op
);
5152 lhs
= make_ssa_name (TREE_TYPE (lhs
));
5154 = gimple_build_assign (lhs
, rhs_code
,
5156 gimple_set_uid (stmt
, uid
);
5157 gimple_set_visited (stmt
, true);
5158 if (insert_point
== gsi_stmt (gsi
))
5159 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5161 insert_stmt_after (stmt
, insert_point
);
/* In-place update: only valid when STMT already post-dominates
   the operand definitions.  */
5165 gcc_checking_assert (find_insert_point (stmt
, oe1
->op
, oe2
->op
)
5167 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
5168 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
/* The old rhs1 chain may now be dead; clean it up.  */
5172 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
5173 remove_visited_stmt_chain (rhs1
);
5175 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5177 fprintf (dump_file
, " into ");
5178 print_gimple_stmt (dump_file
, stmt
, 0);
5184 /* If we hit here, we should have 3 or more ops left. */
5185 gcc_assert (opindex
+ 2 < ops
.length ());
5187 /* Rewrite the next operator. */
5190 /* If the stmt that defines operand has to be inserted, insert it
5192 if (oe
->stmt_to_insert
)
5193 insert_stmt_before_use (stmt
, oe
->stmt_to_insert
);
5195 /* Recurse on the LHS of the binary operator, which is guaranteed to
5196 be the non-leaf side. */
5198 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), rhs_code
, opindex
+ 1, ops
,
5199 changed
|| oe
->op
!= rhs2
|| next_changed
,
5202 if (oe
->op
!= rhs2
|| new_rhs1
!= rhs1
)
5204 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5206 fprintf (dump_file
, "Transforming ");
5207 print_gimple_stmt (dump_file
, stmt
, 0);
5210 /* If changed is false, this is either opindex == 0
5211 or all outer rhs2's were equal to corresponding oe->op,
5212 and powi_result is NULL.
5213 That means lhs is equivalent before and after reassociation.
5214 Otherwise ensure the old lhs SSA_NAME is not reused and
5215 create a new stmt as well, so that any debug stmts will be
5216 properly adjusted. */
5219 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
5220 unsigned int uid
= gimple_uid (stmt
);
5221 gimple
*insert_point
= find_insert_point (stmt
, new_rhs1
, oe
->op
);
5223 lhs
= make_ssa_name (TREE_TYPE (lhs
));
5224 stmt
= gimple_build_assign (lhs
, rhs_code
,
5226 gimple_set_uid (stmt
, uid
);
5227 gimple_set_visited (stmt
, true);
5228 if (insert_point
== gsi_stmt (gsi
))
5229 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5231 insert_stmt_after (stmt
, insert_point
);
5235 gcc_checking_assert (find_insert_point (stmt
, new_rhs1
, oe
->op
)
5237 gimple_assign_set_rhs1 (stmt
, new_rhs1
);
5238 gimple_assign_set_rhs2 (stmt
, oe
->op
);
5242 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5244 fprintf (dump_file
, " into ");
5245 print_gimple_stmt (dump_file
, stmt
, 0);
5251 /* Find out how many cycles we need to compute statements chain.
5252 OPS_NUM holds number of statements in a chain. CPU_WIDTH is a
5253 maximum number of independent statements we may execute per cycle. */
5256 get_required_cycles (int ops_num
, int cpu_width
)
5262 /* While we have more than 2 * cpu_width operands
5263 we may reduce number of operands by cpu_width
5265 res
= ops_num
/ (2 * cpu_width
);
5267 /* Remained operands count may be reduced twice per cycle
5268 until we have only one operand. */
5269 rest
= (unsigned)(ops_num
- res
* cpu_width
);
5270 elog
= exact_log2 (rest
);
/* Non-power-of-two remainder needs one extra halving step.  */
5274 res
+= floor_log2 (rest
) + 1;
5279 /* Returns an optimal number of registers to use for computation of
5280 given statements.
   The user-specified param takes precedence over the target hook.  */
5283 get_reassociation_width (int ops_num
, enum tree_code opc
,
5286 int param_width
= param_tree_reassoc_width
;
5291 if (param_width
> 0)
5292 width
= param_width
;
5294 width
= targetm
.sched
.reassociation_width (opc
, mode
);
5299 /* Get the minimal time required for sequence computation. */
5300 cycles_best
= get_required_cycles (ops_num
, width
);
5302 /* Check if we may use less width and still compute sequence for
5303 the same time. It will allow us to reduce registers usage.
5304 get_required_cycles is monotonically increasing with lower width
5305 so we can perform a binary search for the minimal width that still
5306 results in the optimal cycle count. */
5308 while (width
> width_min
)
5310 int width_mid
= (width
+ width_min
) / 2;
5312 if (get_required_cycles (ops_num
, width_mid
) == cycles_best
)
5314 else if (width_min
< width_mid
)
5315 width_min
= width_mid
;
5323 /* Recursively rewrite our linearized statements so that the operators
5324 match those in OPS[OPINDEX], putting the computation in rank
5325 order and trying to allow operations to be executed in
5329 rewrite_expr_tree_parallel (gassign
*stmt
, int width
,
5330 const vec
<operand_entry
*> &ops
)
5332 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
5333 int op_num
= ops
.length ();
5334 gcc_assert (op_num
> 0);
/* N operands need N-1 binary statements.  */
5335 int stmt_num
= op_num
- 1;
5336 gimple
**stmts
= XALLOCAVEC (gimple
*, stmt_num
);
5337 int op_index
= op_num
- 1;
5339 int ready_stmts_end
= 0;
5341 gimple
*stmt1
= NULL
, *stmt2
= NULL
;
5342 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
5344 /* We start expression rewriting from the top statements.
5345 So, in this loop we create a full list of statements
5346 we will work with. */
5347 stmts
[stmt_num
- 1] = stmt
;
5348 for (i
= stmt_num
- 2; i
>= 0; i
--)
5349 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
5351 for (i
= 0; i
< stmt_num
; i
++)
5355 /* Determine whether we should use results of
5356 already handled statements or not. */
5357 if (ready_stmts_end
== 0
5358 && (i
- stmt_index
>= width
|| op_index
< 1))
5359 ready_stmts_end
= i
;
5361 /* Now we choose operands for the next statement. Non zero
5362 value in ready_stmts_end means here that we should use
5363 the result of already generated statements as new operand. */
5364 if (ready_stmts_end
> 0)
5366 op1
= gimple_assign_lhs (stmts
[stmt_index
++]);
5367 if (ready_stmts_end
> stmt_index
)
5368 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
5369 else if (op_index
>= 0)
5371 operand_entry
*oe
= ops
[op_index
--];
5372 stmt2
= oe
->stmt_to_insert
;
5377 gcc_assert (stmt_index
< i
);
5378 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
5381 if (stmt_index
>= ready_stmts_end
)
5382 ready_stmts_end
= 0;
/* Otherwise take two fresh operands from OPS, pairing by rank.  */
5387 swap_ops_for_binary_stmt (ops
, op_index
- 2, NULL
);
5388 operand_entry
*oe2
= ops
[op_index
--];
5389 operand_entry
*oe1
= ops
[op_index
--];
5391 stmt2
= oe2
->stmt_to_insert
;
5393 stmt1
= oe1
->stmt_to_insert
;
5396 /* If we emit the last statement then we should put
5397 operands into the last statement. It will also
5399 if (op_index
< 0 && stmt_index
== i
)
5402 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5404 fprintf (dump_file
, "Transforming ");
5405 print_gimple_stmt (dump_file
, stmts
[i
], 0);
5408 /* If the stmt that defines operand has to be inserted, insert it
5411 insert_stmt_before_use (stmts
[i
], stmt1
);
5413 insert_stmt_before_use (stmts
[i
], stmt2
);
5414 stmt1
= stmt2
= NULL
;
5416 /* We keep original statement only for the last one. All
5417 others are recreated. */
5418 if (i
== stmt_num
- 1)
5420 gimple_assign_set_rhs1 (stmts
[i
], op1
);
5421 gimple_assign_set_rhs2 (stmts
[i
], op2
);
5422 update_stmt (stmts
[i
]);
5426 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
), op1
, op2
, opcode
);
5427 gimple_set_visited (stmts
[i
], true);
5429 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5431 fprintf (dump_file
, " into ");
5432 print_gimple_stmt (dump_file
, stmts
[i
], 0);
/* Clean up any now-dead statements from the original chain.  */
5436 remove_visited_stmt_chain (last_rhs1
);
5439 /* Transform STMT, which is really (A + B) + (C + D) into the left
5440 linear form, ((A+B)+C)+D.
5441 Recurse on D if necessary. */
5444 linearize_expr (gimple
*stmt
)
5446 gimple_stmt_iterator gsi
;
5447 gimple
*binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
5448 gimple
*binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
5449 gimple
*oldbinrhs
= binrhs
;
5450 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
5451 gimple
*newbinrhs
= NULL
;
5452 class loop
*loop
= loop_containing_stmt (stmt
);
5453 tree lhs
= gimple_assign_lhs (stmt
);
/* Both sides must be reassociable same-code operations.  */
5455 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
5456 && is_reassociable_op (binrhs
, rhscode
, loop
));
5458 gsi
= gsi_for_stmt (stmt
);
/* Re-associate: stmt = (binlhs op C) op D, building a fresh
   statement for the inner (binlhs op C) part.  */
5460 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
5461 binrhs
= gimple_build_assign (make_ssa_name (TREE_TYPE (lhs
)),
5462 gimple_assign_rhs_code (binrhs
),
5463 gimple_assign_lhs (binlhs
),
5464 gimple_assign_rhs2 (binrhs
));
5465 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
5466 gsi_insert_before (&gsi
, binrhs
, GSI_SAME_STMT
);
5467 gimple_set_uid (binrhs
, gimple_uid (stmt
));
5469 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
5470 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
5472 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5474 fprintf (dump_file
, "Linearized: ");
5475 print_gimple_stmt (dump_file
, stmt
, 0);
5478 reassociate_stats
.linearized
++;
/* The original rhs definition is dead now.  */
5481 gsi
= gsi_for_stmt (oldbinrhs
);
5482 reassoc_remove_stmt (&gsi
);
5483 release_defs (oldbinrhs
);
5485 gimple_set_visited (stmt
, true);
5486 gimple_set_visited (binlhs
, true);
5487 gimple_set_visited (binrhs
, true);
5489 /* Tail recurse on the new rhs if it still needs reassociation. */
5490 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
5491 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5492 want to change the algorithm while converting to tuples. */
5493 linearize_expr (stmt
);
5496 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5497 it. Otherwise, return NULL. */
5500 get_single_immediate_use (tree lhs
)
5502 use_operand_p immuse
;
/* All three conditions must hold: SSA name, exactly one immediate
   use, and that use being an assignment.  */
5505 if (TREE_CODE (lhs
) == SSA_NAME
5506 && single_imm_use (lhs
, &immuse
, &immusestmt
)
5507 && is_gimple_assign (immusestmt
))
5513 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5514 representing the negated value. Insertions of any necessary
5515 instructions go before GSI.
5516 This function is recursive in that, if you hand it "a_5" as the
5517 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5518 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5521 negate_value (tree tonegate
, gimple_stmt_iterator
*gsip
)
5523 gimple
*negatedefstmt
= NULL
;
5524 tree resultofnegate
;
5525 gimple_stmt_iterator gsi
;
5528 /* If we are trying to negate a name, defined by an add, negate the
5529 add operands instead. */
5530 if (TREE_CODE (tonegate
) == SSA_NAME
)
5531 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
/* The single-use requirement keeps the rewrite from affecting other
   consumers of the add's result.  */
5532 if (TREE_CODE (tonegate
) == SSA_NAME
5533 && is_gimple_assign (negatedefstmt
)
5534 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
5535 && has_single_use (gimple_assign_lhs (negatedefstmt
))
5536 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
5538 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
5539 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
5540 tree lhs
= gimple_assign_lhs (negatedefstmt
);
/* The iterator is refreshed after each recursive call because
   negate_value may have inserted statements before it.  */
5543 gsi
= gsi_for_stmt (negatedefstmt
);
5544 rhs1
= negate_value (rhs1
, &gsi
);
5546 gsi
= gsi_for_stmt (negatedefstmt
);
5547 rhs2
= negate_value (rhs2
, &gsi
);
5549 gsi
= gsi_for_stmt (negatedefstmt
);
5550 lhs
= make_ssa_name (TREE_TYPE (lhs
));
5551 gimple_set_visited (negatedefstmt
, true);
5552 g
= gimple_build_assign (lhs
, PLUS_EXPR
, rhs1
, rhs2
);
5553 gimple_set_uid (g
, gimple_uid (negatedefstmt
));
5554 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
/* Generic fallback: emit an explicit NEGATE_EXPR.  */
5558 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
5559 resultofnegate
= force_gimple_operand_gsi (gsip
, tonegate
, true,
5560 NULL_TREE
, true, GSI_SAME_STMT
);
/* Propagate the UID backwards over any statements the gimplifier
   just inserted (they start with UID 0).  */
5562 uid
= gimple_uid (gsi_stmt (gsi
));
5563 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
5565 gimple
*stmt
= gsi_stmt (gsi
);
5566 if (gimple_uid (stmt
) != 0)
5568 gimple_set_uid (stmt
, uid
);
5570 return resultofnegate
;
5573 /* Return true if we should break up the subtract in STMT into an add
5574 with negate. This is true when the subtract operands are really
5575 adds, or the subtract itself is used in an add expression. In
5576 either case, breaking up the subtract into an add with negate
5577 exposes the adds to reassociation. */
5580 should_break_up_subtract (gimple
*stmt
)
5582 tree lhs
= gimple_assign_lhs (stmt
);
5583 tree binlhs
= gimple_assign_rhs1 (stmt
);
5584 tree binrhs
= gimple_assign_rhs2 (stmt
);
5586 class loop
*loop
= loop_containing_stmt (stmt
);
/* Either operand being fed by a reassociable PLUS qualifies.  */
5588 if (TREE_CODE (binlhs
) == SSA_NAME
5589 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs
), PLUS_EXPR
, loop
))
5592 if (TREE_CODE (binrhs
) == SSA_NAME
5593 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
), PLUS_EXPR
, loop
))
/* Or the subtract result flows into a single add/subtract/multiply.  */
5596 if (TREE_CODE (lhs
) == SSA_NAME
5597 && (immusestmt
= get_single_immediate_use (lhs
))
5598 && is_gimple_assign (immusestmt
)
5599 && (gimple_assign_rhs_code (immusestmt
) == PLUS_EXPR
5600 || (gimple_assign_rhs_code (immusestmt
) == MINUS_EXPR
5601 && gimple_assign_rhs1 (immusestmt
) == lhs
)
5602 || gimple_assign_rhs_code (immusestmt
) == MULT_EXPR
))
5607 /* Transform STMT from A - B into A + -B. */
5610 break_up_subtract (gimple
*stmt
, gimple_stmt_iterator
*gsip
)
5612 tree rhs1
= gimple_assign_rhs1 (stmt
);
5613 tree rhs2
= gimple_assign_rhs2 (stmt
);
5615 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5617 fprintf (dump_file
, "Breaking up subtract ");
5618 print_gimple_stmt (dump_file
, stmt
, 0);
/* Negate the subtrahend, then rewrite STMT in place as an add.  */
5621 rhs2
= negate_value (rhs2
, gsip
);
5622 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
5626 /* Determine whether STMT is a builtin call that raises an SSA name
5627 to an integer power and has only one use. If so, and this is early
5628 reassociation and unsafe math optimizations are permitted, place
5629 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5630 If any of these conditions does not hold, return FALSE. */
5633 acceptable_pow_call (gcall
*stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
5636 REAL_VALUE_TYPE c
, cint
;
5638 switch (gimple_call_combined_fn (stmt
))
/* pow with errno semantics cannot be expanded.  */
5641 if (flag_errno_math
)
5644 *base
= gimple_call_arg (stmt
, 0);
5645 arg1
= gimple_call_arg (stmt
, 1);
5647 if (TREE_CODE (arg1
) != REAL_CST
)
5650 c
= TREE_REAL_CST (arg1
);
/* Exponent must fit in a HOST_WIDE_INT.  */
5652 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
/* Round-trip through integer to verify the constant is integral.  */
5655 *exponent
= real_to_integer (&c
);
5656 real_from_integer (&cint
, VOIDmode
, *exponent
, SIGNED
);
5657 if (!real_identical (&c
, &cint
))
5663 *base
= gimple_call_arg (stmt
, 0);
5664 arg1
= gimple_call_arg (stmt
, 1);
5666 if (!tree_fits_shwi_p (arg1
))
5669 *exponent
= tree_to_shwi (arg1
);
5676 /* Expanding negative exponents is generally unproductive, so we don't
5677 complicate matters with those. Exponents of zero and one should
5678 have been handled by expression folding. */
5679 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
5685 /* Try to derive and add operand entry for OP to *OPS. Return false if
5689 try_special_add_to_ops (vec
<operand_entry
*> *ops
,
5690 enum tree_code code
,
5691 tree op
, gimple
* def_stmt
)
5693 tree base
= NULL_TREE
;
5694 HOST_WIDE_INT exponent
= 0;
/* Only single-use SSA names are candidates for special handling.  */
5696 if (TREE_CODE (op
) != SSA_NAME
5697 || ! has_single_use (op
))
/* Case 1: a pow/powi call in a multiply chain becomes a repeated
   factor entry.  */
5700 if (code
== MULT_EXPR
5701 && reassoc_insert_powi_p
5702 && flag_unsafe_math_optimizations
5703 && is_gimple_call (def_stmt
)
5704 && acceptable_pow_call (as_a
<gcall
*> (def_stmt
), &base
, &exponent
))
5706 add_repeat_to_ops_vec (ops
, base
, exponent
);
5707 gimple_set_visited (def_stmt
, true);
/* Case 2: a negation in a multiply chain becomes "rhs1 * -1".  */
5710 else if (code
== MULT_EXPR
5711 && is_gimple_assign (def_stmt
)
5712 && gimple_assign_rhs_code (def_stmt
) == NEGATE_EXPR
5713 && !HONOR_SNANS (TREE_TYPE (op
))
5714 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op
))
5715 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op
))))
5717 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
5718 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
5719 add_to_ops_vec (ops
, rhs1
);
5720 add_to_ops_vec (ops
, cst
);
5721 gimple_set_visited (def_stmt
, true);
5728 /* Recursively linearize a binary expression that is the RHS of STMT.
5729 Place the operands of the expression tree in the vector named OPS. */
5732 linearize_expr_tree (vec
<operand_entry
*> *ops
, gimple
*stmt
,
5733 bool is_associative
, bool set_visited
)
5735 tree binlhs
= gimple_assign_rhs1 (stmt
)
;
5736 tree binrhs
= gimple_assign_rhs2 (stmt
);
5737 gimple
*binlhsdef
= NULL
, *binrhsdef
= NULL
;
5738 bool binlhsisreassoc
= false;
5739 bool binrhsisreassoc
= false;
5740 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
5741 class loop
*loop
= loop_containing_stmt (stmt
);
5744 gimple_set_visited (stmt
, true);
/* Classify each side: reassociable same-code op that cannot throw.  */
5746 if (TREE_CODE (binlhs
) == SSA_NAME
)
5748 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
5749 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
5750 && !stmt_could_throw_p (cfun
, binlhsdef
));
5753 if (TREE_CODE (binrhs
) == SSA_NAME
)
5755 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
5756 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
5757 && !stmt_could_throw_p (cfun
, binrhsdef
));
5760 /* If the LHS is not reassociable, but the RHS is, we need to swap
5761 them. If neither is reassociable, there is nothing we can do, so
5762 just put them in the ops vector. If the LHS is reassociable,
5763 linearize it. If both are reassociable, then linearize the RHS
5766 if (!binlhsisreassoc
)
5768 /* If this is not an associative operation like division, give up. */
5769 if (!is_associative
)
5771 add_to_ops_vec (ops
, binrhs
);
5775 if (!binrhsisreassoc
)
5778 if (try_special_add_to_ops (ops
, rhscode
, binrhs
, binrhsdef
))
5779 /* If we add ops for the rhs we expect to be able to recurse
5780 to it via the lhs during expression rewrite so swap
5784 add_to_ops_vec (ops
, binrhs
);
5786 if (!try_special_add_to_ops (ops
, rhscode
, binlhs
, binlhsdef
))
5787 add_to_ops_vec (ops
, binlhs
);
5793 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5795 fprintf (dump_file
, "swapping operands of ");
5796 print_gimple_stmt (dump_file
, stmt
, 0);
5799 swap_ssa_operands (stmt
,
5800 gimple_assign_rhs1_ptr (stmt
),
5801 gimple_assign_rhs2_ptr (stmt
));
5804 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5806 fprintf (dump_file
, " is now ");
5807 print_gimple_stmt (dump_file
, stmt
, 0);
5809 if (!binrhsisreassoc
)
5812 /* We want to make it so the lhs is always the reassociative op,
5814 std::swap (binlhs
, binrhs
);
/* Both sides reassociable: linearize the rhs chain into the lhs.  */
5816 else if (binrhsisreassoc
)
5818 linearize_expr (stmt
);
5819 binlhs
= gimple_assign_rhs1 (stmt
);
5820 binrhs
= gimple_assign_rhs2 (stmt
);
5823 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
5824 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
/* Recurse down the (always-reassociable) lhs side, then record the
   rhs leaf.  */
5826 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
5827 is_associative
, set_visited
);
5829 if (!try_special_add_to_ops (ops
, rhscode
, binrhs
, binrhsdef
))
5830 add_to_ops_vec (ops
, binrhs
);
5833 /* Repropagate the negates back into subtracts, since no other pass
5834 currently does it. */
5837 repropagate_negates (void)
5842 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
5844 gimple
*user
= get_single_immediate_use (negate
);
5846 if (!user
|| !is_gimple_assign (user
))
5849 /* The negate operand can be either operand of a PLUS_EXPR
5850 (it can be the LHS if the RHS is a constant for example).
5852 Force the negate operand to the RHS of the PLUS_EXPR, then
5853 transform the PLUS_EXPR into a MINUS_EXPR. */
5854 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
5856 /* If the negated operand appears on the LHS of the
5857 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5858 to force the negated operand to the RHS of the PLUS_EXPR. */
5859 if (gimple_assign_rhs1 (user
) == negate
)
5861 swap_ssa_operands (user
,
5862 gimple_assign_rhs1_ptr (user
),
5863 gimple_assign_rhs2_ptr (user
));
5866 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5867 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5868 if (gimple_assign_rhs2 (user
) == negate
)
5870 tree rhs1
= gimple_assign_rhs1 (user
);
5871 tree rhs2
= gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate
));
5872 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
5873 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
, rhs2
);
5877 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
5879 if (gimple_assign_rhs1 (user
) == negate
)
5884 which we transform into
5887 This pushes down the negate which we possibly can merge
5888 into some other operation, hence insert it into the
5889 plus_negates vector. */
5890 gimple
*feed
= SSA_NAME_DEF_STMT (negate
);
5891 tree a
= gimple_assign_rhs1 (feed
);
5892 tree b
= gimple_assign_rhs2 (user
);
5893 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
);
5894 gimple_stmt_iterator gsi2
= gsi_for_stmt (user
);
5895 tree x
= make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed
)));
5896 gimple
*g
= gimple_build_assign (x
, PLUS_EXPR
, a
, b
);
5897 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
5898 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, x
);
5899 user
= gsi_stmt (gsi2
);
/* The original negate feeding statement is dead now.  */
5901 reassoc_remove_stmt (&gsi
);
5902 release_defs (feed
);
5903 plus_negates
.safe_push (gimple_assign_lhs (user
));
5907 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5908 rid of one operation. */
5909 gimple
*feed
= SSA_NAME_DEF_STMT (negate
);
5910 tree a
= gimple_assign_rhs1 (feed
);
5911 tree rhs1
= gimple_assign_rhs1 (user
);
5912 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
5913 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, a
);
5914 update_stmt (gsi_stmt (gsi
));
5920 /* Break up subtract operations in block BB.
5922 We do this top down because we don't know whether the subtract is
5923 part of a possible chain of reassociation except at the top.
5932 we want to break up k = t - q, but we won't until we've transformed q
5933 = b - r, which won't be broken up until we transform b = c - d.
5935 En passant, clear the GIMPLE visited flag on every statement
5936 and set UIDs within each basic block. */
5939 break_up_subtract_bb (basic_block bb
)
5941 gimple_stmt_iterator gsi
;
5943 unsigned int uid
= 1;
5945 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
5947 gimple
*stmt
= gsi_stmt (gsi
);
/* Reset the visited flag and number statements in bb order.  */
5948 gimple_set_visited (stmt
, false);
5949 gimple_set_uid (stmt
, uid
++);
5951 if (!is_gimple_assign (stmt
)
5952 || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt
)))
5953 || !can_reassociate_op_p (gimple_assign_lhs (stmt
)))
5956 /* Look for simple gimple subtract operations. */
5957 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
5959 if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt
))
5960 || !can_reassociate_op_p (gimple_assign_rhs2 (stmt
)))
5963 /* Check for a subtract used only in an addition. If this
5964 is the case, transform it into add of a negate for better
5965 reassociation. IE transform C = A-B into C = A + -B if C
5966 is only used in an addition. */
5967 if (should_break_up_subtract (stmt
))
5968 break_up_subtract (stmt
, &gsi
);
/* Record pre-existing negates for later repropagation.  */
5970 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
5971 && can_reassociate_op_p (gimple_assign_rhs1 (stmt
)))
5972 plus_negates
.safe_push (gimple_assign_lhs (stmt
));
/* Recurse over the dominator tree, top down.  */
5974 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
5976 son
= next_dom_son (CDI_DOMINATORS
, son
))
5977 break_up_subtract_bb (son
);
5980 /* Used for repeated factor analysis. */
5981 struct repeat_factor
5983 /* An SSA name that occurs in a multiply chain. */
5986 /* Cached rank of the factor. */
5989 /* Number of occurrences of the factor in the chain. */
5990 HOST_WIDE_INT count
;
5992 /* An SSA name representing the product of this factor and
5993 all factors appearing later in the repeated factor vector. */
/* Global worklist of repeated factors for the current multiply chain.  */
5998 static vec
<repeat_factor
> repeat_factor_vec
;
6000 /* Used for sorting the repeat factor vector. Sort primarily by
6001 ascending occurrence count, secondarily by descending rank.
   NOTE(review): the subtraction-based results below are HOST_WIDE_INT
   differences truncated to the comparator's return type; if counts or
   ranks could differ by more than INT_MAX this may misorder — confirm
   the invariants, or use explicit <//> comparisons.  */
6004 compare_repeat_factors (const void *x1
, const void *x2
)
6006 const repeat_factor
*rf1
= (const repeat_factor
*) x1
;
6007 const repeat_factor
*rf2
= (const repeat_factor
*) x2
;
/* Ascending by occurrence count.  */
6009 if (rf1
->count
!= rf2
->count
)
6010 return rf1
->count
- rf2
->count
;
/* Descending by rank.  */
6012 return rf2
->rank
- rf1
->rank
;
6015 /* Look for repeated operands in OPS in the multiply tree rooted at
6016 STMT. Replace them with an optimal sequence of multiplies and powi
6017 builtin calls, and remove the used operands from OPS. Return an
6018 SSA name representing the value of the replacement sequence. */
6021 attempt_builtin_powi (gimple
*stmt
, vec
<operand_entry
*> *ops
)
6023 unsigned i
, j
, vec_len
;
6026 repeat_factor
*rf1
, *rf2
;
6027 repeat_factor rfnew
;
6028 tree result
= NULL_TREE
;
6029 tree target_ssa
, iter_result
;
6030 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
6031 tree powi_fndecl
= mathfn_built_in (type
, BUILT_IN_POWI
);
6032 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
6033 gimple
*mul_stmt
, *pow_stmt
;
6035 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6036 target, unless type is integral. */
6037 if (!powi_fndecl
&& !INTEGRAL_TYPE_P (type
))
6040 /* Allocate the repeated factor vector. */
6041 repeat_factor_vec
.create (10);
6043 /* Scan the OPS vector for all SSA names in the product and build
6044 up a vector of occurrence counts for each factor. */
6045 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
6047 if (TREE_CODE (oe
->op
) == SSA_NAME
)
6049 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
6051 if (rf1
->factor
== oe
->op
)
6053 rf1
->count
+= oe
->count
;
6058 if (j
>= repeat_factor_vec
.length ())
6060 rfnew
.factor
= oe
->op
;
6061 rfnew
.rank
= oe
->rank
;
6062 rfnew
.count
= oe
->count
;
6063 rfnew
.repr
= NULL_TREE
;
6064 repeat_factor_vec
.safe_push (rfnew
);
6069 /* Sort the repeated factor vector by (a) increasing occurrence count,
6070 and (b) decreasing rank. */
6071 repeat_factor_vec
.qsort (compare_repeat_factors
);
6073 /* It is generally best to combine as many base factors as possible
6074 into a product before applying __builtin_powi to the result.
6075 However, the sort order chosen for the repeated factor vector
6076 allows us to cache partial results for the product of the base
6077 factors for subsequent use. When we already have a cached partial
6078 result from a previous iteration, it is best to make use of it
6079 before looking for another __builtin_pow opportunity.
6081 As an example, consider x * x * y * y * y * z * z * z * z.
6082 We want to first compose the product x * y * z, raise it to the
6083 second power, then multiply this by y * z, and finally multiply
6084 by z. This can be done in 5 multiplies provided we cache y * z
6085 for use in both expressions:
6093 If we instead ignored the cached y * z and first multiplied by
6094 the __builtin_pow opportunity z * z, we would get the inferior:
6103 vec_len
= repeat_factor_vec
.length ();
6105 /* Repeatedly look for opportunities to create a builtin_powi call. */
6108 HOST_WIDE_INT power
;
6110 /* First look for the largest cached product of factors from
6111 preceding iterations. If found, create a builtin_powi for
6112 it if the minimum occurrence count for its factors is at
6113 least 2, or just use this cached product as our next
6114 multiplicand if the minimum occurrence count is 1. */
6115 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
6117 if (rf1
->repr
&& rf1
->count
> 0)
6127 iter_result
= rf1
->repr
;
6129 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6133 fputs ("Multiplying by cached product ", dump_file
);
6134 for (elt
= j
; elt
< vec_len
; elt
++)
6136 rf
= &repeat_factor_vec
[elt
];
6137 print_generic_expr (dump_file
, rf
->factor
);
6138 if (elt
< vec_len
- 1)
6139 fputs (" * ", dump_file
);
6141 fputs ("\n", dump_file
);
6146 if (INTEGRAL_TYPE_P (type
))
6148 gcc_assert (power
> 1);
6149 gimple_stmt_iterator gsip
= gsi
;
6151 iter_result
= powi_as_mults (&gsi
, gimple_location (stmt
),
6153 gimple_stmt_iterator gsic
= gsi
;
6154 while (gsi_stmt (gsic
) != gsi_stmt (gsip
))
6156 gimple_set_uid (gsi_stmt (gsic
), gimple_uid (stmt
));
6157 gimple_set_visited (gsi_stmt (gsic
), true);
6163 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
6165 = gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
6166 build_int_cst (integer_type_node
,
6168 gimple_call_set_lhs (pow_stmt
, iter_result
);
6169 gimple_set_location (pow_stmt
, gimple_location (stmt
));
6170 gimple_set_uid (pow_stmt
, gimple_uid (stmt
));
6171 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
6174 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6178 fputs ("Building __builtin_pow call for cached product (",
6180 for (elt
= j
; elt
< vec_len
; elt
++)
6182 rf
= &repeat_factor_vec
[elt
];
6183 print_generic_expr (dump_file
, rf
->factor
);
6184 if (elt
< vec_len
- 1)
6185 fputs (" * ", dump_file
);
6187 fprintf (dump_file
, ")^" HOST_WIDE_INT_PRINT_DEC
"\n",
6194 /* Otherwise, find the first factor in the repeated factor
6195 vector whose occurrence count is at least 2. If no such
6196 factor exists, there are no builtin_powi opportunities
6198 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
6200 if (rf1
->count
>= 2)
6209 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6213 fputs ("Building __builtin_pow call for (", dump_file
);
6214 for (elt
= j
; elt
< vec_len
; elt
++)
6216 rf
= &repeat_factor_vec
[elt
];
6217 print_generic_expr (dump_file
, rf
->factor
);
6218 if (elt
< vec_len
- 1)
6219 fputs (" * ", dump_file
);
6221 fprintf (dump_file
, ")^" HOST_WIDE_INT_PRINT_DEC
"\n", power
);
6224 reassociate_stats
.pows_created
++;
6226 /* Visit each element of the vector in reverse order (so that
6227 high-occurrence elements are visited first, and within the
6228 same occurrence count, lower-ranked elements are visited
6229 first). Form a linear product of all elements in this order
6230 whose occurrence count is at least that of element J.
6231 Record the SSA name representing the product of each element
6232 with all subsequent elements in the vector. */
6233 if (j
== vec_len
- 1)
6234 rf1
->repr
= rf1
->factor
;
6237 for (ii
= vec_len
- 2; ii
>= (int)j
; ii
--)
6241 rf1
= &repeat_factor_vec
[ii
];
6242 rf2
= &repeat_factor_vec
[ii
+ 1];
6244 /* Init the last factor's representative to be itself. */
6246 rf2
->repr
= rf2
->factor
;
6251 target_ssa
= make_temp_ssa_name (type
, NULL
, "reassocpow");
6252 mul_stmt
= gimple_build_assign (target_ssa
, MULT_EXPR
,
6254 gimple_set_location (mul_stmt
, gimple_location (stmt
));
6255 gimple_set_uid (mul_stmt
, gimple_uid (stmt
));
6256 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
6257 rf1
->repr
= target_ssa
;
6259 /* Don't reprocess the multiply we just introduced. */
6260 gimple_set_visited (mul_stmt
, true);
6264 /* Form a call to __builtin_powi for the maximum product
6265 just formed, raised to the power obtained earlier. */
6266 rf1
= &repeat_factor_vec
[j
];
6267 if (INTEGRAL_TYPE_P (type
))
6269 gcc_assert (power
> 1);
6270 gimple_stmt_iterator gsip
= gsi
;
6272 iter_result
= powi_as_mults (&gsi
, gimple_location (stmt
),
6274 gimple_stmt_iterator gsic
= gsi
;
6275 while (gsi_stmt (gsic
) != gsi_stmt (gsip
))
6277 gimple_set_uid (gsi_stmt (gsic
), gimple_uid (stmt
));
6278 gimple_set_visited (gsi_stmt (gsic
), true);
6284 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
6285 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
6286 build_int_cst (integer_type_node
,
6288 gimple_call_set_lhs (pow_stmt
, iter_result
);
6289 gimple_set_location (pow_stmt
, gimple_location (stmt
));
6290 gimple_set_uid (pow_stmt
, gimple_uid (stmt
));
6291 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
6295 /* If we previously formed at least one other builtin_powi call,
6296 form the product of this one and those others. */
6299 tree new_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
6300 mul_stmt
= gimple_build_assign (new_result
, MULT_EXPR
,
6301 result
, iter_result
);
6302 gimple_set_location (mul_stmt
, gimple_location (stmt
));
6303 gimple_set_uid (mul_stmt
, gimple_uid (stmt
));
6304 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
6305 gimple_set_visited (mul_stmt
, true);
6306 result
= new_result
;
6309 result
= iter_result
;
6311 /* Decrement the occurrence count of each element in the product
6312 by the count found above, and remove this many copies of each
6314 for (i
= j
; i
< vec_len
; i
++)
6319 rf1
= &repeat_factor_vec
[i
];
6320 rf1
->count
-= power
;
6322 FOR_EACH_VEC_ELT_REVERSE (*ops
, n
, oe
)
6324 if (oe
->op
== rf1
->factor
)
6328 ops
->ordered_remove (n
);
6344 /* At this point all elements in the repeated factor vector have a
6345 remaining occurrence count of 0 or 1, and those with a count of 1
6346 don't have cached representatives. Re-sort the ops vector and
6348 ops
->qsort (sort_by_operand_rank
);
6349 repeat_factor_vec
.release ();
6351 /* Return the final product computed herein. Note that there may
6352 still be some elements with single occurrence count left in OPS;
6353 those will be handled by the normal reassociation logic. */
6357 /* Attempt to optimize
6358 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6359 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
6362 attempt_builtin_copysign (vec
<operand_entry
*> *ops
)
6366 unsigned int length
= ops
->length ();
6367 tree cst
= ops
->last ()->op
;
6369 if (length
== 1 || TREE_CODE (cst
) != REAL_CST
)
6372 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
6374 if (TREE_CODE (oe
->op
) == SSA_NAME
6375 && has_single_use (oe
->op
))
6377 gimple
*def_stmt
= SSA_NAME_DEF_STMT (oe
->op
);
6378 if (gcall
*old_call
= dyn_cast
<gcall
*> (def_stmt
))
6381 switch (gimple_call_combined_fn (old_call
))
6384 CASE_CFN_COPYSIGN_FN
:
6385 arg0
= gimple_call_arg (old_call
, 0);
6386 arg1
= gimple_call_arg (old_call
, 1);
6387 /* The first argument of copysign must be a constant,
6388 otherwise there's nothing to do. */
6389 if (TREE_CODE (arg0
) == REAL_CST
)
6391 tree type
= TREE_TYPE (arg0
);
6392 tree mul
= const_binop (MULT_EXPR
, type
, cst
, arg0
);
6393 /* If we couldn't fold to a single constant, skip it.
6394 That happens e.g. for inexact multiplication when
6396 if (mul
== NULL_TREE
)
6398 /* Instead of adjusting OLD_CALL, let's build a new
6399 call to not leak the LHS and prevent keeping bogus
6400 debug statements. DCE will clean up the old call. */
6402 if (gimple_call_internal_p (old_call
))
6403 new_call
= gimple_build_call_internal
6404 (IFN_COPYSIGN
, 2, mul
, arg1
);
6406 new_call
= gimple_build_call
6407 (gimple_call_fndecl (old_call
), 2, mul
, arg1
);
6408 tree lhs
= make_ssa_name (type
);
6409 gimple_call_set_lhs (new_call
, lhs
);
6410 gimple_set_location (new_call
,
6411 gimple_location (old_call
));
6412 insert_stmt_after (new_call
, old_call
);
6413 /* We've used the constant, get rid of it. */
6415 bool cst1_neg
= real_isneg (TREE_REAL_CST_PTR (cst
));
6416 /* Handle the CST1 < 0 case by negating the result. */
6419 tree negrhs
= make_ssa_name (TREE_TYPE (lhs
));
6421 = gimple_build_assign (negrhs
, NEGATE_EXPR
, lhs
);
6422 insert_stmt_after (negate_stmt
, new_call
);
6427 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6429 fprintf (dump_file
, "Optimizing copysign: ");
6430 print_generic_expr (dump_file
, cst
);
6431 fprintf (dump_file
, " * COPYSIGN (");
6432 print_generic_expr (dump_file
, arg0
);
6433 fprintf (dump_file
, ", ");
6434 print_generic_expr (dump_file
, arg1
);
6435 fprintf (dump_file
, ") into %sCOPYSIGN (",
6436 cst1_neg
? "-" : "");
6437 print_generic_expr (dump_file
, mul
);
6438 fprintf (dump_file
, ", ");
6439 print_generic_expr (dump_file
, arg1
);
6440 fprintf (dump_file
, "\n");
6453 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6456 transform_stmt_to_copy (gimple_stmt_iterator
*gsi
, gimple
*stmt
, tree new_rhs
)
6460 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6462 fprintf (dump_file
, "Transforming ");
6463 print_gimple_stmt (dump_file
, stmt
, 0);
6466 rhs1
= gimple_assign_rhs1 (stmt
);
6467 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
6469 remove_visited_stmt_chain (rhs1
);
6471 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6473 fprintf (dump_file
, " into ");
6474 print_gimple_stmt (dump_file
, stmt
, 0);
6478 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6481 transform_stmt_to_multiply (gimple_stmt_iterator
*gsi
, gimple
*stmt
,
6482 tree rhs1
, tree rhs2
)
6484 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6486 fprintf (dump_file
, "Transforming ");
6487 print_gimple_stmt (dump_file
, stmt
, 0);
6490 gimple_assign_set_rhs_with_ops (gsi
, MULT_EXPR
, rhs1
, rhs2
);
6491 update_stmt (gsi_stmt (*gsi
));
6492 remove_visited_stmt_chain (rhs1
);
6494 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6496 fprintf (dump_file
, " into ");
6497 print_gimple_stmt (dump_file
, stmt
, 0);
6501 /* Reassociate expressions in basic block BB and its post-dominator as
6504 Bubble up return status from maybe_optimize_range_tests. */
6507 reassociate_bb (basic_block bb
)
6509 gimple_stmt_iterator gsi
;
6511 gimple
*stmt
= last_stmt (bb
);
6512 bool cfg_cleanup_needed
= false;
6514 if (stmt
&& !gimple_visited_p (stmt
))
6515 cfg_cleanup_needed
|= maybe_optimize_range_tests (stmt
);
6517 bool do_prev
= false;
6518 for (gsi
= gsi_last_bb (bb
);
6519 !gsi_end_p (gsi
); do_prev
? gsi_prev (&gsi
) : (void) 0)
6522 stmt
= gsi_stmt (gsi
);
6524 if (is_gimple_assign (stmt
)
6525 && !stmt_could_throw_p (cfun
, stmt
))
6527 tree lhs
, rhs1
, rhs2
;
6528 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
6530 /* If this was part of an already processed statement,
6531 we don't need to touch it again. */
6532 if (gimple_visited_p (stmt
))
6534 /* This statement might have become dead because of previous
6536 if (has_zero_uses (gimple_get_lhs (stmt
)))
6538 reassoc_remove_stmt (&gsi
);
6539 release_defs (stmt
);
6540 /* We might end up removing the last stmt above which
6541 places the iterator to the end of the sequence.
6542 Reset it to the last stmt in this case and make sure
6543 we don't do gsi_prev in that case. */
6544 if (gsi_end_p (gsi
))
6546 gsi
= gsi_last_bb (bb
);
6553 /* If this is not a gimple binary expression, there is
6554 nothing for us to do with it. */
6555 if (get_gimple_rhs_class (rhs_code
) != GIMPLE_BINARY_RHS
)
6558 lhs
= gimple_assign_lhs (stmt
);
6559 rhs1
= gimple_assign_rhs1 (stmt
);
6560 rhs2
= gimple_assign_rhs2 (stmt
);
6562 /* For non-bit or min/max operations we can't associate
6563 all types. Verify that here. */
6564 if ((rhs_code
!= BIT_IOR_EXPR
6565 && rhs_code
!= BIT_AND_EXPR
6566 && rhs_code
!= BIT_XOR_EXPR
6567 && rhs_code
!= MIN_EXPR
6568 && rhs_code
!= MAX_EXPR
6569 && !can_reassociate_type_p (TREE_TYPE (lhs
)))
6570 || !can_reassociate_op_p (rhs1
)
6571 || !can_reassociate_op_p (rhs2
))
6574 if (associative_tree_code (rhs_code
))
6576 auto_vec
<operand_entry
*> ops
;
6577 tree powi_result
= NULL_TREE
;
6578 bool is_vector
= VECTOR_TYPE_P (TREE_TYPE (lhs
));
6580 /* There may be no immediate uses left by the time we
6581 get here because we may have eliminated them all. */
6582 if (TREE_CODE (lhs
) == SSA_NAME
&& has_zero_uses (lhs
))
6585 gimple_set_visited (stmt
, true);
6586 linearize_expr_tree (&ops
, stmt
, true, true);
6587 ops
.qsort (sort_by_operand_rank
);
6588 int orig_len
= ops
.length ();
6589 optimize_ops_list (rhs_code
, &ops
);
6590 if (undistribute_ops_list (rhs_code
, &ops
,
6591 loop_containing_stmt (stmt
)))
6593 ops
.qsort (sort_by_operand_rank
);
6594 optimize_ops_list (rhs_code
, &ops
);
6596 if (undistribute_bitref_for_vector (rhs_code
, &ops
,
6597 loop_containing_stmt (stmt
)))
6599 ops
.qsort (sort_by_operand_rank
);
6600 optimize_ops_list (rhs_code
, &ops
);
6602 if (rhs_code
== PLUS_EXPR
6603 && transform_add_to_multiply (&ops
))
6604 ops
.qsort (sort_by_operand_rank
);
6606 if (rhs_code
== BIT_IOR_EXPR
|| rhs_code
== BIT_AND_EXPR
)
6609 optimize_vec_cond_expr (rhs_code
, &ops
);
6611 optimize_range_tests (rhs_code
, &ops
, NULL
);
6614 if (rhs_code
== MULT_EXPR
&& !is_vector
)
6616 attempt_builtin_copysign (&ops
);
6618 if (reassoc_insert_powi_p
6619 && (flag_unsafe_math_optimizations
6620 || (INTEGRAL_TYPE_P (TREE_TYPE (lhs
)))))
6621 powi_result
= attempt_builtin_powi (stmt
, &ops
);
6624 operand_entry
*last
;
6625 bool negate_result
= false;
6626 if (ops
.length () > 1
6627 && rhs_code
== MULT_EXPR
)
6630 if ((integer_minus_onep (last
->op
)
6631 || real_minus_onep (last
->op
))
6632 && !HONOR_SNANS (TREE_TYPE (lhs
))
6633 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs
))
6634 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs
))))
6637 negate_result
= true;
6642 /* If the operand vector is now empty, all operands were
6643 consumed by the __builtin_powi optimization. */
6644 if (ops
.length () == 0)
6645 transform_stmt_to_copy (&gsi
, stmt
, powi_result
);
6646 else if (ops
.length () == 1)
6648 tree last_op
= ops
.last ()->op
;
6650 /* If the stmt that defines operand has to be inserted, insert it
6652 if (ops
.last ()->stmt_to_insert
)
6653 insert_stmt_before_use (stmt
, ops
.last ()->stmt_to_insert
);
6655 transform_stmt_to_multiply (&gsi
, stmt
, last_op
,
6658 transform_stmt_to_copy (&gsi
, stmt
, last_op
);
6662 machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
6663 int ops_num
= ops
.length ();
6666 /* For binary bit operations, if there are at least 3
6667 operands and the last operand in OPS is a constant,
6668 move it to the front. This helps ensure that we generate
6669 (X & Y) & C rather than (X & C) & Y. The former will
6670 often match a canonical bit test when we get to RTL. */
6671 if (ops
.length () > 2
6672 && (rhs_code
== BIT_AND_EXPR
6673 || rhs_code
== BIT_IOR_EXPR
6674 || rhs_code
== BIT_XOR_EXPR
)
6675 && TREE_CODE (ops
.last ()->op
) == INTEGER_CST
)
6676 std::swap (*ops
[0], *ops
[ops_num
- 1]);
6678 /* Only rewrite the expression tree to parallel in the
6679 last reassoc pass to avoid useless work back-and-forth
6680 with initial linearization. */
6681 if (!reassoc_insert_powi_p
6682 && ops
.length () > 3
6683 && (width
= get_reassociation_width (ops_num
, rhs_code
,
6686 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6688 "Width = %d was chosen for reassociation\n",
6690 rewrite_expr_tree_parallel (as_a
<gassign
*> (stmt
),
6695 /* When there are three operands left, we want
6696 to make sure the ones that get the double
6697 binary op are chosen wisely. */
6698 int len
= ops
.length ();
6700 swap_ops_for_binary_stmt (ops
, len
- 3, stmt
);
6702 new_lhs
= rewrite_expr_tree (stmt
, rhs_code
, 0, ops
,
6708 /* If we combined some repeated factors into a
6709 __builtin_powi call, multiply that result by the
6710 reassociated operands. */
6713 gimple
*mul_stmt
, *lhs_stmt
= SSA_NAME_DEF_STMT (lhs
);
6714 tree type
= TREE_TYPE (lhs
);
6715 tree target_ssa
= make_temp_ssa_name (type
, NULL
,
6717 gimple_set_lhs (lhs_stmt
, target_ssa
);
6718 update_stmt (lhs_stmt
);
6721 target_ssa
= new_lhs
;
6724 mul_stmt
= gimple_build_assign (lhs
, MULT_EXPR
,
6725 powi_result
, target_ssa
);
6726 gimple_set_location (mul_stmt
, gimple_location (stmt
));
6727 gimple_set_uid (mul_stmt
, gimple_uid (stmt
));
6728 gsi_insert_after (&gsi
, mul_stmt
, GSI_NEW_STMT
);
6734 stmt
= SSA_NAME_DEF_STMT (lhs
);
6735 tree tmp
= make_ssa_name (TREE_TYPE (lhs
));
6736 gimple_set_lhs (stmt
, tmp
);
6739 gassign
*neg_stmt
= gimple_build_assign (lhs
, NEGATE_EXPR
,
6741 gimple_set_uid (neg_stmt
, gimple_uid (stmt
));
6742 gsi_insert_after (&gsi
, neg_stmt
, GSI_NEW_STMT
);
6748 for (son
= first_dom_son (CDI_POST_DOMINATORS
, bb
);
6750 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
6751 cfg_cleanup_needed
|= reassociate_bb (son
);
6753 return cfg_cleanup_needed
;
6756 /* Add jumps around shifts for range tests turned into bit tests.
6757 For each SSA_NAME VAR we have code like:
6758 VAR = ...; // final stmt of range comparison
6759 // bit test here...;
6760 OTHERVAR = ...; // final stmt of the bit test sequence
6761 RES = VAR | OTHERVAR;
6762 Turn the above into:
6769 // bit test here...;
6772 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6780 FOR_EACH_VEC_ELT (reassoc_branch_fixups
, i
, var
)
6782 gimple
*def_stmt
= SSA_NAME_DEF_STMT (var
);
6785 bool ok
= single_imm_use (var
, &use
, &use_stmt
);
6787 && is_gimple_assign (use_stmt
)
6788 && gimple_assign_rhs_code (use_stmt
) == BIT_IOR_EXPR
6789 && gimple_bb (def_stmt
) == gimple_bb (use_stmt
));
6791 basic_block cond_bb
= gimple_bb (def_stmt
);
6792 basic_block then_bb
= split_block (cond_bb
, def_stmt
)->dest
;
6793 basic_block merge_bb
= split_block (then_bb
, use_stmt
)->dest
;
6795 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
6796 gimple
*g
= gimple_build_cond (NE_EXPR
, var
,
6797 build_zero_cst (TREE_TYPE (var
)),
6798 NULL_TREE
, NULL_TREE
);
6799 location_t loc
= gimple_location (use_stmt
);
6800 gimple_set_location (g
, loc
);
6801 gsi_insert_after (&gsi
, g
, GSI_NEW_STMT
);
6803 edge etrue
= make_edge (cond_bb
, merge_bb
, EDGE_TRUE_VALUE
);
6804 etrue
->probability
= profile_probability::even ();
6805 edge efalse
= find_edge (cond_bb
, then_bb
);
6806 efalse
->flags
= EDGE_FALSE_VALUE
;
6807 efalse
->probability
-= etrue
->probability
;
6808 then_bb
->count
-= etrue
->count ();
6810 tree othervar
= NULL_TREE
;
6811 if (gimple_assign_rhs1 (use_stmt
) == var
)
6812 othervar
= gimple_assign_rhs2 (use_stmt
);
6813 else if (gimple_assign_rhs2 (use_stmt
) == var
)
6814 othervar
= gimple_assign_rhs1 (use_stmt
);
6817 tree lhs
= gimple_assign_lhs (use_stmt
);
6818 gphi
*phi
= create_phi_node (lhs
, merge_bb
);
6819 add_phi_arg (phi
, build_one_cst (TREE_TYPE (lhs
)), etrue
, loc
);
6820 add_phi_arg (phi
, othervar
, single_succ_edge (then_bb
), loc
);
6821 gsi
= gsi_for_stmt (use_stmt
);
6822 gsi_remove (&gsi
, true);
6824 set_immediate_dominator (CDI_DOMINATORS
, merge_bb
, cond_bb
);
6825 set_immediate_dominator (CDI_POST_DOMINATORS
, cond_bb
, merge_bb
);
6827 reassoc_branch_fixups
.release ();
6830 void dump_ops_vector (FILE *file
, vec
<operand_entry
*> ops
);
6831 void debug_ops_vector (vec
<operand_entry
*> ops
);
6833 /* Dump the operand entry vector OPS to FILE. */
6836 dump_ops_vector (FILE *file
, vec
<operand_entry
*> ops
)
6841 FOR_EACH_VEC_ELT (ops
, i
, oe
)
6843 fprintf (file
, "Op %d -> rank: %d, tree: ", i
, oe
->rank
);
6844 print_generic_expr (file
, oe
->op
);
6845 fprintf (file
, "\n");
6849 /* Dump the operand entry vector OPS to STDERR. */
6852 debug_ops_vector (vec
<operand_entry
*> ops
)
6854 dump_ops_vector (stderr
, ops
);
6857 /* Bubble up return status from reassociate_bb. */
6862 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
6863 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun
));
6866 /* Initialize the reassociation pass. */
6873 int *bbs
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
);
6875 /* Find the loops, so that we can prevent moving calculations in
6877 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
6879 memset (&reassociate_stats
, 0, sizeof (reassociate_stats
));
6881 next_operand_entry_id
= 0;
6883 /* Reverse RPO (Reverse Post Order) will give us something where
6884 deeper loops come later. */
6885 pre_and_rev_post_order_compute (NULL
, bbs
, false);
6886 bb_rank
= XCNEWVEC (int64_t, last_basic_block_for_fn (cfun
));
6887 operand_rank
= new hash_map
<tree
, int64_t>;
6889 /* Give each default definition a distinct rank. This includes
6890 parameters and the static chain. Walk backwards over all
6891 SSA names so that we get proper rank ordering according
6892 to tree_swap_operands_p. */
6893 for (i
= num_ssa_names
- 1; i
> 0; --i
)
6895 tree name
= ssa_name (i
);
6896 if (name
&& SSA_NAME_IS_DEFAULT_DEF (name
))
6897 insert_operand_rank (name
, ++rank
);
6900 /* Set up rank for each BB */
6901 for (i
= 0; i
< n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
; i
++)
6902 bb_rank
[bbs
[i
]] = ++rank
<< 16;
6905 calculate_dominance_info (CDI_POST_DOMINATORS
);
6906 plus_negates
= vNULL
;
6909 /* Cleanup after the reassociation pass, and print stats if
6915 statistics_counter_event (cfun
, "Linearized",
6916 reassociate_stats
.linearized
);
6917 statistics_counter_event (cfun
, "Constants eliminated",
6918 reassociate_stats
.constants_eliminated
);
6919 statistics_counter_event (cfun
, "Ops eliminated",
6920 reassociate_stats
.ops_eliminated
);
6921 statistics_counter_event (cfun
, "Statements rewritten",
6922 reassociate_stats
.rewritten
);
6923 statistics_counter_event (cfun
, "Built-in pow[i] calls encountered",
6924 reassociate_stats
.pows_encountered
);
6925 statistics_counter_event (cfun
, "Built-in powi calls created",
6926 reassociate_stats
.pows_created
);
6928 delete operand_rank
;
6929 operand_entry_pool
.release ();
6931 plus_negates
.release ();
6932 free_dominance_info (CDI_POST_DOMINATORS
);
6933 loop_optimizer_finalize ();
6936 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6937 insertion of __builtin_powi calls.
6939 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6940 optimization of a gimple conditional. Otherwise returns zero. */
6943 execute_reassoc (bool insert_powi_p
)
6945 reassoc_insert_powi_p
= insert_powi_p
;
6949 bool cfg_cleanup_needed
;
6950 cfg_cleanup_needed
= do_reassoc ();
6951 repropagate_negates ();
6955 return cfg_cleanup_needed
? TODO_cleanup_cfg
: 0;
6960 const pass_data pass_data_reassoc
=
6962 GIMPLE_PASS
, /* type */
6963 "reassoc", /* name */
6964 OPTGROUP_NONE
, /* optinfo_flags */
6965 TV_TREE_REASSOC
, /* tv_id */
6966 ( PROP_cfg
| PROP_ssa
), /* properties_required */
6967 0, /* properties_provided */
6968 0, /* properties_destroyed */
6969 0, /* todo_flags_start */
6970 TODO_update_ssa_only_virtuals
, /* todo_flags_finish */
6973 class pass_reassoc
: public gimple_opt_pass
6976 pass_reassoc (gcc::context
*ctxt
)
6977 : gimple_opt_pass (pass_data_reassoc
, ctxt
), insert_powi_p (false)
6980 /* opt_pass methods: */
6981 opt_pass
* clone () { return new pass_reassoc (m_ctxt
); }
6982 void set_pass_param (unsigned int n
, bool param
)
6984 gcc_assert (n
== 0);
6985 insert_powi_p
= param
;
6987 virtual bool gate (function
*) { return flag_tree_reassoc
!= 0; }
6988 virtual unsigned int execute (function
*)
6989 { return execute_reassoc (insert_powi_p
); }
6992 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
6993 point 3a in the pass header comment. */
6995 }; // class pass_reassoc
7000 make_pass_reassoc (gcc::context
*ctxt
)
7002 return new pass_reassoc (ctxt
);