/* Reassociation for trees.
   Copyright (C) 2005-2019 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop.h"
#include "tree-ssa.h"
#include "langhooks.h"
#include "cfgloop.h"
#include "builtins.h"
#include "case-cfn-macros.h"
/*  This is a simple global reassociation pass.  It is, in part, based
    on the LLVM pass of the same name (They do some things more/less
    than we do, in different orders, etc).

    It consists of five steps:

    1. Breaking up subtract operations into addition + negate, where
    it would promote the reassociation of adds.

    2. Left linearization of the expression trees, so that (A+B)+(C+D)
    becomes (((A+B)+C)+D), which is easier for us to rewrite later.
    During linearization, we place the operands of the binary
    expressions into a vector of operand_entry_*

    3. Optimization of the operand lists, eliminating things like a +
    -a, a & a, etc.

    3a. Combine repeated factors with the same occurrence counts
    into a __builtin_powi call that will later be optimized into
    an optimal number of multiplies.

    4. Rewrite the expression trees we linearized and optimized so
    they are in proper rank order.

    5. Repropagate negates, as nothing else will clean it up ATM.

    A bit of theory on #4, since nobody seems to write anything down
    about why it makes sense to do it the way they do it:

    We could do this much nicer theoretically, but don't (for reasons
    explained after how to do it theoretically nice :P).

    In order to promote the most redundancy elimination, you want
    binary expressions whose operands are the same rank (or
    preferably, the same value) exposed to the redundancy eliminator,
    for possible elimination.

    So the way to do this if we really cared, is to build the new op
    tree from the leaves to the roots, merging as you go, and putting the
    new op on the end of the worklist, until you are left with one
    thing on the worklist.

    IE if you have to rewrite the following set of operands (listed with
    rank in parentheses), with opcode PLUS_EXPR:

    a (1),  b (1),  c (1),  d (2), e (2)

    We start with our merge worklist empty, and the ops list with all of
    those on it.

    You want to first merge all leaves of the same rank, as much as
    possible.

    So first build a binary op of

    mergetmp = a + b, and put "mergetmp" on the merge worklist.

    Because there is no three operand form of PLUS_EXPR, c is not going to
    be exposed to redundancy elimination as a rank 1 operand.

    So you might as well throw it on the merge worklist (you could also
    consider it to now be a rank two operand, and merge it with d and e,
    but in this case, you then have evicted e from a binary op.  So at
    least in this situation, you can't win.)

    Then build a binary op of d + e
    mergetmp2 = d + e

    and put mergetmp2 on the merge worklist.

    so merge worklist = {mergetmp, c, mergetmp2}

    Continue building binary ops of these operations until you have only
    one operation left on the worklist.

    So we have

    build binary op
    mergetmp3 = mergetmp + c

    worklist = {mergetmp2, mergetmp3}

    mergetmp4 = mergetmp2 + mergetmp3

    worklist = {mergetmp4}

    because we have one operation left, we can now just set the original
    statement equal to the result of that operation.

    This will at least expose a + b and d + e to redundancy elimination
    as binary operations.

    For extra points, you can reuse the old statements to build the
    mergetmps, since you shouldn't run out.

    So why don't we do this?

    Because it's expensive, and rarely will help.  Most trees we are
    reassociating have 3 or less ops.  If they have 2 ops, they already
    will be written into a nice single binary op.  If you have 3 ops, a
    single simple check suffices to tell you whether the first two are of the
    same rank.  If so, you know to order it

    mergetmp = op1 + op2
    newstmt = mergetmp + op3

    instead of
    mergetmp = op2 + op3
    newstmt = mergetmp + op1

    If all three are of the same rank, you can't expose them all in a
    single binary operator anyway, so the above is *still* the best you
    can do.

    Thus, this is what we do.  When we have three ops left, we check to see
    what order to put them in, and call it a day.  As a nod to vector sum
    reduction, we check if any of the ops are really a phi node that is a
    destructive update for the associating op, and keep the destructive
    update together for vector sum reduction recognition.  */
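/* As a short illustration of the whole pipeline (made up for this
   comment, not taken from any testcase): given

     t1 = a - b;
     t2 = t1 + c;

   step 1 rewrites the subtract as  t0 = -b; t1 = a + t0;  linearization
   then sees the flat operand list {a, t0, c}, the list optimizations and
   sorting run on that vector, and steps 4/5 emit the operations back in
   rank order, repropagating the negate into a subtract where that is
   profitable.  */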
/* Enable insertion of __builtin_powi calls during execute_reassoc.  See
   point 3a in the pass header comment.  */
static bool reassoc_insert_powi_p;

/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;
/* Operator, rank pair.  */
struct operand_entry
{
  unsigned int rank;
  unsigned int id;
  tree op;
  unsigned int count;
  gimple *stmt_to_insert;
};

static object_allocator<operand_entry> operand_entry_pool
  ("operand entry pool");
/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static unsigned int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;

/* Operand->rank hashtable.  */
static hash_map<tree, long> *operand_rank;

/* Vector of SSA_NAMEs on which after reassociate_bb is done with
   all basic blocks the CFG should be adjusted - basic blocks
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;
/* Forward decls.  */
static long get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

static bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple *end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}
/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)

/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
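/* For example (illustrative), for an accumulator in a one-block
   innermost loop

     x_1 = PHI <x_0 (preheader), x_2 (latch)>
     x_2 = x_1 + a_5;

   x_1 gets rank bb_rank[latch->index] + PHI_LOOP_BIAS, so it sorts
   after every operand computed inside the loop body.  */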
static long
phi_rank (gimple *stmt)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple *use_stmt;

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
	  if (gimple_bb (def_stmt)->loop_father == father)
	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
	}
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}
/* If EXP is an SSA_NAME defined by a PHI statement that represents a
   loop-carried dependence of an innermost loop, return TRUE; else
   return FALSE.  */

static bool
loop_carried_phi (tree exp)
{
  gimple *phi_stmt;
  long block_rank;

  if (TREE_CODE (exp) != SSA_NAME
      || SSA_NAME_IS_DEFAULT_DEF (exp))
    return false;

  phi_stmt = SSA_NAME_DEF_STMT (exp);

  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
    return false;

  /* Non-loop-carried phis have block rank.  Loop-carried phis have
     an additional bias added in.  If this phi doesn't have block rank,
     it's biased and should not be propagated.  */
  block_rank = bb_rank[gimple_bb (phi_stmt)->index];

  if (phi_rank (phi_stmt) != block_rank)
    return true;

  return false;
}
/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */

static long
propagate_rank (long rank, tree op)
{
  long op_rank;

  if (loop_carried_phi (op))
    return rank;

  op_rank = get_rank (op);

  return MAX (rank, op_rank);
}
/* Look up the operand rank structure for expression E.  */

static long
find_operand_rank (tree e)
{
  long *slot = operand_rank->get (e);
  return slot ? *slot : -1;
}
/* Insert {E,RANK} into the operand rank hashtable.  */

static inline void
insert_operand_rank (tree e, long rank)
{
  gcc_assert (rank > 0);
  gcc_assert (!operand_rank->put (e, rank));
}
/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      ssa_op_iter iter;
      gimple *stmt;
      long rank;
      tree op;

      if (SSA_NAME_IS_DEFAULT_DEF (e))
	return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
	return phi_rank (stmt);

      if (!is_gimple_assign (stmt))
	return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
	return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
	 exception, remove the bias from loop-carried phis when propagating
	 the rank so that dependent operations are not also biased.  */
      /* Simply walk over all SSA uses - this takes advantage of the
	 fact that non-SSA operands are is_gimple_min_invariant and
	 thus have rank 0.  */
      rank = 0;
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
	rank = propagate_rank (rank, op);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Rank for ");
	  print_generic_expr (dump_file, e);
	  fprintf (dump_file, " is %ld\n", (rank + 1));
	}

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Constants, globals, etc., are rank 0 */
  return 0;
}
/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 4
#define FLOAT_ONE_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */

static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    {
      /* Sort -1.0 and 1.0 constants last, while in some cases
	 const_binop can't optimize some inexact operations, multiplication
	 by -1.0 or 1.0 can be always merged with others.  */
      if (real_onep (t) || real_minus_onep (t))
	return FLOAT_ONE_CONST_TYPE;
      return FLOAT_CONST_TYPE;
    }
  else
    return OTHER_CONST_TYPE;
}
/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */

static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry *oea = *(const operand_entry *const *)pa;
  const operand_entry *oeb = *(const operand_entry *const *)pb;

  if (oeb->rank != oea->rank)
    return oeb->rank > oea->rank ? 1 : -1;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
	return constant_type (oea->op) - constant_type (oeb->op);
      else
	/* To make sorting result stable, we use unique IDs to determine
	   order.  */
	return oeb->id > oea->id ? 1 : -1;
    }

  if (TREE_CODE (oea->op) != SSA_NAME)
    {
      if (TREE_CODE (oeb->op) != SSA_NAME)
	return oeb->id > oea->id ? 1 : -1;
      else
	return 1;
    }
  else if (TREE_CODE (oeb->op) != SSA_NAME)
    return -1;

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
    {
      /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
	 versions of removed SSA_NAMEs, prefer to sort based on basic
	 block and gimple_uid of the SSA_NAME_DEF_STMT if possible.  */
      gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
      gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
      basic_block bba = gimple_bb (stmta);
      basic_block bbb = gimple_bb (stmtb);
      if (bbb != bba)
	{
	  /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
	     but the other might not.  */
	  if (!bba)
	    return 1;
	  if (!bbb)
	    return -1;
	  /* If neither is, compare bb_rank.  */
	  if (bb_rank[bbb->index] != bb_rank[bba->index])
	    return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
	}

      bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
      bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
      if (da != db)
	return da ? 1 : -1;

      return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
    }

  return oeb->id > oea->id ? 1 : -1;
}
/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry *> *ops, tree op,
		gimple *stmt_to_insert = NULL)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  oe->stmt_to_insert = stmt_to_insert;
  ops->safe_push (oe);
}
/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
		       HOST_WIDE_INT repeat)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  oe->stmt_to_insert = NULL;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}
/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    {
      tree rhs1 = gimple_assign_rhs1 (stmt);
      tree rhs2 = gimple_assign_rhs2 (stmt);
      if (TREE_CODE (rhs1) == SSA_NAME
	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1))
	return false;
      if (rhs2
	  && TREE_CODE (rhs2) == SSA_NAME
	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2))
	return false;
      return true;
    }

  return false;
}
/* Return true if STMT is a nop-conversion.  */

static bool
gimple_nop_conversion_p (gimple *stmt)
{
  if (gassign *ass = dyn_cast <gassign *> (stmt))
    {
      if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
	  && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
				    TREE_TYPE (gimple_assign_rhs1 (ass))))
	return true;
    }
  return false;
}
/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of the negate operation.  Otherwise, return NULL.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple *stmt = SSA_NAME_DEF_STMT (name);

  /* Look through nop conversions (sign changes).  */
  if (gimple_nop_conversion_p (stmt)
      && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
    stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}
/* Return true if OP1 and OP2 have the same value when cast to either
   type.  */

static bool
ops_equal_values_p (tree op1, tree op2)
{
  if (op1 == op2)
    return true;

  tree orig_op1 = op1;
  if (TREE_CODE (op1) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op1);
      if (gimple_nop_conversion_p (stmt))
	{
	  op1 = gimple_assign_rhs1 (stmt);
	  if (op1 == op2)
	    return true;
	}
    }

  if (TREE_CODE (op2) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op2);
      if (gimple_nop_conversion_p (stmt))
	{
	  op2 = gimple_assign_rhs1 (stmt);
	  if (op2 == op1
	      || op2 == orig_op1)
	    return true;
	}
    }

  return false;
}
/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
			  vec<operand_entry *> *ops,
			  bool *all_done,
			  unsigned int i,
			  operand_entry *curr,
			  operand_entry *last)
{
  /* If we have two of the same op, and the opcode is & |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
	{
	case MAX_EXPR:
	case MIN_EXPR:
	case BIT_IOR_EXPR:
	case BIT_AND_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op);
	      fprintf (dump_file, " [&|minmax] ");
	      print_generic_expr (dump_file, last->op);
	      fprintf (dump_file, " -> ");
	      print_generic_stmt (dump_file, last->op);
	    }

	  ops->ordered_remove (i);
	  reassociate_stats.ops_eliminated ++;

	  return true;

	case BIT_XOR_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op);
	      fprintf (dump_file, " ^ ");
	      print_generic_expr (dump_file, last->op);
	      fprintf (dump_file, " -> nothing\n");
	    }

	  reassociate_stats.ops_eliminated += 2;

	  if (ops->length () == 2)
	    {
	      ops->truncate (0);
	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
	      *all_done = true;
	    }
	  else
	    {
	      ops->ordered_remove (i-1);
	      ops->ordered_remove (i-1);
	    }

	  return true;

	default:
	  break;
	}
    }
  return false;
}
static vec<tree> plus_negates;

/* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
   expression, look in OPS for a corresponding positive operation to cancel
   it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
			   vec<operand_entry *> *ops,
			   unsigned int currindex,
			   operand_entry *curr)
{
  tree negateop, notop;
  unsigned int i;
  operand_entry *oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1 ;
       i++)
    {
      if (negateop
	  && ops_equal_values_p (oe->op, negateop))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, negateop);
	      fprintf (dump_file, " + -");
	      print_generic_expr (dump_file, oe->op);
	      fprintf (dump_file, " -> 0\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
      else if (notop
	       && ops_equal_values_p (oe->op, notop))
	{
	  tree op_type = TREE_TYPE (oe->op);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop);
	      fprintf (dump_file, " + ~");
	      print_generic_expr (dump_file, oe->op);
	      fprintf (dump_file, " -> -1\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_all_ones_cst (op_type));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
    }

  /* If CURR->OP is a negate expr without nop conversion in a plus expr:
     save it for later inspection in repropagate_negates().  */
  if (negateop != NULL_TREE
      && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
    plus_negates.safe_push (curr->op);

  return false;
}
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, collapse OPS to a single 0 or -1
   operand, respectively, and return true.  Otherwise, return false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
		     vec<operand_entry *> *ops,
		     unsigned int currindex,
		     operand_entry *curr)
{
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " & ~");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " | ~");
	      print_generic_expr (dump_file, oe->op);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " -> 0\n");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " -> -1\n");
	    }

	  if (opcode == BIT_AND_EXPR)
	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
	  else if (opcode == BIT_IOR_EXPR)
	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));

	  reassociate_stats.ops_eliminated += ops->length () - 1;
	  ops->truncate (0);
	  ops->quick_push (oe);
	  return true;
	}
    }

  return false;
}
/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */
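/* For example, a BIT_AND_EXPR list ending in constant 0 collapses to
   just {0}, while a trailing -1 is simply dropped; MULT_EXPR lists do
   the same for * 0 and * 1, with the usual floating-point caveats.  */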
static void
eliminate_using_constants (enum tree_code opcode,
			   vec<operand_entry *> *ops)
{
  operand_entry *oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
	{
	case BIT_AND_EXPR:
	  if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & -1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case BIT_IOR_EXPR:
	  if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | -1, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case MULT_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && !HONOR_NANS (type)
		  && !HONOR_SIGNED_ZEROS (type)
		  && real_zerop (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;
		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_onep (oelast->op)
		   || (FLOAT_TYPE_P (type)
		       && !HONOR_SNANS (type)
		       && real_onep (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	case BIT_XOR_EXPR:
	case PLUS_EXPR:
	case MINUS_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
		  && fold_real_zero_addition_p (type, oelast->op,
						opcode == MINUS_EXPR)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found [|^+] 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	default:
	  break;
	}
    }
}
static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
				 bool, bool);

/* Structure for tracking and counting operands.  */
struct oecount {
  unsigned int cnt;
  unsigned int id;
  enum tree_code oecode;
  tree op;
};


/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;
/* Oecount hashtable helpers.  */

struct oecount_hasher : int_hash <int, 0, 1>
{
  static inline hashval_t hash (int);
  static inline bool equal (int, int);
};

/* Hash function for oecount.  */

inline hashval_t
oecount_hasher::hash (int p)
{
  const oecount *c = &cvec[p - 42];
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}

/* Comparison function for oecount.  */

inline bool
oecount_hasher::equal (int p1, int p2)
{
  const oecount *c1 = &cvec[p1 - 42];
  const oecount *c2 = &cvec[p2 - 42];
  return c1->oecode == c2->oecode && c1->op == c2->op;
}
/* Comparison function for qsort sorting oecount elements by count.  */

static int
oecount_cmp (const void *p1, const void *p2)
{
  const oecount *c1 = (const oecount *)p1;
  const oecount *c2 = (const oecount *)p2;
  if (c1->cnt != c2->cnt)
    return c1->cnt > c2->cnt ? 1 : -1;
  else
    /* If counts are identical, use unique IDs to stabilize qsort.  */
    return c1->id > c2->id ? 1 : -1;
}
/* Return TRUE iff STMT represents a builtin call that raises OP
   to some exponent.  */

static bool
stmt_is_power_of_op (gimple *stmt, tree op)
{
  if (!is_gimple_call (stmt))
    return false;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));

    default:
      return false;
    }
}
/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */
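/* For example, a call __builtin_powi (x, 5) is rewritten in place into
   __builtin_powi (x, 4) and 4 is returned.  */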
static HOST_WIDE_INT
decrement_power (gimple *stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, SIGNED);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_CFN_POWI:
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      gcc_unreachable ();
    }
}
/* Give the SSA name defined by STMT a new name, replace all uses of
   the old name with the new one, and return the new SSA name.  */

static tree
make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
{
  gimple *use_stmt;
  use_operand_p use;
  imm_use_iterator iter;
  tree new_lhs, new_debug_lhs = NULL_TREE;
  tree lhs = gimple_get_lhs (stmt);

  new_lhs = make_ssa_name (TREE_TYPE (lhs));
  gimple_set_lhs (stmt, new_lhs);

  /* Also need to update GIMPLE_DEBUGs.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      tree repl = new_lhs;
      if (is_gimple_debug (use_stmt))
	{
	  if (new_debug_lhs == NULL_TREE)
	    {
	      new_debug_lhs = make_node (DEBUG_EXPR_DECL);
	      gdebug *def_temp
		= gimple_build_debug_bind (new_debug_lhs,
					   build2 (opcode, TREE_TYPE (lhs),
						   new_lhs, op),
					   stmt);
	      DECL_ARTIFICIAL (new_debug_lhs) = 1;
	      TREE_TYPE (new_debug_lhs) = TREE_TYPE (lhs);
	      SET_DECL_MODE (new_debug_lhs, TYPE_MODE (TREE_TYPE (lhs)));
	      gimple_set_uid (def_temp, gimple_uid (stmt));
	      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	      gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
	    }
	  repl = new_debug_lhs;
	}
      FOR_EACH_IMM_USE_ON_STMT (use, iter)
	SET_USE (use, repl);
      update_stmt (use_stmt);
    }
  return new_lhs;
}
/* Replace all SSA names defined in STMTS_TO_FIX and replace their
   uses with new SSA names.  Also do this for the stmt that defines DEF
   if *DEF is not OP.  */

static void
make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
			   vec<gimple *> &stmts_to_fix)
{
  unsigned i;
  gimple *stmt;

  if (*def != op
      && TREE_CODE (*def) == SSA_NAME
      && (stmt = SSA_NAME_DEF_STMT (*def))
      && gimple_code (stmt) != GIMPLE_NOP)
    *def = make_new_ssa_for_def (stmt, opcode, op);

  FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
    make_new_ssa_for_def (stmt, opcode, op);
}
/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
{
  tree lhs;
  gimple *use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  reassoc_remove_stmt (&gsi);
  release_defs (stmt);
}
/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  tree orig_def = *def;
  gimple *stmt = SSA_NAME_DEF_STMT (*def);
  /* PR72835 - Record the stmt chain that has to be updated such that
     we don't use the same LHS when the values computed are different.  */
  auto_vec<gimple *, 64> stmts_to_fix;

  do
    {
      tree name;

      if (opcode == MULT_EXPR)
	{
	  if (stmt_is_power_of_op (stmt, op))
	    {
	      if (decrement_power (stmt) == 1)
		{
		  if (stmts_to_fix.length () > 0)
		    stmts_to_fix.pop ();
		  propagate_op_to_single_use (op, stmt, def);
		}
	      break;
	    }
	  else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
	    {
	      if (gimple_assign_rhs1 (stmt) == op)
		{
		  tree cst = build_minus_one_cst (TREE_TYPE (op));
		  if (stmts_to_fix.length () > 0)
		    stmts_to_fix.pop ();
		  propagate_op_to_single_use (cst, stmt, def);
		  break;
		}
	      else if (integer_minus_onep (op)
		       || real_minus_onep (op))
		{
		  gimple_assign_set_rhs_code
		    (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
		  break;
		}
	    }
	}

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
	 is ours simply propagate the other operand into the stmts
	 single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
	  && (name == op
	      || gimple_assign_rhs2 (stmt) == op))
	{
	  if (name == op)
	    name = gimple_assign_rhs2 (stmt);
	  if (stmts_to_fix.length () > 0)
	    stmts_to_fix.pop ();
	  propagate_op_to_single_use (name, stmt, def);
	  break;
	}

      /* We might have a multiply of two __builtin_pow* calls, and
	 the operand might be hiding in the rightmost one.  Likewise
	 this can happen for a negate.  */
      if (opcode == MULT_EXPR
	  && gimple_assign_rhs_code (stmt) == opcode
	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
	  && has_single_use (gimple_assign_rhs2 (stmt)))
	{
	  gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
	  if (stmt_is_power_of_op (stmt2, op))
	    {
	      if (decrement_power (stmt2) == 1)
		propagate_op_to_single_use (op, stmt2, def);
	      else
		stmts_to_fix.safe_push (stmt2);
	      break;
	    }
	  else if (is_gimple_assign (stmt2)
		   && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
	    {
	      if (gimple_assign_rhs1 (stmt2) == op)
		{
		  tree cst = build_minus_one_cst (TREE_TYPE (op));
		  propagate_op_to_single_use (cst, stmt2, def);
		  break;
		}
	      else if (integer_minus_onep (op)
		       || real_minus_onep (op))
		{
		  stmts_to_fix.safe_push (stmt2);
		  gimple_assign_set_rhs_code
		    (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
		  break;
		}
	    }
	}

      /* Continue walking the chain.  */
      gcc_assert (name != op
		  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
      stmts_to_fix.safe_push (stmt);
    }
  while (1);

  if (stmts_to_fix.length () > 0 || *def == orig_def)
    make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
}
/* Returns true if statement S1 dominates statement S2.  Like
   stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */

static bool
reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     SSA_NAME.  Assume it lives at the beginning of function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 == bb2)
    {
      /* PHIs in the same basic block are assumed to be
	 executed all in parallel, if only one stmt is a PHI,
	 it dominates the other stmt in the same basic block.  */
      if (gimple_code (s1) == GIMPLE_PHI)
	return true;

      if (gimple_code (s2) == GIMPLE_PHI)
	return false;

      gcc_assert (gimple_uid (s1) && gimple_uid (s2));

      if (gimple_uid (s1) < gimple_uid (s2))
	return true;

      if (gimple_uid (s1) > gimple_uid (s2))
	return false;

      gimple_stmt_iterator gsi = gsi_for_stmt (s1);
      unsigned int uid = gimple_uid (s1);
      for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *s = gsi_stmt (gsi);
	  if (gimple_uid (s) != uid)
	    break;
	  if (s == s2)
	    return true;
	}

      return false;
    }

  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}
/* Insert STMT after INSERT_POINT.  */

static void
insert_stmt_after (gimple *stmt, gimple *insert_point)
{
  gimple_stmt_iterator gsi;
  basic_block bb;

  if (gimple_code (insert_point) == GIMPLE_PHI)
    bb = gimple_bb (insert_point);
  else if (!stmt_ends_bb_p (insert_point))
    {
      gsi = gsi_for_stmt (insert_point);
      gimple_set_uid (stmt, gimple_uid (insert_point));
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
      return;
    }
  else
    /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
       thus if it must end a basic block, it should be a call that can
       throw, or some assignment that can throw.  If it throws, the LHS
       of it will not be initialized though, so only valid places using
       the SSA_NAME should be dominated by the fallthru edge.  */
    bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
  gsi = gsi_after_labels (bb);
  if (gsi_end_p (gsi))
    {
      gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
      gimple_set_uid (stmt,
		      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
    }
  else
    gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}
/* Builds one statement performing OP1 OPCODE OP2, creating a new SSA
   name for the result.  Places the statement after the definition of
   either OP1 or OP2.  Returns the new statement.  */

static gimple *
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
{
  gimple *op1def = NULL, *op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gassign *sum;

  /* Create the addition statement.  */
  op = make_ssa_name (type);
  sum = gimple_build_assign (op, opcode, op1, op2);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
      if (gsi_end_p (gsi))
	{
	  gimple_stmt_iterator gsi2
	    = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	  gimple_set_uid (sum,
			  gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
	}
      else
	gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else
    {
      gimple *insert_point;
      if ((!op1def || gimple_nop_p (op1def))
	  || (op2def && !gimple_nop_p (op2def)
	      && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
	insert_point = op2def;
      else
	insert_point = op1def;
      insert_stmt_after (sum, insert_point);
    }

  return sum;
}
/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
	second candidate bitmap noting all multiplication/division chains
	that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
	candidates with one (operand, code) operation removed from their
	multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
	multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
	processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b ) * x * y in one invocation of the reassociation pass.  */
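/* As a small worked example (illustrative only): for  a * x + b * x + c
   the candidate summands are a * x and b * x, the pair (x, MULT_EXPR)
   gets a count of two, and the addition chain is rebuilt as
   (a + b) * x + c, with the second summand replaced by constant 0.  */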
static bool
undistribute_ops_list (enum tree_code opcode,
		       vec<operand_entry *> *ops, struct loop *loop)
{
  unsigned int length = ops->length ();
  operand_entry *oe1;
  unsigned i, j;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  vec<operand_entry *> *subops;
  bool changed = false;
  unsigned int next_oecount_id = 0;

  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process.  */
  auto_sbitmap candidates (length);
  bitmap_clear (candidates);
  nr_candidates = 0;
  FOR_EACH_VEC_ELT (*ops, i, oe1)
    {
      enum tree_code dcode;
      gimple *oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
	continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
	continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
	   && dcode != RDIV_EXPR)
	  || !is_reassociable_op (oe1def, dcode, loop))
	continue;

      bitmap_set_bit (candidates, i);
      nr_candidates++;
    }

  if (nr_candidates < 2)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
			  (*ops)[bitmap_first_set_bit (candidates)]->op,
			  TDF_NONE);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.  */
  cvec.create (0);

  hash_table<oecount_hasher> ctable (15);

  /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to workaround that limitation.  */
  typedef vec<operand_entry *> vec_operand_entry_t_heap;
  subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
    {
      gimple *oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
      oecode = gimple_assign_rhs_code (oedef);
      linearize_expr_tree (&subops[i], oedef,
			   associative_tree_code (oecode), false);

      FOR_EACH_VEC_ELT (subops[i], j, oe1)
	{
	  oecount c;
	  int *slot;
	  int idx;
	  c.oecode = oecode;
	  c.cnt = 1;
	  c.id = next_oecount_id++;
	  c.op = oe1->op;
	  cvec.safe_push (c);
	  idx = cvec.length () + 41;
	  slot = ctable.find_slot (idx, INSERT);
	  if (!*slot)
	    {
	      *slot = idx;
	    }
	  else
	    {
	      cvec.pop ();
	      cvec[*slot - 42].cnt++;
	    }
	}
    }

  /* Sort the counting table.  */
  cvec.qsort (oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      FOR_EACH_VEC_ELT (cvec, j, c)
	{
	  fprintf (dump_file, "  %u %s: ", c->cnt,
		   c->oecode == MULT_EXPR
		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
	  print_generic_expr (dump_file, c->op);
	  fprintf (dump_file, "\n");
	}
    }

  /* Process the (operand, code) pairs in order of most occurrence.  */
  auto_sbitmap candidates2 (length);
  while (!cvec.is_empty ())
    {
      oecount *c = &cvec.last ();
      if (c->cnt < 2)
	break;

      /* Now collect the operands in the outer chain that contain
	 the common operand in their inner chain.  */
      bitmap_clear (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
	{
	  gimple *oedef;
	  enum tree_code oecode;
	  unsigned j;
	  tree op = (*ops)[i]->op;

	  /* If we undistributed in this chain already this may be
	     a constant.  */
	  if (TREE_CODE (op) != SSA_NAME)
	    continue;

	  oedef = SSA_NAME_DEF_STMT (op);
	  oecode = gimple_assign_rhs_code (oedef);
	  if (oecode != c->oecode)
	    continue;

	  FOR_EACH_VEC_ELT (subops[i], j, oe1)
	    {
	      if (oe1->op == c->op)
		{
		  bitmap_set_bit (candidates2, i);
		  ++nr_candidates2;
		  break;
		}
	    }
	}

      if (nr_candidates2 >= 2)
	{
	  operand_entry *oe1, *oe2;
	  gimple *prod;
	  int first = bitmap_first_set_bit (candidates2);

	  /* Build the new addition chain.  */
	  oe1 = (*ops)[first];
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Building (");
	      print_generic_expr (dump_file, oe1->op);
	    }
	  zero_one_operation (&oe1->op, c->oecode, c->op);
	  EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
	    {
	      gimple *sum;
	      oe2 = (*ops)[i];
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " + ");
		  print_generic_expr (dump_file, oe2->op);
		}
	      zero_one_operation (&oe2->op, c->oecode, c->op);
	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
				       oe1->op, oe2->op, opcode);
	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
	      oe2->rank = 0;
	      oe1->op = gimple_get_lhs (sum);
	    }

	  /* Apply the multiplication/division.  */
	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
				    oe1->op, c->op, c->oecode);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
	      print_generic_expr (dump_file, c->op);
	      fprintf (dump_file, "\n");
	    }

	  /* Record it in the addition chain and disable further
	     undistribution with this op.  */
	  oe1->op = gimple_assign_lhs (prod);
	  oe1->rank = get_rank (oe1->op);
	  subops[first].release ();

	  changed = true;
	}

      cvec.pop ();
    }

  for (i = 0; i < ops->length (); ++i)
    subops[i].release ();
  free (subops);
  cvec.release ();

  return changed;
}
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
   expression, examine the other OPS to see if any of them are comparisons
   of the same values, which we may be able to combine or eliminate.
   For example, we can rewrite (a < b) | (a == b) as (a <= b).  */

static bool
eliminate_redundant_comparison (enum tree_code opcode,
				vec<operand_entry *> *ops,
				unsigned int currindex,
				operand_entry *curr)
{
  tree op1, op2;
  enum tree_code lcode, rcode;
  gimple *def1, *def2;
  int i;
  operand_entry *oe;

  if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
    return false;

  /* Check that CURR is a comparison.  */
  if (TREE_CODE (curr->op) != SSA_NAME)
    return false;
  def1 = SSA_NAME_DEF_STMT (curr->op);
  if (!is_gimple_assign (def1))
    return false;
  lcode = gimple_assign_rhs_code (def1);
  if (TREE_CODE_CLASS (lcode) != tcc_comparison)
    return false;
  op1 = gimple_assign_rhs1 (def1);
  op2 = gimple_assign_rhs2 (def1);

  /* Now look for a similar comparison in the remaining OPS.  */
  for (i = currindex + 1; ops->iterate (i, &oe); i++)
    {
      tree t;

      if (TREE_CODE (oe->op) != SSA_NAME)
	continue;
      def2 = SSA_NAME_DEF_STMT (oe->op);
      if (!is_gimple_assign (def2))
	continue;
      rcode = gimple_assign_rhs_code (def2);
      if (TREE_CODE_CLASS (rcode) != tcc_comparison)
	continue;

      /* If we got here, we have a match.  See if we can combine the
	 two comparisons.  */
      if (opcode == BIT_IOR_EXPR)
	t = maybe_fold_or_comparisons (lcode, op1, op2,
				       rcode, gimple_assign_rhs1 (def2),
				       gimple_assign_rhs2 (def2));
      else
	t = maybe_fold_and_comparisons (lcode, op1, op2,
					rcode, gimple_assign_rhs1 (def2),
					gimple_assign_rhs2 (def2));
      if (!t)
	continue;

      /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
	 always give us a boolean_type_node value back.  If the original
	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
	 we need to convert.  */
      if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
	t = fold_convert (TREE_TYPE (curr->op), t);

      if (TREE_CODE (t) != INTEGER_CST
	  && !operand_equal_p (t, curr->op, 0))
	{
	  enum tree_code subcode;
	  tree newop1, newop2;
	  if (!COMPARISON_CLASS_P (t))
	    continue;
	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
	  STRIP_USELESS_TYPE_CONVERSION (newop1);
	  STRIP_USELESS_TYPE_CONVERSION (newop2);
	  if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
	    continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Equivalence: ");
	  print_generic_expr (dump_file, curr->op);
	  fprintf (dump_file, " %s ", op_symbol_code (opcode));
	  print_generic_expr (dump_file, oe->op);
	  fprintf (dump_file, " -> ");
	  print_generic_expr (dump_file, t);
	  fprintf (dump_file, "\n");
	}

      /* Now we can delete oe, as it has been subsumed by the new combined
	 expression t.  */
      ops->ordered_remove (i);
      reassociate_stats.ops_eliminated ++;

      /* If t is the same as curr->op, we're done.  Otherwise we must
	 replace curr->op with t.  Special case is if we got a constant
	 back, in which case we add it to the end instead of in place of
	 the current entry.  */
      if (TREE_CODE (t) == INTEGER_CST)
	{
	  ops->ordered_remove (currindex);
	  add_to_ops_vec (ops, t);
	}
      else if (!operand_equal_p (t, curr->op, 0))
	{
	  gimple *sum;
	  enum tree_code subcode;
	  tree newop1;
	  tree newop2;
	  gcc_assert (COMPARISON_CLASS_P (t));
	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
	  STRIP_USELESS_TYPE_CONVERSION (newop1);
	  STRIP_USELESS_TYPE_CONVERSION (newop2);
	  gcc_checking_assert (is_gimple_val (newop1)
			       && is_gimple_val (newop2));
	  sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
	  curr->op = gimple_get_lhs (sum);
	}
      return true;
    }

  return false;
}
/* Transform repeated addition of same values into multiply with
   constant.  */

static bool
transform_add_to_multiply (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  tree op = NULL_TREE;
  int j;
  int i, start = -1, end = 0, count = 0;
  auto_vec<std::pair <int, int> > indxs;
  bool changed = false;

  if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
      && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
	  || !flag_unsafe_math_optimizations))
    return false;

  /* Look for repeated operands.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (start == -1)
	{
	  count = 1;
	  op = oe->op;
	  start = i;
	}
      else if (operand_equal_p (oe->op, op, 0))
	{
	  count++;
	  end = i;
	}
      else
	{
	  if (count > 1)
	    indxs.safe_push (std::make_pair (start, end));
	  count = 1;
	  op = oe->op;
	  start = i;
	}
    }

  if (count > 1)
    indxs.safe_push (std::make_pair (start, end));

  for (j = indxs.length () - 1; j >= 0; --j)
    {
      /* Convert repeated operand addition to multiplication.  */
      start = indxs[j].first;
      end = indxs[j].second;
      op = (*ops)[start]->op;
      count = end - start + 1;
      for (i = end; i >= start; --i)
	ops->unordered_remove (i);
      tree tmp = make_ssa_name (TREE_TYPE (op));
      tree cst = build_int_cst (integer_type_node, count);
      gassign *mul_stmt
	= gimple_build_assign (tmp, MULT_EXPR,
			       op, fold_convert (TREE_TYPE (op), cst));
      gimple_set_visited (mul_stmt, true);
      add_to_ops_vec (ops, tmp, mul_stmt);
      changed = true;
    }

  return changed;
}
/* Perform various identities and other optimizations on the list of
   operand entries, stored in OPS.  The tree code for the binary
   operation between all the operands is OPCODE.  */

static void
optimize_ops_list (enum tree_code opcode,
		   vec<operand_entry *> *ops)
{
  unsigned int length = ops->length ();
  unsigned int i;
  operand_entry *oe;
  operand_entry *oelast = NULL;
  bool iterate = false;

  if (length == 1)
    return;

  oelast = ops->last ();

  /* If the last two are constants, pop the constants off, merge them
     and try the next two.  */
  if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
    {
      operand_entry *oelm1 = (*ops)[length - 2];

      if (oelm1->rank == 0
	  && is_gimple_min_invariant (oelm1->op)
	  && useless_type_conversion_p (TREE_TYPE (oelm1->op),
					TREE_TYPE (oelast->op)))
	{
	  tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
				     oelm1->op, oelast->op);

	  if (folded && is_gimple_min_invariant (folded))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Merging constants\n");

	      ops->pop ();
	      ops->pop ();

	      add_to_ops_vec (ops, folded);
	      reassociate_stats.constants_eliminated++;

	      optimize_ops_list (opcode, ops);
	      return;
	    }
	}
    }

  eliminate_using_constants (opcode, ops);
  oelast = NULL;

  for (i = 0; ops->iterate (i, &oe);)
    {
      bool done = false;

      if (eliminate_not_pairs (opcode, ops, i, oe))
	return;
      if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
	  || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
	  || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
	{
	  if (done)
	    return;
	  iterate = true;
	  oelast = NULL;
	  continue;
	}
      oelast = oe;
      i++;
    }

  if (iterate)
    optimize_ops_list (opcode, ops);
}
/* The following functions are subroutines to optimize_range_tests and allow
   it to try to change a logical combination of comparisons into a range
   test.

   For example, both
	X == 2 || X == 5 || X == 3 || X == 4
   and
	X >= 2 && X <= 5
   are converted to
	(unsigned) (X - 2) <= 3

   For more information see comments above fold_test_range in fold-const.c,
   this implementation is for GIMPLE.  */

struct range_entry
{
  tree exp;
  tree low;
  tree high;
  bool in_p;
  bool strict_overflow_p;
  unsigned int idx, next;
};
/* This is similar to make_range in fold-const.c, but on top of
   GIMPLE instead of trees.  If EXP is non-NULL, it should be
   an SSA_NAME and STMT argument is ignored, otherwise STMT
   argument should be a GIMPLE_COND.  */

static void
init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
{
  int in_p;
  tree low, high;
  bool is_bool, strict_overflow_p;

  r->exp = NULL_TREE;
  r->in_p = false;
  r->strict_overflow_p = false;
  r->low = NULL_TREE;
  r->high = NULL_TREE;
  if (exp != NULL_TREE
      && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
    return;

  /* Start with simply saying "EXP != 0" and then look at the code of EXP
     and see if we can refine the range.  Some of the cases below may not
     happen, but it doesn't seem worth worrying about this.  We "continue"
     the outer loop when we've changed something; otherwise we "break"
     the switch, which will "break" the while.  */
  low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
  high = low;
  in_p = 0;
  strict_overflow_p = false;
  is_bool = false;
  if (exp == NULL_TREE)
    is_bool = true;
  else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
    {
      if (TYPE_UNSIGNED (TREE_TYPE (exp)))
	is_bool = true;
      else
	return;
    }
  else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
    is_bool = true;

  while (1)
    {
      enum tree_code code;
      tree arg0, arg1, exp_type;
      tree nexp;
      location_t loc;

      if (exp != NULL_TREE)
	{
	  if (TREE_CODE (exp) != SSA_NAME
	      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
	    break;

	  stmt = SSA_NAME_DEF_STMT (exp);
	  if (!is_gimple_assign (stmt))
	    break;

	  code = gimple_assign_rhs_code (stmt);
	  arg0 = gimple_assign_rhs1 (stmt);
	  arg1 = gimple_assign_rhs2 (stmt);
	  exp_type = TREE_TYPE (exp);
	}
      else
	{
	  code = gimple_cond_code (stmt);
	  arg0 = gimple_cond_lhs (stmt);
	  arg1 = gimple_cond_rhs (stmt);
	  exp_type = boolean_type_node;
	}

      if (TREE_CODE (arg0) != SSA_NAME
	  || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
	break;
      loc = gimple_location (stmt);
      switch (code)
	{
	case BIT_NOT_EXPR:
	  if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
	      /* Ensure the range is either +[-,0], +[0,0],
		 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
		 -[1,1].  If it is e.g. +[-,-] or -[-,-]
		 or similar expression of unconditional true or
		 false, it should not be negated.  */
	      && ((high && integer_zerop (high))
		  || (low && integer_onep (low))))
	    {
	      in_p = !in_p;
	      exp = arg0;
	      continue;
	    }
	  break;
	case SSA_NAME:
	  exp = arg0;
	  continue;
	CASE_CONVERT:
	  if (is_bool)
	    {
	      if ((TYPE_PRECISION (exp_type) == 1
		   || TREE_CODE (exp_type) == BOOLEAN_TYPE)
		  && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
		return;
	    }
	  else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
	    {
	      if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
		is_bool = true;
	      else
		return;
	    }
	  else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
	    is_bool = true;
	  goto do_default;
	case EQ_EXPR:
	case NE_EXPR:
	case LT_EXPR:
	case LE_EXPR:
	case GE_EXPR:
	case GT_EXPR:
	  is_bool = true;
	  /* FALLTHRU */
	default:
	  if (!is_bool)
	    return;
	do_default:
	  nexp = make_range_step (loc, code, arg0, arg1, exp_type,
				  &low, &high, &in_p,
				  &strict_overflow_p);
	  if (nexp != NULL_TREE)
	    {
	      exp = nexp;
	      gcc_assert (TREE_CODE (exp) == SSA_NAME);
	      continue;
	    }
	  break;
	}
      break;
    }
  if (is_bool)
    {
      r->exp = exp;
      r->in_p = in_p;
      r->low = low;
      r->high = high;
      r->strict_overflow_p = strict_overflow_p;
    }
}
/* Comparison function for qsort.  Sort entries
   without SSA_NAME exp first, then with SSA_NAMEs sorted
   by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
   by increasing ->low and if ->low is the same, by increasing
   ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
   maximum.  */

static int
range_entry_cmp (const void *a, const void *b)
{
  const struct range_entry *p = (const struct range_entry *) a;
  const struct range_entry *q = (const struct range_entry *) b;

  if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
    {
      if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
	{
	  /* Group range_entries for the same SSA_NAME together.  */
	  if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
	    return -1;
	  else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
	    return 1;
	  /* If ->low is different, NULL low goes first, then by
	     ascending low.  */
	  if (p->low != NULL_TREE)
	    {
	      if (q->low != NULL_TREE)
		{
		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
					  p->low, q->low);
		  if (tem && integer_onep (tem))
		    return -1;
		  tem = fold_binary (GT_EXPR, boolean_type_node,
				     p->low, q->low);
		  if (tem && integer_onep (tem))
		    return 1;
		}
	      else
		return 1;
	    }
	  else if (q->low != NULL_TREE)
	    return -1;
	  /* If ->high is different, NULL high goes last, before that by
	     ascending high.  */
	  if (p->high != NULL_TREE)
	    {
	      if (q->high != NULL_TREE)
		{
		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
					  p->high, q->high);
		  if (tem && integer_onep (tem))
		    return -1;
		  tem = fold_binary (GT_EXPR, boolean_type_node,
				     p->high, q->high);
		  if (tem && integer_onep (tem))
		    return 1;
		}
	      else
		return -1;
	    }
	  else if (q->high != NULL_TREE)
	    return 1;
	  /* If both ranges are the same, sort below by ascending idx.  */
	}
      else
	return 1;
    }
  else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
    return -1;

  if (p->idx < q->idx)
    return -1;
  else
    {
      gcc_checking_assert (p->idx > q->idx);
      return 1;
    }
}
/* Helper function for update_range_test.  Force EXPR into an SSA_NAME,
   inserting any needed statements at GSI, before or after it as
   indicated by BEFORE.  */

static tree
force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
{
  enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
  tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
  if (TREE_CODE (ret) != SSA_NAME)
    {
      gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
      if (before)
	gsi_insert_before (gsi, g, GSI_SAME_STMT);
      else
	gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
      ret = gimple_assign_lhs (g);
    }
  return ret;
}
/* Helper routine of optimize_range_tests.
   [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
   RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
   OPCODE and OPS are arguments of optimize_range_tests.  If OTHERRANGE
   is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
   an array of COUNT pointers to other ranges.  Return
   true if the range merge has been successful.
   If OPCODE is ERROR_MARK, this is called from within
   maybe_optimize_range_tests and is performing inter-bb range optimization.
   In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
   oe->rank.  */
static bool
update_range_test (struct range_entry *range, struct range_entry *otherrange,
		   struct range_entry **otherrangep,
		   unsigned int count, enum tree_code opcode,
		   vec<operand_entry *> *ops, tree exp, gimple_seq seq,
		   bool in_p, tree low, tree high, bool strict_overflow_p)
  operand_entry *oe = (*ops)[range->idx];
  tree op = oe->op;
  gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
		    : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
  location_t loc = gimple_location (stmt);
  tree optype = op ? TREE_TYPE (op) : boolean_type_node;
  tree tem = build_range_check (loc, optype, unshare_expr (exp),
				in_p, low, high);
  enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
  gimple_stmt_iterator gsi;
  unsigned int i, uid;

  if (tem == NULL_TREE)

  /* If op is default def SSA_NAME, there is no place to insert the
     new comparison.  Give up, unless we can use OP itself as the
     range test.  */
  if (op && SSA_NAME_IS_DEFAULT_DEF (op))
      if (op == range->exp
	  && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
	      || TREE_CODE (optype) == BOOLEAN_TYPE)
	  && (op == tem
	      || (TREE_CODE (tem) == EQ_EXPR
		  && TREE_OPERAND (tem, 0) == op
		  && integer_onep (TREE_OPERAND (tem, 1))))
	  && opcode != BIT_IOR_EXPR
	  && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))

  if (strict_overflow_p && issue_strict_overflow_warning (wc))
    warning_at (loc, OPT_Wstrict_overflow,
		"assuming signed overflow does not occur "
		"when simplifying range test");

  if (dump_file && (dump_flags & TDF_DETAILS))
      struct range_entry *r;
      fprintf (dump_file, "Optimizing range tests ");
      print_generic_expr (dump_file, range->exp);
      fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
      print_generic_expr (dump_file, range->low);
      fprintf (dump_file, ", ");
      print_generic_expr (dump_file, range->high);
      fprintf (dump_file, "]");
      for (i = 0; i < count; i++)
	  if (r->exp
	      && r->exp != range->exp
	      && TREE_CODE (r->exp) == SSA_NAME)
	      fprintf (dump_file, " and ");
	      print_generic_expr (dump_file, r->exp);
	    fprintf (dump_file, " and");
	  fprintf (dump_file, " %c[", r->in_p ? '+' : '-');
	  print_generic_expr (dump_file, r->low);
	  fprintf (dump_file, ", ");
	  print_generic_expr (dump_file, r->high);
	  fprintf (dump_file, "]");
      fprintf (dump_file, "\n into ");
      print_generic_expr (dump_file, tem);
      fprintf (dump_file, "\n");

  if (opcode == BIT_IOR_EXPR
      || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
    tem = invert_truthvalue_loc (loc, tem);

  tem = fold_convert_loc (loc, optype, tem);
      gsi = gsi_for_stmt (stmt);
      uid = gimple_uid (stmt);
    gcc_checking_assert (tem == op);
  /* In rare cases range->exp can be equal to lhs of stmt.
     In that case we have to insert after the stmt rather than before
     it.  If stmt is a PHI, insert it at the start of the basic block.  */
  else if (op != range->exp)
      gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
      tem = force_into_ssa_name (&gsi, tem, true);
  else if (gimple_code (stmt) != GIMPLE_PHI)
      gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
      tem = force_into_ssa_name (&gsi, tem, false);
      gsi = gsi_after_labels (gimple_bb (stmt));
      if (!gsi_end_p (gsi))
	uid = gimple_uid (gsi_stmt (gsi));
	  gsi = gsi_start_bb (gimple_bb (stmt));
	  while (!gsi_end_p (gsi))
	      uid = gimple_uid (gsi_stmt (gsi));
      gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
      tem = force_into_ssa_name (&gsi, tem, true);
      if (gsi_end_p (gsi))
	gsi = gsi_last_bb (gimple_bb (stmt));
      for (; !gsi_end_p (gsi); gsi_prev (&gsi))
	if (gimple_uid (gsi_stmt (gsi)))
	  gimple_set_uid (gsi_stmt (gsi), uid);

  range->strict_overflow_p = false;

  for (i = 0; i < count; i++)
      if (otherrange)
	range = otherrange + i;
      else
	range = otherrangep[i];
      oe = (*ops)[range->idx];
      /* Now change all the other range test immediate uses, so that
	 those tests will be optimized away.  */
      if (opcode == ERROR_MARK)
	  if (oe->op)
	    oe->op = build_int_cst (TREE_TYPE (oe->op),
				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
	  else
	    oe->op = (oe->rank == BIT_IOR_EXPR
		      ? boolean_false_node : boolean_true_node);
      else
	oe->op = error_mark_node;
      range->exp = NULL_TREE;
      range->low = NULL_TREE;
      range->high = NULL_TREE;
/* Optimize X == CST1 || X == CST2
   if popcount (CST1 ^ CST2) == 1 into
   (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
   Similarly for ranges.  E.g.
   X != 2 && X != 3 && X != 10 && X != 11
   will be transformed by the previous optimization into
   !((X - 2U) <= 1U || (X - 10U) <= 1U)
   and this loop can transform that into
   !(((X & ~8) - 2U) <= 1U).  */
static bool
optimize_range_tests_xor (enum tree_code opcode, tree type,
			  tree lowi, tree lowj, tree highi, tree highj,
			  vec<operand_entry *> *ops,
			  struct range_entry *rangei,
			  struct range_entry *rangej)
  tree lowxor, highxor, tem, exp;
  /* Check lowi ^ lowj == highi ^ highj and
     popcount (lowi ^ lowj) == 1.  */
  lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
  if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
  if (!integer_pow2p (lowxor))
  highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
  if (!tree_int_cst_equal (lowxor, highxor))

  exp = rangei->exp;
  scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
  int prec = GET_MODE_PRECISION (mode);
  if (TYPE_PRECISION (type) < prec
      || (wi::to_wide (TYPE_MIN_VALUE (type))
	  != wi::min_value (prec, TYPE_SIGN (type)))
      || (wi::to_wide (TYPE_MAX_VALUE (type))
	  != wi::max_value (prec, TYPE_SIGN (type))))
      type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
      exp = fold_convert (type, exp);
      lowxor = fold_convert (type, lowxor);
      lowi = fold_convert (type, lowi);
      highi = fold_convert (type, highi);
  tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
  exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
  lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
  highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
  if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
			 NULL, rangei->in_p, lowj, highj,
			 rangei->strict_overflow_p
			 || rangej->strict_overflow_p))
/* Optimize X == CST1 || X == CST2
   if popcount (CST2 - CST1) == 1 into
   ((X - CST1) & ~(CST2 - CST1)) == 0.
   Similarly for ranges.  E.g.
   X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
   || X == 75 || X == 45
   will be transformed by the previous optimization into
   (X - 43U) <= 3U || (X - 75U) <= 3U
   and this loop can transform that into
   ((X - 43U) & ~(75U - 43U)) <= 3U.  */
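/* Checking the arithmetic on the example above: the ranges are
   [43, 46] and [75, 78], so
     highi - lowi = 46 - 43 = 3,  highj - lowj = 78 - 75 = 3,
     lowj - lowi = 75 - 43 = 32,  popcount (32) == 1,
   and ((X - 43U) & ~32U) <= 3U accepts exactly 43..46 and 75..78.  */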
static bool
optimize_range_tests_diff (enum tree_code opcode, tree type,
			   tree lowi, tree lowj, tree highi, tree highj,
			   vec<operand_entry *> *ops,
			   struct range_entry *rangei,
			   struct range_entry *rangej)
  tree tem1, tem2, mask;
  /* Check highi - lowi == highj - lowj.  */
  tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
  tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
  if (!tree_int_cst_equal (tem1, tem2))
  /* Check popcount (lowj - lowi) == 1.  */
  tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
  if (!integer_pow2p (tem1))

  scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
  int prec = GET_MODE_PRECISION (mode);
  if (TYPE_PRECISION (type) < prec
      || (wi::to_wide (TYPE_MIN_VALUE (type))
	  != wi::min_value (prec, TYPE_SIGN (type)))
      || (wi::to_wide (TYPE_MAX_VALUE (type))
	  != wi::max_value (prec, TYPE_SIGN (type))))
    type = build_nonstandard_integer_type (prec, 1);
  else
    type = unsigned_type_for (type);

  tem1 = fold_convert (type, tem1);
  tem2 = fold_convert (type, tem2);
  lowi = fold_convert (type, lowi);
  mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
  tem1 = fold_build2 (MINUS_EXPR, type,
		      fold_convert (type, rangei->exp), lowi);
  tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
  lowj = build_int_cst (type, 0);
  if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
			 NULL, rangei->in_p, lowj, tem2,
			 rangei->strict_overflow_p
			 || rangej->strict_overflow_p))
/* Do the common checks for optimize_range_tests_xor and
   optimize_range_tests_diff, then call one of them:
   optimize_range_tests_xor if OPTIMIZE_XOR is TRUE,
   otherwise optimize_range_tests_diff.  */
static bool
optimize_range_tests_1 (enum tree_code opcode, int first, int length,
			bool optimize_xor, vec<operand_entry *> *ops,
			struct range_entry *ranges)
  bool any_changes = false;

  for (i = first; i < length; i++)
      tree lowi, highi, lowj, highj, type, tem;

      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
      type = TREE_TYPE (ranges[i].exp);
      if (!INTEGRAL_TYPE_P (type))
      lowi = ranges[i].low;
      if (lowi == NULL_TREE)
	lowi = TYPE_MIN_VALUE (type);
      highi = ranges[i].high;
      if (highi == NULL_TREE)
      for (j = i + 1; j < length && j < i + 64; j++)
	  if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
	  lowj = ranges[j].low;
	  if (lowj == NULL_TREE)
	  highj = ranges[j].high;
	  if (highj == NULL_TREE)
	    highj = TYPE_MAX_VALUE (type);
	  /* Check lowj > highi.  */
	  tem = fold_binary (GT_EXPR, boolean_type_node,
			     lowj, highi);
	  if (tem == NULL_TREE || !integer_onep (tem))
	  if (optimize_xor)
	    changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
						highi, highj, ops,
						ranges + i, ranges + j);
	  else
	    changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
						 highi, highj, ops,
						 ranges + i, ranges + j);
/* Helper function of optimize_range_tests_to_bit_test.  Handle a single
   range, EXP, LOW, HIGH, compute bit mask of bits to test and return
   EXP on success, NULL otherwise.  */
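/* For example (values for illustration only): the range [43, 46]
   relative to TOTALLOW 43 yields
     high - low = 3,  max = 4,  *mask = 0xf,
   while the range [75, 78] relative to the same TOTALLOW has
     low - totallow = 32, so its 0xf mask is shifted up:
     *mask = 0xfULL << 32.  */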
static tree
extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
		       wide_int *mask, tree *totallowp)
  tree tem = int_const_binop (MINUS_EXPR, high, low);
  if (tem == NULL_TREE
      || TREE_CODE (tem) != INTEGER_CST
      || TREE_OVERFLOW (tem)
      || tree_int_cst_sgn (tem) == -1
      || compare_tree_int (tem, prec) != -1)

  unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
  *mask = wi::shifted_mask (0, max, false, prec);
  if (TREE_CODE (exp) == BIT_AND_EXPR
      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
      widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
      msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
      if (wi::popcount (msk) == 1
	  && wi::ltu_p (msk, prec - max))
	  *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
	  max += msk.to_uhwi ();
	  exp = TREE_OPERAND (exp, 0);
  if (integer_zerop (low)
      && TREE_CODE (exp) == PLUS_EXPR
      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
      tree ret = TREE_OPERAND (exp, 0);
      widest_int bias
	= wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
			     TYPE_PRECISION (TREE_TYPE (low))));
      tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
      else if (!tree_int_cst_lt (totallow, tbias))
      bias = wi::to_widest (tbias);
      bias -= wi::to_widest (totallow);
      if (bias >= 0 && bias < prec - max)
	  *mask = wi::lshift (*mask, bias);
  if (!tree_int_cst_lt (totallow, low))
  tem = int_const_binop (MINUS_EXPR, low, totallow);
  if (tem == NULL_TREE
      || TREE_CODE (tem) != INTEGER_CST
      || TREE_OVERFLOW (tem)
      || compare_tree_int (tem, prec - max) == 1)
  *mask = wi::lshift (*mask, wi::to_widest (tem));
/* Attempt to optimize small range tests using bit test.
   E.g.
   X != 43 && X != 76 && X != 44 && X != 78 && X != 49
   && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
   has been optimized by the earlier passes into:
   ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
   As the whole 43 through 82 range spans fewer than 64 values,
   for 64-bit word targets optimize that into:
   (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */
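/* Deriving the 0x8F0000004FULL constant above (for illustration):
   after subtracting 43 the tested values map to bit positions
     43..46 -> 0..3,  49 -> 6,  75..78 -> 32..35,  82 -> 39,
   i.e. mask = 0xf | (1 << 6) | (0xfULL << 32) | (1ULL << 39)
	     = 0x4f | 0x8f00000000ULL = 0x8F0000004FULL.  */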
static bool
optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
				  vec<operand_entry *> *ops,
				  struct range_entry *ranges)
  bool any_changes = false;
  int prec = GET_MODE_BITSIZE (word_mode);
  auto_vec<struct range_entry *, 64> candidates;

  for (i = first; i < length - 2; i++)
      tree lowi, highi, lowj, highj, type;

      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
      type = TREE_TYPE (ranges[i].exp);
      if (!INTEGRAL_TYPE_P (type))
      lowi = ranges[i].low;
      if (lowi == NULL_TREE)
	lowi = TYPE_MIN_VALUE (type);
      highi = ranges[i].high;
      if (highi == NULL_TREE)
      wide_int mask;
      tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
					highi, &mask, &lowi);
      if (exp == NULL_TREE)
      bool strict_overflow_p = ranges[i].strict_overflow_p;
      candidates.truncate (0);
      int end = MIN (i + 64, length);
      for (j = i + 1; j < end; j++)
	  wide_int mask2;
	  tree exp2;
	  if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
	  if (ranges[j].exp == exp)
	  else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
	      exp2 = TREE_OPERAND (ranges[j].exp, 0);
	      else if (TREE_CODE (exp2) == PLUS_EXPR)
		  exp2 = TREE_OPERAND (exp2, 0);
	  lowj = ranges[j].low;
	  if (lowj == NULL_TREE)
	  highj = ranges[j].high;
	  if (highj == NULL_TREE)
	    highj = TYPE_MAX_VALUE (type);
	  exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
					highj, &mask2, NULL);
	  mask |= mask2;
	  strict_overflow_p |= ranges[j].strict_overflow_p;
	  candidates.safe_push (&ranges[j]);

      /* If we need otherwise 3 or more comparisons, use a bit test.  */
      if (candidates.length () >= 2)
	  tree high = wide_int_to_tree (TREE_TYPE (lowi),
					wi::to_widest (lowi)
					+ prec - 1 - wi::clz (mask));
	  operand_entry *oe = (*ops)[ranges[i].idx];
	  tree op = oe->op;
	  gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
			    : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
	  location_t loc = gimple_location (stmt);
	  tree optype = op ? TREE_TYPE (op) : boolean_type_node;

	  /* See if it isn't cheaper to pretend the minimum value of the
	     range is 0, if maximum value is small enough.
	     We can then avoid subtracting the minimum value, but the
	     mask constant could perhaps be more expensive.  */
	  if (compare_tree_int (lowi, 0) > 0
	      && compare_tree_int (high, prec) < 0)
	      HOST_WIDE_INT m = tree_to_uhwi (lowi);
	      rtx reg = gen_raw_REG (word_mode, 10000);
	      bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
	      cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
						      GEN_INT (-m)), speed_p);
	      rtx r = immed_wide_int_const (mask, word_mode);
	      cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
					 word_mode, speed_p);
	      r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
	      cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
					 word_mode, speed_p);
	      if (cost_diff > 0)
		  mask = wi::lshift (mask, m);
		  lowi = build_zero_cst (TREE_TYPE (lowi));

	  tree tem = build_range_check (loc, optype, unshare_expr (exp),
					false, lowi, high);
	  if (tem == NULL_TREE || is_gimple_val (tem))
	  tree etype = unsigned_type_for (TREE_TYPE (exp));
	  exp = fold_build2_loc (loc, MINUS_EXPR, etype,
				 fold_convert_loc (loc, etype, exp),
				 fold_convert_loc (loc, etype, lowi));
	  exp = fold_convert_loc (loc, integer_type_node, exp);
	  tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
	  exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
				 build_int_cst (word_type, 1), exp);
	  exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
				 wide_int_to_tree (word_type, mask));
	  exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
				 build_zero_cst (word_type));
	  if (is_gimple_val (exp))

	  /* The shift might have undefined behavior if TEM is true,
	     but reassociate_bb isn't prepared to have basic blocks
	     split when it is running.  So, temporarily emit code
	     with BIT_IOR_EXPR instead of &&, and fix it up in the
	     branch fixups afterwards.  */
	  gimple_seq seq;
	  tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
	  gcc_assert (TREE_CODE (tem) == SSA_NAME);
	  gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
	  gimple_seq seq2;
	  exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
	  gimple_seq_add_seq_without_update (&seq, seq2);
	  gcc_assert (TREE_CODE (exp) == SSA_NAME);
	  gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
	  gimple *g = gimple_build_assign (make_ssa_name (optype),
					   BIT_IOR_EXPR, tem, exp);
	  gimple_set_location (g, loc);
	  gimple_seq_add_stmt_without_update (&seq, g);
	  exp = gimple_assign_lhs (g);
	  tree val = build_zero_cst (optype);
	  if (update_range_test (&ranges[i], NULL, candidates.address (),
				 candidates.length (), opcode, ops, exp,
				 seq, false, val, val, strict_overflow_p))
	      reassoc_branch_fixups.safe_push (tem);
	  else
	    gimple_seq_discard (seq);
/* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
   and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.  */
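/* E.g. with unsigned char operands all tested against the same
   constant, the separate comparisons collapse into one comparison of
   the bitwise combination; the bucket index b below encodes the
   operand precision and whether the compared constant is 0 or
   all-ones, so only compatible tests end up chained together.  */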
static bool
optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
				  vec<operand_entry *> *ops,
				  struct range_entry *ranges)
  bool any_changes = false;
  auto_vec<int, 128> buckets;
  auto_vec<int, 32> chains;
  auto_vec<struct range_entry *, 32> candidates;

  for (i = first; i < length; i++)
      if (ranges[i].exp == NULL_TREE
	  || TREE_CODE (ranges[i].exp) != SSA_NAME
	  || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
	  || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE
	  || ranges[i].low == NULL_TREE
	  || ranges[i].low != ranges[i].high)

      bool zero_p = integer_zerop (ranges[i].low);
      if (!zero_p && !integer_all_onesp (ranges[i].low))

      b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 2 + !zero_p;
      if (buckets.length () <= b)
	buckets.safe_grow_cleared (b + 1);
      if (chains.length () <= (unsigned) i)
	chains.safe_grow (i + 1);
      chains[i] = buckets[b];

  FOR_EACH_VEC_ELT (buckets, b, i)
    if (i && chains[i - 1])
	for (j = chains[i - 1]; j; j = chains[j - 1])
	    gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
	    gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
	    if (reassoc_stmt_dominates_stmt_p (gk, gj))
	      k = j;

	tree type1 = TREE_TYPE (ranges[k - 1].exp);
	tree type2 = NULL_TREE;
	bool strict_overflow_p = false;
	candidates.truncate (0);
	for (j = i; j; j = chains[j - 1])
	    tree type = TREE_TYPE (ranges[j - 1].exp);
	    strict_overflow_p |= ranges[j - 1].strict_overflow_p;
	    if (j == k
		|| useless_type_conversion_p (type1, type))
	    else if (type2 == NULL_TREE
		     || useless_type_conversion_p (type2, type))
		if (type2 == NULL_TREE)
		  type2 = type;
		candidates.safe_push (&ranges[j - 1]);

	unsigned l = candidates.length ();
	for (j = i; j; j = chains[j - 1])
	    tree type = TREE_TYPE (ranges[j - 1].exp);
	    if (useless_type_conversion_p (type1, type))
	    else if (type2 == NULL_TREE
		     || useless_type_conversion_p (type2, type))
	    candidates.safe_push (&ranges[j - 1]);

	gimple_seq seq = NULL;
	tree op = NULL_TREE;
	unsigned int id;
	struct range_entry *r;
	candidates.safe_push (&ranges[k - 1]);
	FOR_EACH_VEC_ELT (candidates, id, r)
		g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
		gimple_seq_add_stmt_without_update (&seq, g);
		op = gimple_assign_lhs (g);
	    tree type = TREE_TYPE (r->exp);
	    tree exp = r->exp;
	    if (id >= l && !useless_type_conversion_p (type1, type))
		g = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, exp);
		gimple_seq_add_stmt_without_update (&seq, g);
		exp = gimple_assign_lhs (g);
	    g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
				     (b & 1) ? BIT_AND_EXPR : BIT_IOR_EXPR,
				     op, exp);
	    gimple_seq_add_stmt_without_update (&seq, g);
	    op = gimple_assign_lhs (g);

	if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
			       candidates.length (), opcode, ops, op,
			       seq, true, ranges[k - 1].low,
			       ranges[k - 1].low, strict_overflow_p))
	else
	  gimple_seq_discard (seq);
/* Attempt to optimize for signed a and b where b is known to be >= 0:
   a >= 0 && a < b into (unsigned) a < (unsigned) b
   a >= 0 && a <= b into (unsigned) a <= (unsigned) b  */
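/* Why this holds (a worked case, values for illustration): with
   32-bit ints, a = -5 becomes 0xfffffffbU, which is larger than any
   b in [0, INT_MAX], so (unsigned) a < (unsigned) b is false exactly
   as a >= 0 && a < b is; for a >= 0 both operands are unchanged by
   the cast and the comparisons agree.  */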
static bool
optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
				vec<operand_entry *> *ops,
				struct range_entry *ranges,
				basic_block first_bb)
  bool any_changes = false;
  hash_map<tree, int> *map = NULL;

  for (i = first; i < length; i++)
      if (ranges[i].exp == NULL_TREE
	  || TREE_CODE (ranges[i].exp) != SSA_NAME)

      tree type = TREE_TYPE (ranges[i].exp);
      if (!INTEGRAL_TYPE_P (type)
	  || TYPE_UNSIGNED (type)
	  || ranges[i].low == NULL_TREE
	  || !integer_zerop (ranges[i].low)
	  || ranges[i].high != NULL_TREE)
      /* EXP >= 0 here.  */
      if (map == NULL)
	map = new hash_map<tree, int>;
      map->put (ranges[i].exp, i);

  for (i = 0; i < length; i++)
      bool in_p = ranges[i].in_p;
      if (ranges[i].low == NULL_TREE
	  || ranges[i].high == NULL_TREE)
      if (!integer_zerop (ranges[i].low)
	  || !integer_zerop (ranges[i].high))
	  if (ranges[i].exp
	      && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
	      && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
	      && integer_onep (ranges[i].low)
	      && integer_onep (ranges[i].high))

      if (TREE_CODE (ranges[i].exp) != SSA_NAME)
      stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
      if (!is_gimple_assign (stmt))
      ccode = gimple_assign_rhs_code (stmt);
      rhs1 = gimple_assign_rhs1 (stmt);
      rhs2 = gimple_assign_rhs2 (stmt);

      operand_entry *oe = (*ops)[ranges[i].idx];
      stmt = last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
      if (gimple_code (stmt) != GIMPLE_COND)
      ccode = gimple_cond_code (stmt);
      rhs1 = gimple_cond_lhs (stmt);
      rhs2 = gimple_cond_rhs (stmt);

      if (TREE_CODE (rhs1) != SSA_NAME
	  || rhs2 == NULL_TREE
	  || TREE_CODE (rhs2) != SSA_NAME)

	ccode = invert_tree_comparison (ccode, false);
	  std::swap (rhs1, rhs2);
	  ccode = swap_tree_comparison (ccode);

      int *idx = map->get (rhs1);

      /* maybe_optimize_range_tests allows statements without side-effects
	 in the basic blocks as long as they are consumed in the same bb.
	 Make sure rhs2's def stmt is not among them, otherwise we can't
	 safely use get_nonzero_bits on it.  E.g. in:
	 # RANGE [-83, 1] NONZERO 173
	 # k_32 = PHI <k_47(13), k_12(9)>
	   goto <bb 5>; [26.46%]
	   goto <bb 9>; [73.54%]

	 <bb 5> [local count: 140323371]:
	 # RANGE [0, 1] NONZERO 1
	 # RANGE [0, 4] NONZERO 4
	 # RANGE [0, 4] NONZERO 4
	 iftmp.0_44 = (char) _21;
	 if (k_32 < iftmp.0_44)
	   goto <bb 6>; [84.48%]
	   goto <bb 9>; [15.52%]
	 the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
	 k_32 >= 0.  If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
	 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
	 those stmts even for negative k_32 and the value ranges would be no
	 longer guaranteed and so the optimization would be invalid.  */
      while (opcode == ERROR_MARK)
	  gimple *g = SSA_NAME_DEF_STMT (rhs2);
	  basic_block bb2 = gimple_bb (g);
	  if (bb2
	      && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
	      /* As an exception, handle a few common cases.  */
	      if (gimple_assign_cast_p (g)
		  && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
		  tree op0 = gimple_assign_rhs1 (g);
		  if (TYPE_UNSIGNED (TREE_TYPE (op0))
		      && (TYPE_PRECISION (TREE_TYPE (rhs2))
			  > TYPE_PRECISION (TREE_TYPE (op0))))
		    /* Zero-extension is always ok.  */
		  else if (TYPE_PRECISION (TREE_TYPE (rhs2))
			   == TYPE_PRECISION (TREE_TYPE (op0))
			   && TREE_CODE (op0) == SSA_NAME)
		      /* Cast from signed to unsigned or vice versa.  Retry
			 with the op0 as new rhs2.  */
	      else if (is_gimple_assign (g)
		       && gimple_assign_rhs_code (g) == BIT_AND_EXPR
		       && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
		       && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
		/* Masking with INTEGER_CST with MSB clear is always ok
		   too.  */
      if (rhs2 == NULL_TREE)

      wide_int nz = get_nonzero_bits (rhs2);

      /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
	 and RHS2 is known to be RHS2 >= 0.  */
      tree utype = unsigned_type_for (TREE_TYPE (rhs1));

      enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
      if ((ranges[*idx].strict_overflow_p
	   || ranges[i].strict_overflow_p)
	  && issue_strict_overflow_warning (wc))
	warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
		    "assuming signed overflow does not occur "
		    "when simplifying range test");

      if (dump_file && (dump_flags & TDF_DETAILS))
	  struct range_entry *r = &ranges[*idx];
	  fprintf (dump_file, "Optimizing range test ");
	  print_generic_expr (dump_file, r->exp);
	  fprintf (dump_file, " +[");
	  print_generic_expr (dump_file, r->low);
	  fprintf (dump_file, ", ");
	  print_generic_expr (dump_file, r->high);
	  fprintf (dump_file, "] and comparison ");
	  print_generic_expr (dump_file, rhs1);
	  fprintf (dump_file, " %s ", op_symbol_code (ccode));
	  print_generic_expr (dump_file, rhs2);
	  fprintf (dump_file, "\n into (");
	  print_generic_expr (dump_file, utype);
	  fprintf (dump_file, ") ");
	  print_generic_expr (dump_file, rhs1);
	  fprintf (dump_file, " %s (", op_symbol_code (ccode));
	  print_generic_expr (dump_file, utype);
	  fprintf (dump_file, ") ");
	  print_generic_expr (dump_file, rhs2);
	  fprintf (dump_file, "\n");

      operand_entry *oe = (*ops)[ranges[i].idx];
      if (opcode == BIT_IOR_EXPR
	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
	  ccode = invert_tree_comparison (ccode, false);

      unsigned int uid = gimple_uid (stmt);
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
      gimple_set_uid (g, uid);
      rhs1 = gimple_assign_lhs (g);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
	  g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
	  gimple_set_uid (g, uid);
	  rhs2 = gimple_assign_lhs (g);
	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      if (tree_swap_operands_p (rhs1, rhs2))
	  std::swap (rhs1, rhs2);
	  ccode = swap_tree_comparison (ccode);
      if (gimple_code (stmt) == GIMPLE_COND)
	  gcond *c = as_a <gcond *> (stmt);
	  gimple_cond_set_code (c, ccode);
	  gimple_cond_set_lhs (c, rhs1);
	  gimple_cond_set_rhs (c, rhs2);

	  tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
	  if (!INTEGRAL_TYPE_P (ctype)
	      || (TREE_CODE (ctype) != BOOLEAN_TYPE
		  && TYPE_PRECISION (ctype) != 1))
	    ctype = boolean_type_node;
	  g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
	  gimple_set_uid (g, uid);
	  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	  if (oe->op && ctype != TREE_TYPE (oe->op))
	      g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
				       NOP_EXPR, gimple_assign_lhs (g));
	      gimple_set_uid (g, uid);
	      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
	  ranges[i].exp = gimple_assign_lhs (g);
	  oe->op = ranges[i].exp;
	  ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
	  ranges[i].high = ranges[i].low;

      ranges[i].strict_overflow_p = false;
      oe = (*ops)[ranges[*idx].idx];
      /* Now change all the other range test immediate uses, so that
	 those tests will be optimized away.  */
      if (opcode == ERROR_MARK)
	  if (oe->op)
	    oe->op = build_int_cst (TREE_TYPE (oe->op),
				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
	  else
	    oe->op = (oe->rank == BIT_IOR_EXPR
		      ? boolean_false_node : boolean_true_node);
      else
	oe->op = error_mark_node;
      ranges[*idx].exp = NULL_TREE;
      ranges[*idx].low = NULL_TREE;
      ranges[*idx].high = NULL_TREE;
/* Optimize range tests, similarly to how fold_range_test optimizes
   it on trees.  The tree code for the binary
   operation between all the operands is OPCODE.
   If OPCODE is ERROR_MARK, optimize_range_tests is called from within
   maybe_optimize_range_tests for inter-bb range optimization.
   In that case if oe->op is NULL, oe->id is bb->index whose
   GIMPLE_COND is && or ||ed into the test, and oe->rank says
   the actual opcode.
   FIRST_BB is the first basic block if OPCODE is ERROR_MARK.  */
static bool
optimize_range_tests (enum tree_code opcode,
		      vec<operand_entry *> *ops, basic_block first_bb)
  unsigned int length = ops->length (), i, j, first;
  struct range_entry *ranges;
  bool any_changes = false;

  ranges = XNEWVEC (struct range_entry, length);
  for (i = 0; i < length; i++)
      init_range_entry (ranges + i, oe->op,
			oe->op
			? NULL
			: last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
      /* For | invert it now, we will invert it again before emitting
	 the optimized expression.  */
      if (opcode == BIT_IOR_EXPR
	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
	ranges[i].in_p = !ranges[i].in_p;

  qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
  for (i = 0; i < length; i++)
    if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)

  /* Try to merge ranges.  */
  for (first = i; i < length; i++)
      tree low = ranges[i].low;
      tree high = ranges[i].high;
      int in_p = ranges[i].in_p;
      bool strict_overflow_p = ranges[i].strict_overflow_p;
      int update_fail_count = 0;

      for (j = i + 1; j < length; j++)
	  if (ranges[i].exp != ranges[j].exp)
	  if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
			     ranges[j].in_p, ranges[j].low, ranges[j].high))
	  strict_overflow_p |= ranges[j].strict_overflow_p;

      if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
			     opcode, ops, ranges[i].exp, NULL, in_p,
			     low, high, strict_overflow_p))
      /* Avoid quadratic complexity if all merge_ranges calls would succeed,
	 while update_range_test would fail.  */
      else if (update_fail_count == 64)
      ++update_fail_count;

  any_changes |= optimize_range_tests_1 (opcode, first, length, true,
					 ops, ranges);

  if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
    any_changes |= optimize_range_tests_1 (opcode, first, length, false,
					   ops, ranges);
  if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
    any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
						     ops, ranges);
  any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
						   ops, ranges);
  any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
						 ranges, first_bb);

  if (any_changes && opcode != ERROR_MARK)
      FOR_EACH_VEC_ELT (*ops, i, oe)
	  if (oe->op == error_mark_node)

  XDELETEVEC (ranges);
/* A subroutine of optimize_vec_cond_expr to extract and canonicalize
   the operands of the VEC_COND_EXPR.  Returns ERROR_MARK on failure,
   otherwise the comparison code.  */

static tree_code
ovce_extract_ops (tree var, gassign **rets, bool *reti)
  if (TREE_CODE (var) != SSA_NAME)

  gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));

  /* ??? If we start creating more COND_EXPR, we could perform
     this same optimization with them.  For now, simplify.  */
  if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)

  tree cond = gimple_assign_rhs1 (stmt);
  tree_code cmp = TREE_CODE (cond);
  if (TREE_CODE_CLASS (cmp) != tcc_comparison)

  /* ??? For now, allow only canonical true and false result vectors.
     We could expand this to other constants should the need arise,
     but at the moment we don't create them.  */
  tree t = gimple_assign_rhs2 (stmt);
  tree f = gimple_assign_rhs3 (stmt);
  if (integer_all_onesp (t))
  else if (integer_all_onesp (f))
      cmp = invert_tree_comparison (cmp, false);
  if (!integer_zerop (f))

/* Optimize the condition of VEC_COND_EXPRs which have been combined
   with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR).  */

static bool
optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
  unsigned int length = ops->length (), i, j;
  bool any_changes = false;

  for (i = 0; i < length; ++i)
      tree elt0 = (*ops)[i]->op;

      tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
      if (cmp0 == ERROR_MARK)

      for (j = i + 1; j < length; ++j)
	  tree &elt1 = (*ops)[j]->op;

	  tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
	  if (cmp1 == ERROR_MARK)

	  tree cond0 = gimple_assign_rhs1 (stmt0);
	  tree x0 = TREE_OPERAND (cond0, 0);
	  tree y0 = TREE_OPERAND (cond0, 1);

	  tree cond1 = gimple_assign_rhs1 (stmt1);
	  tree x1 = TREE_OPERAND (cond1, 0);
	  tree y1 = TREE_OPERAND (cond1, 1);

	  if (opcode == BIT_AND_EXPR)
	    comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
	  else if (opcode == BIT_IOR_EXPR)
	    comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (dump_file, "Transforming ");
	      print_generic_expr (dump_file, cond0);
	      fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
	      print_generic_expr (dump_file, cond1);
	      fprintf (dump_file, " into ");
	      print_generic_expr (dump_file, comb);
	      fputc ('\n', dump_file);

	  gimple_assign_set_rhs1 (stmt0, comb);
	  if (invert)
	    std::swap (*gimple_assign_rhs2_ptr (stmt0),
		       *gimple_assign_rhs3_ptr (stmt0));
	  update_stmt (stmt0);

	  elt1 = error_mark_node;

  FOR_EACH_VEC_ELT (*ops, i, oe)
      if (oe->op == error_mark_node)
/* Return true if STMT is a cast like:
   <bb N>:
   ...
   _123 = (int) _234;

   <bb M>:
   # _345 = PHI <_123(N), 1(...), 1(...)>
   where _234 has bool type, _123 has single use and
   bb N has a single successor M.  This is commonly used in
   the last block of a range test.

   Also return true if STMT is tcc_compare like:
   <bb N>:
   ...
   _234 = a_2(D) == 2;

   <bb M>:
   # _345 = PHI <_234(N), 1(...), 1(...)>
   _346 = (int) _345;
   where _234 has bool type, single use and
   bb N has a single successor M.  This is commonly used in
   the last block of a range test.  */
static bool
final_range_test_p (gimple *stmt)
  basic_block bb, rhs_bb, lhs_bb;
  edge e;
  tree lhs, rhs;
  use_operand_p use_p;
  gimple *use_stmt;

  if (!gimple_assign_cast_p (stmt)
      && (!is_gimple_assign (stmt)
	  || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
	      != tcc_comparison)))
  bb = gimple_bb (stmt);
  if (!single_succ_p (bb))
  e = single_succ_edge (bb);
  if (e->flags & EDGE_COMPLEX)

  lhs = gimple_assign_lhs (stmt);
  rhs = gimple_assign_rhs1 (stmt);
  if (gimple_assign_cast_p (stmt)
      && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	  || TREE_CODE (rhs) != SSA_NAME
	  || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))

  if (!gimple_assign_cast_p (stmt)
      && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))

  /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
  if (!single_imm_use (lhs, &use_p, &use_stmt))

  if (gimple_code (use_stmt) != GIMPLE_PHI
      || gimple_bb (use_stmt) != e->dest)

  /* And that the rhs is defined in the same loop.  */
  if (gimple_assign_cast_p (stmt))
      if (TREE_CODE (rhs) != SSA_NAME
	  || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
	  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
      if (TREE_CODE (lhs) != SSA_NAME
	  || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
	  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
/* Return true if BB is a suitable basic block for inter-bb range test
   optimization.  If BACKWARD is true, BB should be the only predecessor
   of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
   or compared with to find a common basic block to which all conditions
   branch if true resp. false.  If BACKWARD is false, TEST_BB should
   be the only predecessor of BB.  */

static bool
suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
		  bool backward)
  edge_iterator ei, ei2;
  edge e, e2;
  gimple *stmt;
  gphi_iterator gsi;
  bool other_edge_seen = false;
  bool is_cond;

  /* Check last stmt first.  */
  stmt = last_stmt (bb);
  if (stmt == NULL
      || (gimple_code (stmt) != GIMPLE_COND
	  && (backward || !final_range_test_p (stmt)))
      || gimple_visited_p (stmt)
      || stmt_could_throw_p (cfun, stmt))
  is_cond = gimple_code (stmt) == GIMPLE_COND;

      /* If last stmt is GIMPLE_COND, verify that one of the succ edges
	 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
	 to *OTHER_BB (if not set yet, try to find it out).  */
      if (EDGE_COUNT (bb->succs) != 2)
      FOR_EACH_EDGE (e, ei, bb->succs)
	  if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	  if (e->dest == test_bb)
	  if (*other_bb == NULL)
	      FOR_EACH_EDGE (e2, ei2, test_bb->succs)
		if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
		else if (e->dest == e2->dest)
		  *other_bb = e->dest;
	      if (*other_bb == NULL)
	  if (e->dest == *other_bb)
	    other_edge_seen = true;
      if (*other_bb == NULL || !other_edge_seen)
  else if (single_succ (bb) != *other_bb)

  /* Now check all PHIs of *OTHER_BB.  */
  e = find_edge (bb, *other_bb);
  e2 = find_edge (test_bb, *other_bb);
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
      gphi *phi = gsi.phi ();
      /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
	 corresponding to BB and TEST_BB predecessor must be the same.  */
      if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
			    gimple_phi_arg_def (phi, e2->dest_idx), 0))
	  /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
	     one of the PHIs should have the lhs of the last stmt in
	     that block as PHI arg and that PHI should have 0 or 1
	     corresponding to it in all other range test basic blocks
	     considered.  */
	      if (gimple_phi_arg_def (phi, e->dest_idx)
		  == gimple_assign_lhs (stmt)
		  && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
		      || integer_onep (gimple_phi_arg_def (phi,
							   e2->dest_idx))))
	      gimple *test_last = last_stmt (test_bb);
	      if (gimple_code (test_last) != GIMPLE_COND
		  && gimple_phi_arg_def (phi, e2->dest_idx)
		     == gimple_assign_lhs (test_last)
		  && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
		      || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
/* Return true if BB doesn't have side-effects that would disallow
   range test optimization, all SSA_NAMEs set in the bb are consumed
   in the bb and there are no PHIs.  */

static bool
no_side_effect_bb (basic_block bb)
  gimple_stmt_iterator gsi;
  gimple *last;

  if (!gimple_seq_empty_p (phi_nodes (bb)))
  last = last_stmt (bb);
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      gimple *stmt = gsi_stmt (gsi);
      tree lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;

      if (is_gimple_debug (stmt))
      if (gimple_has_side_effects (stmt))
      if (!is_gimple_assign (stmt))
      lhs = gimple_assign_lhs (stmt);
      if (TREE_CODE (lhs) != SSA_NAME)
      if (gimple_assign_rhs_could_trap_p (stmt))
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	  gimple *use_stmt = USE_STMT (use_p);
	  if (is_gimple_debug (use_stmt))
	  if (gimple_bb (use_stmt) != bb)
/* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
   return true and fill in *OPS recursively.  */

static bool
get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
	 struct loop *loop)
  gimple *stmt = SSA_NAME_DEF_STMT (var);
  tree rhs[2];
  int i;

  if (!is_reassociable_op (stmt, code, loop))

  rhs[0] = gimple_assign_rhs1 (stmt);
  rhs[1] = gimple_assign_rhs2 (stmt);
  gimple_set_visited (stmt, true);
  for (i = 0; i < 2; i++)
    if (TREE_CODE (rhs[i]) == SSA_NAME
	&& !get_ops (rhs[i], code, ops, loop)
	&& has_single_use (rhs[i]))
	operand_entry *oe = operand_entry_pool.allocate ();

	oe->op = rhs[i];
	oe->rank = code;
	oe->id = 0;
	oe->count = 1;
	oe->stmt_to_insert = NULL;
	ops->safe_push (oe);
/* Find the ops that were added by get_ops starting from VAR, see if
   they were changed during update_range_test and if yes, create new
   stmts.  */

static tree
update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
	    unsigned int *pidx, struct loop *loop)
  gimple *stmt = SSA_NAME_DEF_STMT (var);
  tree rhs[4];
  int i;

  if (!is_reassociable_op (stmt, code, loop))

  rhs[0] = gimple_assign_rhs1 (stmt);
  rhs[1] = gimple_assign_rhs2 (stmt);
  rhs[2] = rhs[0];
  rhs[3] = rhs[1];
  for (i = 0; i < 2; i++)
    if (TREE_CODE (rhs[i]) == SSA_NAME)
	rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
	if (rhs[2 + i] == NULL_TREE)
	    if (has_single_use (rhs[i]))
	      rhs[2 + i] = ops[(*pidx)++]->op;
	    else
	      rhs[2 + i] = rhs[i];
  if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
      && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      var = make_ssa_name (TREE_TYPE (var));
      gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
					rhs[2], rhs[3]);
      gimple_set_uid (g, gimple_uid (stmt));
      gimple_set_visited (g, true);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
/* Structure to track the initial value passed to get_ops and
   the range in the ops vector for each basic block.  */

struct inter_bb_range_test_entry
{
  tree op;
  unsigned int first_idx, last_idx;
};
/* Inter-bb range test optimization.

   Returns TRUE if a gimple conditional is optimized to a true/false,
   otherwise returns FALSE.

   This indicates to the caller that it should run a CFG cleanup pass
   once reassociation is completed.  */
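/* As an illustration (source form invented for this comment), a chain
   like
     if (a == 3) goto out; else if (a == 5) goto out; else ...
   is spread over several basic blocks whose GIMPLE_CONDs all branch
   to the same destination; the routine below collects those
   conditions into one ops vector and hands them to
   optimize_range_tests with ERROR_MARK as the opcode.  */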
static bool
maybe_optimize_range_tests (gimple *stmt)
  basic_block first_bb = gimple_bb (stmt);
  basic_block last_bb = first_bb;
  basic_block other_bb = NULL;
  basic_block bb;
  edge_iterator ei;
  edge e;
  auto_vec<operand_entry *> ops;
  auto_vec<inter_bb_range_test_entry> bbinfo;
  bool any_changes = false;
  bool cfg_cleanup_needed = false;

  /* Consider only basic blocks that end with GIMPLE_COND or
     a cast statement satisfying final_range_test_p.  All
     but the last bb in the first_bb .. last_bb range
     should end with GIMPLE_COND.  */
  if (gimple_code (stmt) == GIMPLE_COND)
      if (EDGE_COUNT (first_bb->succs) != 2)
	return cfg_cleanup_needed;
  else if (final_range_test_p (stmt))
    other_bb = single_succ (first_bb);
  else
    return cfg_cleanup_needed;

  if (stmt_could_throw_p (cfun, stmt))
    return cfg_cleanup_needed;

  /* As relative ordering of post-dominator sons isn't fixed,
     maybe_optimize_range_tests can be called first on any
     bb in the range we want to optimize.  So, start searching
     backwards, if first_bb can be set to a predecessor.  */
  while (single_pred_p (first_bb))
      basic_block pred_bb = single_pred (first_bb);
      if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
      if (!no_side_effect_bb (first_bb))

  /* If first_bb is last_bb, other_bb hasn't been computed yet.
     Before starting forward search in last_bb successors, find
     out the other_bb.  */
  if (first_bb == last_bb)
      /* As non-GIMPLE_COND last stmt always terminates the range,
	 if forward search didn't discover anything, just give up.  */
      if (gimple_code (stmt) != GIMPLE_COND)
	return cfg_cleanup_needed;
      /* Look at both successors.  Either it ends with a GIMPLE_COND
	 and satisfies suitable_cond_bb, or ends with a cast and
	 other_bb is that cast's successor.  */
      FOR_EACH_EDGE (e, ei, first_bb->succs)
	if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
	    || e->dest == first_bb)
	  return cfg_cleanup_needed;
	else if (single_pred_p (e->dest))
	    stmt = last_stmt (e->dest);
	    if (stmt
		&& gimple_code (stmt) == GIMPLE_COND
		&& EDGE_COUNT (e->dest->succs) == 2)
		if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
	    else if (stmt
		     && final_range_test_p (stmt)
		     && find_edge (first_bb, single_succ (e->dest)))
		other_bb = single_succ (e->dest);
		if (other_bb == first_bb)
      if (other_bb == NULL)
	return cfg_cleanup_needed;

  /* Now do the forward search, moving last_bb to successor bbs
     that aren't other_bb.  */
  while (EDGE_COUNT (last_bb->succs) == 2)
      FOR_EACH_EDGE (e, ei, last_bb->succs)
	if (e->dest != other_bb)
      if (!single_pred_p (e->dest))
      if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
      if (!no_side_effect_bb (e->dest))
  if (first_bb == last_bb)
    return cfg_cleanup_needed;

  /* Here basic blocks first_bb through last_bb's predecessor
     end with GIMPLE_COND, all of them have one of the edges to
     other_bb and another to another block in the range,
     all blocks except first_bb don't have side-effects and
     last_bb ends with either GIMPLE_COND, or cast satisfying
     final_range_test_p.  */
  for (bb = last_bb; ; bb = single_pred (bb))
      enum tree_code code;
      tree lhs, rhs;
      inter_bb_range_test_entry bb_ent;

      bb_ent.op = NULL_TREE;
      bb_ent.first_idx = ops.length ();
      bb_ent.last_idx = bb_ent.first_idx;
      e = find_edge (bb, other_bb);
      stmt = last_stmt (bb);
      gimple_set_visited (stmt, true);
      if (gimple_code (stmt) != GIMPLE_COND)
	  use_operand_p use_p;
	  gimple *phi;
	  edge e2;
	  unsigned int d;

	  lhs = gimple_assign_lhs (stmt);
	  rhs = gimple_assign_rhs1 (stmt);
	  gcc_assert (bb == last_bb);

	  /* stmt is followed by:
	     # _345 = PHI <_123(N), 1(...), 1(...)>
	     or 0 instead of 1.  If it is 0, the _234
	     range test is anded together with all the
	     other range tests, if it is 1, it is ored with
	     them.  */
	  single_imm_use (lhs, &use_p, &phi);
	  gcc_assert (gimple_code (phi) == GIMPLE_PHI);
	  e2 = find_edge (first_bb, other_bb);
	  d = e2->dest_idx;
	  gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
	  if (integer_zerop (gimple_phi_arg_def (phi, d)))
	    code = BIT_AND_EXPR;
	  else
	      gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
	      code = BIT_IOR_EXPR;

	  /* If _234 SSA_NAME_DEF_STMT is a bitwise or
	     (or &, corresponding to 1/0 in the phi arguments),
	     push into ops the individual range test arguments
	     of the bitwise or resp. and, recursively.  */
	  if (TREE_CODE (rhs) == SSA_NAME
	      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
		  != tcc_comparison)
	      && !get_ops (rhs, code, &ops,
			   loop_containing_stmt (stmt))
	      && has_single_use (rhs))
	      /* Otherwise, push the _234 range test itself.  */
	      operand_entry *oe = operand_entry_pool.allocate ();

	      oe->stmt_to_insert = NULL;
	  else if (is_gimple_assign (stmt)
		   && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
		       == tcc_comparison)
		   && !get_ops (lhs, code, &ops,
				loop_containing_stmt (stmt))
		   && has_single_use (lhs))
	      operand_entry *oe = operand_entry_pool.allocate ();
	    bb_ent.last_idx = ops.length ();
	  bbinfo.safe_push (bb_ent);

      /* Otherwise stmt is GIMPLE_COND.  */
      code = gimple_cond_code (stmt);
      lhs = gimple_cond_lhs (stmt);
      rhs = gimple_cond_rhs (stmt);
      if (TREE_CODE (lhs) == SSA_NAME
	  && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	  && ((code != EQ_EXPR && code != NE_EXPR)
	      || rhs != boolean_false_node
	      /* Either push into ops the individual bitwise
		 or resp. and operands, depending on which
		 edge is other_bb.  */
	      || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
				 ^ (code == EQ_EXPR))
			   ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
			   loop_containing_stmt (stmt))))
	  /* Or push the GIMPLE_COND stmt itself.  */
	  operand_entry *oe = operand_entry_pool.allocate ();

	  oe->rank = (e->flags & EDGE_TRUE_VALUE)
		     ? BIT_IOR_EXPR : BIT_AND_EXPR;
	  /* oe->op = NULL signals that there is no SSA_NAME
	     for the range test, and oe->id instead is the
	     basic block number, at whose end the GIMPLE_COND
	     is.  */
	  oe->stmt_to_insert = NULL;
      else if (ops.length () > bb_ent.first_idx)
	  bb_ent.op = lhs;
	  bb_ent.last_idx = ops.length ();
      bbinfo.safe_push (bb_ent);

  if (ops.length () > 1)
    any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
  if (any_changes)
      unsigned int idx, max_idx = 0;
      /* update_ops relies on has_single_use predicates returning the
	 same values as it did during get_ops earlier.  Additionally it
	 never removes statements, only adds new ones and it should walk
	 from the single imm use and check the predicate already before
	 making those changes.
	 On the other side, the handling of GIMPLE_COND directly can turn
	 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
	 it needs to be done in a separate loop afterwards.  */
      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
	      && bbinfo[idx].op != NULL_TREE)
	      stmt = last_stmt (bb);
	      new_op = update_ops (bbinfo[idx].op,
				   (enum tree_code)
				   ops[bbinfo[idx].first_idx]->rank,
				   ops, &bbinfo[idx].first_idx,
				   loop_containing_stmt (stmt));
	      if (new_op == NULL_TREE)
		  gcc_assert (bb == last_bb);
		  new_op = ops[bbinfo[idx].first_idx++]->op;
	      if (bbinfo[idx].op != new_op)
		  imm_use_iterator iter;
		  use_operand_p use_p;
		  gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;

		  FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
		    if (is_gimple_debug (use_stmt))
		    else if (gimple_code (use_stmt) == GIMPLE_COND
			     || gimple_code (use_stmt) == GIMPLE_PHI)
		      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
			SET_USE (use_p, new_op);
		    else if ((is_gimple_assign (use_stmt)
			      && (TREE_CODE_CLASS
				  (gimple_assign_rhs_code (use_stmt))
				  == tcc_comparison)))
		      cast_or_tcc_cmp_stmt = use_stmt;
		    else if (gimple_assign_cast_p (use_stmt))
		      cast_or_tcc_cmp_stmt = use_stmt;

		  if (cast_or_tcc_cmp_stmt)
		      gcc_assert (bb == last_bb);
		      tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
		      tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
		      enum tree_code rhs_code
			= gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
			? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
		      if (is_gimple_min_invariant (new_op))
			  new_op = fold_convert (TREE_TYPE (lhs), new_op);
			  g = gimple_build_assign (new_lhs, new_op);
		      else
			g = gimple_build_assign (new_lhs, rhs_code, new_op);
		      gimple_stmt_iterator gsi
			= gsi_for_stmt (cast_or_tcc_cmp_stmt);
		      gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
		      gimple_set_visited (g, true);
		      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
		      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
			if (is_gimple_debug (use_stmt))
			else if (gimple_code (use_stmt) == GIMPLE_COND
				 || gimple_code (use_stmt) == GIMPLE_PHI)
			  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
			    SET_USE (use_p, new_lhs);

      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
	      && bbinfo[idx].op == NULL_TREE
	      && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
	      gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));

	      /* If we collapse the conditional to a true/false
		 condition, then bubble that knowledge up to our caller.  */
	      if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
		  gimple_cond_make_false (cond_stmt);
		  cfg_cleanup_needed = true;
	      else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
		  gimple_cond_make_true (cond_stmt);
		  cfg_cleanup_needed = true;
	      else
		  gimple_cond_set_code (cond_stmt, NE_EXPR);
		  gimple_cond_set_lhs (cond_stmt,
				       ops[bbinfo[idx].first_idx]->op);
		  gimple_cond_set_rhs (cond_stmt, boolean_false_node);
	      update_stmt (cond_stmt);

      /* The above changes could result in basic blocks after the first
	 modified one, up to and including last_bb, to be executed even if
	 they would not be in the original program.  If the value ranges of
	 assignment lhs' in those bbs were dependent on the conditions
	 guarding those basic blocks which now can change, the VRs might
	 be incorrect.  As no_side_effect_bb should ensure those SSA_NAMEs
	 are only used within the same bb, it should be not a big deal if
	 we just reset all the VRs in those bbs.  See PR68671.  */
      for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
	reset_flow_sensitive_info_in_bb (bb);

  return cfg_cleanup_needed;
/* Return true if OPERAND is defined by a PHI node which uses the LHS
   of STMT in its operands.  This is also known as a "destructive
   update" operation.  */
static bool
is_phi_for_stmt (gimple *stmt, tree operand)
  gimple *def_stmt;
  gphi *def_phi;
  tree lhs;
  use_operand_p arg_p;
  ssa_op_iter i;

  if (TREE_CODE (operand) != SSA_NAME)

  lhs = gimple_assign_lhs (stmt);

  def_stmt = SSA_NAME_DEF_STMT (operand);
  def_phi = dyn_cast <gphi *> (def_stmt);

  FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
    if (lhs == USE_FROM_PTR (arg_p))
/* Remove def stmt of VAR if VAR has zero uses and recurse
   on rhs1 operand if so.  */

static void
remove_visited_stmt_chain (tree var)
  gimple *stmt;
  gimple_stmt_iterator gsi;

  while (1)
      if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
      stmt = SSA_NAME_DEF_STMT (var);
      if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
	  var = gimple_assign_rhs1 (stmt);
	  gsi = gsi_for_stmt (stmt);
	  reassoc_remove_stmt (&gsi);
	  release_defs (stmt);
/* This function checks three consecutive operands in
   passed operands vector OPS starting from OPINDEX and
   swaps two operands if it is profitable for binary operation
   consuming OPINDEX + 1 and OPINDEX + 2 operands.

   We pair ops with the same rank if possible.

   The alternative we try is to see if STMT is a destructive
   update style statement, which is like:
     b = phi (a, ...)
     a = b + something;
   In that case, we want to use the destructive update form to
   expose the possible vectorizer sum reduction opportunity.
   In that case, the third operand will be the phi node.  This
   check is not performed if STMT is null.

   We could, of course, try to be better as noted above, and do a
   lot of work to try to find these opportunities in >3 operand
   cases, but it is unlikely to be worth it.  */
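/* For instance (ranks invented for illustration), with
   OPS[OPINDEX..] being { a (rank 1), b (rank 1), c (rank 2) },
   a and c are swapped so that the two rank-1 operands end up being
   consumed by the same binary operation, giving the redundancy
   eliminator a better chance.  */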
static void
swap_ops_for_binary_stmt (vec<operand_entry *> ops,
			  unsigned int opindex, gimple *stmt)
  operand_entry *oe1, *oe2, *oe3;

  oe1 = ops[opindex];
  oe2 = ops[opindex + 1];
  oe3 = ops[opindex + 2];

  if ((oe1->rank == oe2->rank
       && oe2->rank != oe3->rank)
      || (stmt && is_phi_for_stmt (stmt, oe3->op)
	  && !is_phi_for_stmt (stmt, oe1->op)
	  && !is_phi_for_stmt (stmt, oe2->op)))
    std::swap (*oe1, *oe3);
  else if ((oe1->rank == oe3->rank
	    && oe2->rank != oe3->rank)
	   || (stmt && is_phi_for_stmt (stmt, oe2->op)
	       && !is_phi_for_stmt (stmt, oe1->op)
	       && !is_phi_for_stmt (stmt, oe3->op)))
    std::swap (*oe1, *oe2);
/* If definition of RHS1 or RHS2 dominates STMT, return the later of those
   two definitions, otherwise return STMT.  */

static inline gimple *
find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
{
  if (TREE_CODE (rhs1) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
    stmt = SSA_NAME_DEF_STMT (rhs1);
  if (TREE_CODE (rhs2) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
    stmt = SSA_NAME_DEF_STMT (rhs2);
  return stmt;
}

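/* For illustration (our sketch): when STMT dominates the definitions of
   both RHS1 and RHS2, those definitions execute after STMT, and the
   statement returned is whichever definition comes last, so the caller
   can insert the rewritten statement after it rather than at STMT.  */
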
/* If the stmt that defines operand has to be inserted, insert it
   before the use.  */

static void
insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
{
  gcc_assert (is_gimple_assign (stmt_to_insert));
  tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
  tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
  gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
  gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
  gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));

  /* If the insert point is not stmt, then insert_point would be
     the point where operand rhs1 or rhs2 is defined.  In this case,
     stmt_to_insert has to be inserted afterwards.  This would
     only happen when the stmt insertion point is flexible.  */
  if (stmt == insert_point)
    gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
  else
    insert_stmt_after (stmt_to_insert, insert_point);
}

/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order.  Return new lhs.
   CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
   the current stmt and during recursive invocations.
   NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
   recursive invocations.  */

static tree
rewrite_expr_tree (gimple *stmt, unsigned int opindex,
		   vec<operand_entry *> ops, bool changed, bool next_changed)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree lhs = gimple_assign_lhs (stmt);
  operand_entry *oe;

  /* The final recursion case for this function is that you have
     exactly two operations left.
     If we had exactly one op in the entire list to start with, we
     would have never called this function, and the tail recursion
     rewrites them one at a time.  */
  if (opindex + 2 == ops.length ())
    {
      operand_entry *oe1, *oe2;

      oe1 = ops[opindex];
      oe2 = ops[opindex + 1];

      if (rhs1 != oe1->op || rhs2 != oe2->op)
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  unsigned int uid = gimple_uid (stmt);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Transforming ");
	      print_gimple_stmt (dump_file, stmt, 0);
	    }

	  /* If the stmt that defines operand has to be inserted, insert it
	     before the use.  */
	  if (oe1->stmt_to_insert)
	    insert_stmt_before_use (stmt, oe1->stmt_to_insert);
	  if (oe2->stmt_to_insert)
	    insert_stmt_before_use (stmt, oe2->stmt_to_insert);
	  /* Even when changed is false, reassociation could have e.g. removed
	     some redundant operations, so unless we are just swapping the
	     arguments or unless there is no change at all (then we just
	     return lhs), force creation of a new SSA_NAME.  */
	  if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
	    {
	      gimple *insert_point
		= find_insert_point (stmt, oe1->op, oe2->op);
	      lhs = make_ssa_name (TREE_TYPE (lhs));
	      stmt
		= gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
				       oe1->op, oe2->op);
	      gimple_set_uid (stmt, uid);
	      gimple_set_visited (stmt, true);
	      if (insert_point == gsi_stmt (gsi))
		gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	      else
		insert_stmt_after (stmt, insert_point);
	    }
	  else
	    {
	      gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
				   == stmt);
	      gimple_assign_set_rhs1 (stmt, oe1->op);
	      gimple_assign_set_rhs2 (stmt, oe2->op);
	      update_stmt (stmt);
	    }

	  if (rhs1 != oe1->op && rhs1 != oe2->op)
	    remove_visited_stmt_chain (rhs1);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " into ");
	      print_gimple_stmt (dump_file, stmt, 0);
	    }
	}
      return lhs;
    }

  /* If we hit here, we should have 3 or more ops left.  */
  gcc_assert (opindex + 2 < ops.length ());

  /* Rewrite the next operator.  */
  oe = ops[opindex];

  /* If the stmt that defines operand has to be inserted, insert it
     before the use.  */
  if (oe->stmt_to_insert)
    insert_stmt_before_use (stmt, oe->stmt_to_insert);

  /* Recurse on the LHS of the binary operator, which is guaranteed to
     be the non-leaf side.  */
  tree new_rhs1
    = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
			 changed || oe->op != rhs2 || next_changed,
			 false);

  if (oe->op != rhs2 || new_rhs1 != rhs1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Transforming ");
	  print_gimple_stmt (dump_file, stmt, 0);
	}

      /* If changed is false, this is either opindex == 0
	 or all outer rhs2's were equal to corresponding oe->op,
	 and powi_result is NULL.
	 That means lhs is equivalent before and after reassociation.
	 Otherwise ensure the old lhs SSA_NAME is not reused and
	 create a new stmt as well, so that any debug stmts will be
	 properly adjusted.  */
      if (changed)
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  unsigned int uid = gimple_uid (stmt);
	  gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);

	  lhs = make_ssa_name (TREE_TYPE (lhs));
	  stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
				      new_rhs1, oe->op);
	  gimple_set_uid (stmt, uid);
	  gimple_set_visited (stmt, true);
	  if (insert_point == gsi_stmt (gsi))
	    gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	  else
	    insert_stmt_after (stmt, insert_point);
	}
      else
	{
	  gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
			       == stmt);
	  gimple_assign_set_rhs1 (stmt, new_rhs1);
	  gimple_assign_set_rhs2 (stmt, oe->op);
	  update_stmt (stmt);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " into ");
	  print_gimple_stmt (dump_file, stmt, 0);
	}
    }
  return lhs;
}

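/* To illustrate the recursion shape (our sketch, not from the sources):
   each level keeps OPS[OPINDEX] as the rhs2 of its statement and
   recurses into the definition of rhs1 with OPINDEX + 1; the base case,
   reached with exactly two operands left, fills in both operands of the
   innermost statement.  For OPS = { o1, o2, o3 } rooted at t2:

     t1 = o2 <op> o3;   <- base case, opindex == 1
     t2 = t1 <op> o1;   <- outer level, opindex == 0  */
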
/* Find out how many cycles we need to compute statements chain.
   OPS_NUM holds number of statements in a chain.  CPU_WIDTH is a
   maximum number of independent statements we may execute per cycle.  */

static int
get_required_cycles (int ops_num, int cpu_width)
{
  int res;
  int elog;
  unsigned int rest;

  /* While we have more than 2 * cpu_width operands
     we may reduce number of operands by cpu_width
     per cycle.  */
  res = ops_num / (2 * cpu_width);

  /* Remained operands count may be reduced twice per cycle
     until we have only one operand.  */
  rest = (unsigned)(ops_num - res * cpu_width);
  elog = exact_log2 (rest);
  if (elog >= 0)
    res += elog;
  else
    res += floor_log2 (rest) + 1;

  return res;
}

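/* A worked example (ours): for ops_num = 12 and cpu_width = 2,
   res = 12 / 4 = 3 cycles each retire cpu_width operands, leaving
   rest = 12 - 3 * 2 = 6; six is not a power of two, so
   floor_log2 (6) + 1 = 3 further halving cycles are added, for a
   total of 6 cycles.  */
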
/* Returns an optimal number of registers to use for computation of
   given statements.  */

static int
get_reassociation_width (int ops_num, enum tree_code opc,
			 machine_mode mode)
{
  int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
  int width;
  int width_min;
  int cycles_best;

  if (param_width > 0)
    width = param_width;
  else
    width = targetm.sched.reassociation_width (opc, mode);

  if (width == 1)
    return width;

  /* Get the minimal time required for sequence computation.  */
  cycles_best = get_required_cycles (ops_num, width);

  /* Check if we may use less width and still compute sequence for
     the same time.  It will allow us to reduce registers usage.
     get_required_cycles is monotonically increasing with lower width
     so we can perform a binary search for the minimal width that still
     results in the optimal cycle count.  */
  width_min = 1;
  while (width > width_min)
    {
      int width_mid = (width + width_min) / 2;

      if (get_required_cycles (ops_num, width_mid) == cycles_best)
	width = width_mid;
      else if (width_min < width_mid)
	width_min = width_mid;
      else
	break;
    }

  return width;
}

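/* For example (our illustration): with ops_num = 4 and a target width
   of 4, get_required_cycles (4, 4) and get_required_cycles (4, 2) both
   evaluate to 2, so the search settles on width 2, halving the register
   pressure at no cost in cycles.  */
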
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order and trying to allow operations to be executed in
   parallel.  */

static void
rewrite_expr_tree_parallel (gassign *stmt, int width,
			    vec<operand_entry *> ops)
{
  enum tree_code opcode = gimple_assign_rhs_code (stmt);
  int op_num = ops.length ();
  gcc_assert (op_num > 0);
  int stmt_num = op_num - 1;
  gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
  int op_index = op_num - 1;
  int stmt_index = 0;
  int ready_stmts_end = 0;
  int i = 0;
  gimple *stmt1 = NULL, *stmt2 = NULL;
  tree last_rhs1 = gimple_assign_rhs1 (stmt);

  /* We start expression rewriting from the top statements.
     So, in this loop we create a full list of statements
     we will work with.  */
  stmts[stmt_num - 1] = stmt;
  for (i = stmt_num - 2; i >= 0; i--)
    stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));

  for (i = 0; i < stmt_num; i++)
    {
      tree op1, op2;

      /* Determine whether we should use results of
	 already handled statements or not.  */
      if (ready_stmts_end == 0
	  && (i - stmt_index >= width || op_index < 1))
	ready_stmts_end = i;

      /* Now we choose operands for the next statement.  Non zero
	 value in ready_stmts_end means here that we should use
	 the result of already generated statements as new operand.  */
      if (ready_stmts_end > 0)
	{
	  op1 = gimple_assign_lhs (stmts[stmt_index++]);
	  if (ready_stmts_end > stmt_index)
	    op2 = gimple_assign_lhs (stmts[stmt_index++]);
	  else if (op_index >= 0)
	    {
	      operand_entry *oe = ops[op_index--];
	      stmt2 = oe->stmt_to_insert;
	      op2 = oe->op;
	    }
	  else
	    {
	      gcc_assert (stmt_index < i);
	      op2 = gimple_assign_lhs (stmts[stmt_index++]);
	    }

	  if (stmt_index >= ready_stmts_end)
	    ready_stmts_end = 0;
	}
      else
	{
	  if (op_index > 1)
	    swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
	  operand_entry *oe2 = ops[op_index--];
	  operand_entry *oe1 = ops[op_index--];
	  op2 = oe2->op;
	  stmt2 = oe2->stmt_to_insert;
	  op1 = oe1->op;
	  stmt1 = oe1->stmt_to_insert;
	}

      /* If we emit the last statement then we should put
	 operands into the last statement.  It will also
	 appear in the specified statement.  */
      if (op_index < 0 && stmt_index == i)
	i = stmt_num - 1;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Transforming ");
	  print_gimple_stmt (dump_file, stmts[i], 0);
	}

      /* If the stmt that defines operand has to be inserted, insert it
	 before the use.  */
      if (stmt1)
	insert_stmt_before_use (stmts[i], stmt1);
      if (stmt2)
	insert_stmt_before_use (stmts[i], stmt2);
      stmt1 = stmt2 = NULL;

      /* We keep original statement only for the last one.  All
	 others are recreated.  */
      if (i == stmt_num - 1)
	{
	  gimple_assign_set_rhs1 (stmts[i], op1);
	  gimple_assign_set_rhs2 (stmts[i], op2);
	  update_stmt (stmts[i]);
	}
      else
	{
	  stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
	  gimple_set_visited (stmts[i], true);
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " into ");
	  print_gimple_stmt (dump_file, stmts[i], 0);
	}
    }

  remove_visited_stmt_chain (last_rhs1);
}

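/* A sketch of the effect (ours, not from a testcase): rewriting
   a + b + c + d with width 2 emits

     t1 = a + b;
     t2 = c + d;
     t3 = t1 + t2;

   instead of the serial chain ((a + b) + c) + d, so the first two
   additions can issue in the same cycle.  */
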
/* Transform STMT, which is really (A + B) + (C + D) into the left
   linear form, ((A+B)+C)+D.
   Recurse on D if necessary.  */

static void
linearize_expr (gimple *stmt)
{
  gimple_stmt_iterator gsi;
  gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
  gimple *oldbinrhs = binrhs;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  gimple *newbinrhs = NULL;
  struct loop *loop = loop_containing_stmt (stmt);
  tree lhs = gimple_assign_lhs (stmt);

  gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
	      && is_reassociable_op (binrhs, rhscode, loop));

  gsi = gsi_for_stmt (stmt);

  gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
  binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
				gimple_assign_rhs_code (binrhs),
				gimple_assign_lhs (binlhs),
				gimple_assign_rhs2 (binrhs));
  gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
  gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
  gimple_set_uid (binrhs, gimple_uid (stmt));

  if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
    newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Linearized: ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  reassociate_stats.linearized++;
  update_stmt (stmt);

  gsi = gsi_for_stmt (oldbinrhs);
  reassoc_remove_stmt (&gsi);
  release_defs (oldbinrhs);

  gimple_set_visited (stmt, true);
  gimple_set_visited (binlhs, true);
  gimple_set_visited (binrhs, true);

  /* Tail recurse on the new rhs if it still needs reassociation.  */
  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
    /* ??? This should probably be linearize_expr (newbinrhs) but I don't
       want to change the algorithm while converting to tuples.  */
    linearize_expr (stmt);
}

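/* Concretely (our example): for

     t1 = a + b;
     t2 = c + d;
     t3 = t1 + t2;

   the rewrite above yields

     t1 = a + b;
     t4 = t1 + d;
     t3 = t4 + c;

   i.e. the left-linear form ((a + b) + d) + c, with the recursion
   handling the case where the new rhs2 still needs linearizing.  */
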
/* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
   it.  Otherwise, return NULL.  */

static gimple *
get_single_immediate_use (tree lhs)
{
  use_operand_p immuse;
  gimple *immusestmt;

  if (TREE_CODE (lhs) == SSA_NAME
      && single_imm_use (lhs, &immuse, &immusestmt)
      && is_gimple_assign (immusestmt))
    return immusestmt;

  return NULL;
}

/* Recursively negate the value of TONEGATE, and return the SSA_NAME
   representing the negated value.  Insertions of any necessary
   instructions go before GSI.
   This function is recursive in that, if you hand it "a_5" as the
   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */

static tree
negate_value (tree tonegate, gimple_stmt_iterator *gsip)
{
  gimple *negatedefstmt = NULL;
  tree resultofnegate;
  gimple_stmt_iterator gsi;
  unsigned int uid;

  /* If we are trying to negate a name, defined by an add, negate the
     add operands instead.  */
  if (TREE_CODE (tonegate) == SSA_NAME)
    negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
  if (TREE_CODE (tonegate) == SSA_NAME
      && is_gimple_assign (negatedefstmt)
      && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
      && has_single_use (gimple_assign_lhs (negatedefstmt))
      && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
    {
      tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
      tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
      tree lhs = gimple_assign_lhs (negatedefstmt);
      gimple *g;

      gsi = gsi_for_stmt (negatedefstmt);
      rhs1 = negate_value (rhs1, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs2 = negate_value (rhs2, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      lhs = make_ssa_name (TREE_TYPE (lhs));
      gimple_set_visited (negatedefstmt, true);
      g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
      gimple_set_uid (g, gimple_uid (negatedefstmt));
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      return lhs;
    }

  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
  resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
					     NULL_TREE, true, GSI_SAME_STMT);
  gsi = *gsip;
  uid = gimple_uid (gsi_stmt (gsi));
  for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (gimple_uid (stmt) != 0)
	break;
      gimple_set_uid (stmt, uid);
    }
  return resultofnegate;
}

/* Return true if we should break up the subtract in STMT into an add
   with negate.  This is true when the subtract operands are really
   adds, or the subtract itself is used in an add expression.  In
   either case, breaking up the subtract into an add with negate
   exposes the adds to reassociation.  */

static bool
should_break_up_subtract (gimple *stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple *immusestmt;
  struct loop *loop = loop_containing_stmt (stmt);

  if (TREE_CODE (binlhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
    return true;

  if (TREE_CODE (binrhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
    return true;

  if (TREE_CODE (lhs) == SSA_NAME
      && (immusestmt = get_single_immediate_use (lhs))
      && is_gimple_assign (immusestmt)
      && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
	  || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
	      && gimple_assign_rhs1 (immusestmt) == lhs)
	  || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
    return true;
  return false;
}

/* Transform STMT from A - B into A + -B.  */

static void
break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Breaking up subtract ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  rhs2 = negate_value (rhs2, gsip);
  gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
  update_stmt (stmt);
}

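/* For instance (our example): "x = a - b" where x is used only in an
   addition becomes

     t = -b;
     x = a + t;

   after which the whole plus chain is exposed to reassociation.  */
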
/* Determine whether STMT is a builtin call that raises an SSA name
   to an integer power and has only one use.  If so, and this is early
   reassociation and unsafe math optimizations are permitted, place
   the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
   If any of these conditions does not hold, return FALSE.  */

static bool
acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
{
  tree arg1;
  REAL_VALUE_TYPE c, cint;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
      if (flag_errno_math)
	return false;

      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (TREE_CODE (arg1) != REAL_CST)
	return false;

      c = TREE_REAL_CST (arg1);

      if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
	return false;

      *exponent = real_to_integer (&c);
      real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
      if (!real_identical (&c, &cint))
	return false;

      break;

    CASE_CFN_POWI:
      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (!tree_fits_shwi_p (arg1))
	return false;

      *exponent = tree_to_shwi (arg1);
      break;

    default:
      return false;
    }

  /* Expanding negative exponents is generally unproductive, so we don't
     complicate matters with those.  Exponents of zero and one should
     have been handled by expression folding.  */
  if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
    return false;

  return true;
}

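/* Examples (ours): __builtin_pow (x_1, 3.0) yields *BASE = x_1 and
   *EXPONENT = 3; __builtin_pow (x_1, 2.5) is rejected because the
   exponent does not reduce to an integer; and __builtin_powi (x_1, -2)
   is rejected by the negative-exponent check above.  */
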
/* Try to derive and add operand entry for OP to *OPS.  Return false if
   unsuccessful.  */

static bool
try_special_add_to_ops (vec<operand_entry *> *ops,
			enum tree_code code,
			tree op, gimple* def_stmt)
{
  tree base = NULL_TREE;
  HOST_WIDE_INT exponent = 0;

  if (TREE_CODE (op) != SSA_NAME
      || ! has_single_use (op))
    return false;

  if (code == MULT_EXPR
      && reassoc_insert_powi_p
      && flag_unsafe_math_optimizations
      && is_gimple_call (def_stmt)
      && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
    {
      add_repeat_to_ops_vec (ops, base, exponent);
      gimple_set_visited (def_stmt, true);
      return true;
    }
  else if (code == MULT_EXPR
	   && is_gimple_assign (def_stmt)
	   && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
	   && !HONOR_SNANS (TREE_TYPE (op))
	   && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
	       || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
    {
      /* Represent a negated multiplicand as its operand times -1,
	 so that pairs of -1 factors can later cancel.  */
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      tree cst = build_minus_one_cst (TREE_TYPE (op));
      add_to_ops_vec (ops, rhs1);
      add_to_ops_vec (ops, cst);
      gimple_set_visited (def_stmt, true);
      return true;
    }

  return false;
}

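/* Two hypothetical examples of the above (ours): for t_5 = y_4 * z_3
   with z_3 = __builtin_pow (x_1, 3.0), the repeated factor (x_1, 3) is
   queued via add_repeat_to_ops_vec; for t_5 = y_4 * z_3 with
   z_3 = -w_2, the operands w_2 and -1.0 are pushed instead, so that
   pairs of -1 factors can cancel when the operand list is optimized.  */
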
/* Recursively linearize a binary expression that is the RHS of STMT.
   Place the operands of the expression tree in the vector named OPS.  */

static void
linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
		     bool is_associative, bool set_visited)
{
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple *binlhsdef = NULL, *binrhsdef = NULL;
  bool binlhsisreassoc = false;
  bool binrhsisreassoc = false;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  struct loop *loop = loop_containing_stmt (stmt);

  if (set_visited)
    gimple_set_visited (stmt, true);

  if (TREE_CODE (binlhs) == SSA_NAME)
    {
      binlhsdef = SSA_NAME_DEF_STMT (binlhs);
      binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
			 && !stmt_could_throw_p (cfun, binlhsdef));
    }

  if (TREE_CODE (binrhs) == SSA_NAME)
    {
      binrhsdef = SSA_NAME_DEF_STMT (binrhs);
      binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
			 && !stmt_could_throw_p (cfun, binrhsdef));
    }

  /* If the LHS is not reassociable, but the RHS is, we need to swap
     them.  If neither is reassociable, there is nothing we can do, so
     just put them in the ops vector.  If the LHS is reassociable,
     linearize it.  If both are reassociable, then linearize the RHS
     and the LHS.  */

  if (!binlhsisreassoc)
    {
      /* If this is not an associative operation like division, give up.  */
      if (!is_associative)
	{
	  add_to_ops_vec (ops, binrhs);
	  return;
	}

      if (!binrhsisreassoc)
	{
	  if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
	    add_to_ops_vec (ops, binrhs);

	  if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
	    add_to_ops_vec (ops, binlhs);

	  return;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "swapping operands of ");
	  print_gimple_stmt (dump_file, stmt, 0);
	}

      swap_ssa_operands (stmt,
			 gimple_assign_rhs1_ptr (stmt),
			 gimple_assign_rhs2_ptr (stmt));
      update_stmt (stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " is now ");
	  print_gimple_stmt (dump_file, stmt, 0);
	}

      /* We want to make it so the lhs is always the reassociative op,
	 so swap.  */
      std::swap (binlhs, binrhs);
    }
  else if (binrhsisreassoc)
    {
      linearize_expr (stmt);
      binlhs = gimple_assign_rhs1 (stmt);
      binrhs = gimple_assign_rhs2 (stmt);
    }

  gcc_assert (TREE_CODE (binrhs) != SSA_NAME
	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
				      rhscode, loop));
  linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
		       is_associative, set_visited);

  if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
    add_to_ops_vec (ops, binrhs);
}

/* Repropagate the negates back into subtracts, since no other pass
   currently does it.  */

static void
repropagate_negates (void)
{
  unsigned int i = 0;
  tree negate;

  FOR_EACH_VEC_ELT (plus_negates, i, negate)
    {
      gimple *user = get_single_immediate_use (negate);

      if (!user || !is_gimple_assign (user))
	continue;

      /* The negate operand can be either operand of a PLUS_EXPR
	 (it can be the LHS if the RHS is a constant for example).

	 Force the negate operand to the RHS of the PLUS_EXPR, then
	 transform the PLUS_EXPR into a MINUS_EXPR.  */
      if (gimple_assign_rhs_code (user) == PLUS_EXPR)
	{
	  /* If the negated operand appears on the LHS of the
	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
	     to force the negated operand to the RHS of the PLUS_EXPR.  */
	  if (gimple_assign_rhs1 (user) == negate)
	    swap_ssa_operands (user,
			       gimple_assign_rhs1_ptr (user),
			       gimple_assign_rhs2_ptr (user));

	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
	  if (gimple_assign_rhs2 (user) == negate)
	    {
	      tree rhs1 = gimple_assign_rhs1 (user);
	      tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
	      update_stmt (user);
	    }
	}
      else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
	{
	  if (gimple_assign_rhs1 (user) == negate)
	    {
	      /* We have
		   x = -a
		   y = x - b
		 which we transform into
		   x = a + b
		   y = -x .
		 This pushes down the negate which we possibly can merge
		 into some other operation, hence insert it into the
		 plus_negates vector.  */
	      gimple *feed = SSA_NAME_DEF_STMT (negate);
	      tree a = gimple_assign_rhs1 (feed);
	      tree b = gimple_assign_rhs2 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (feed);
	      gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
	      tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
	      gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
	      user = gsi_stmt (gsi2);
	      update_stmt (user);
	      reassoc_remove_stmt (&gsi);
	      release_defs (feed);
	      plus_negates.safe_push (gimple_assign_lhs (user));
	    }
	  else
	    {
	      /* Transform "x = -a; y = b - x" into "y = b + a", getting
		 rid of one operation.  */
	      gimple *feed = SSA_NAME_DEF_STMT (negate);
	      tree a = gimple_assign_rhs1 (feed);
	      tree rhs1 = gimple_assign_rhs1 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
	      update_stmt (gsi_stmt (gsi));
	    }
	}
    }
}

/* Returns true if OP is of a type for which we can do reassociation.
   That is for integral or non-saturating fixed-point types, and for
   floating point type when associative-math is enabled.  */

static bool
can_reassociate_p (tree op)
{
  tree type = TREE_TYPE (op);
  if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
    return false;
  if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
      || NON_SAT_FIXED_POINT_TYPE_P (type)
      || (flag_associative_math && FLOAT_TYPE_P (type)))
    return true;
  return false;
}

/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   IE given
   d = f + g
   c = a + e
   b = c - d
   q = b - r
   k = t - q

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement
   and set UIDs within each basic block.  */

static void
break_up_subtract_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  unsigned int uid = 1;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      gimple_set_visited (stmt, false);
      gimple_set_uid (stmt, uid++);

      if (!is_gimple_assign (stmt)
	  || !can_reassociate_p (gimple_assign_lhs (stmt)))
	continue;

      /* Look for simple gimple subtract operations.  */
      if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
	{
	  if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
	      || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
	    continue;

	  /* Check for a subtract used only in an addition.  If this
	     is the case, transform it into add of a negate for better
	     reassociation.  IE transform C = A-B into C = A + -B if C
	     is only used in an addition.  */
	  if (should_break_up_subtract (stmt))
	    break_up_subtract (stmt, &gsi);
	}
      else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
	       && can_reassociate_p (gimple_assign_rhs1 (stmt)))
	plus_negates.safe_push (gimple_assign_lhs (stmt));
    }
  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    break_up_subtract_bb (son);
}

/* Used for repeated factor analysis.  */
struct repeat_factor
{
  /* An SSA name that occurs in a multiply chain.  */
  tree factor;

  /* Cached rank of the factor.  */
  unsigned rank;

  /* Number of occurrences of the factor in the chain.  */
  HOST_WIDE_INT count;

  /* An SSA name representing the product of this factor and
     all factors appearing later in the repeated factor vector.  */
  tree repr;
};


static vec<repeat_factor> repeat_factor_vec;

/* Used for sorting the repeat factor vector.  Sort primarily by
   ascending occurrence count, secondarily by descending rank.  */

static int
compare_repeat_factors (const void *x1, const void *x2)
{
  const repeat_factor *rf1 = (const repeat_factor *) x1;
  const repeat_factor *rf2 = (const repeat_factor *) x2;

  if (rf1->count != rf2->count)
    return rf1->count - rf2->count;

  return rf2->rank - rf1->rank;
}

/* Look for repeated operands in OPS in the multiply tree rooted at
   STMT.  Replace them with an optimal sequence of multiplies and powi
   builtin calls, and remove the used operands from OPS.  Return an
   SSA name representing the value of the replacement sequence.  */

static tree
attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
{
  unsigned i, j, vec_len;
  int ii;
  operand_entry *oe;
  repeat_factor *rf1, *rf2;
  repeat_factor rfnew;
  tree result = NULL_TREE;
  tree target_ssa, iter_result;
  tree type = TREE_TYPE (gimple_get_lhs (stmt));
  tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gimple *mul_stmt, *pow_stmt;

  /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
     target.  */
  if (!powi_fndecl)
    return NULL_TREE;

  /* Allocate the repeated factor vector.  */
  repeat_factor_vec.create (10);

  /* Scan the OPS vector for all SSA names in the product and build
     up a vector of occurrence counts for each factor.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
	{
	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	    {
	      if (rf1->factor == oe->op)
		{
		  rf1->count += oe->count;
		  break;
		}
	    }

	  if (j >= repeat_factor_vec.length ())
	    {
	      rfnew.factor = oe->op;
	      rfnew.rank = oe->rank;
	      rfnew.count = oe->count;
	      rfnew.repr = NULL_TREE;
	      repeat_factor_vec.safe_push (rfnew);
	    }
	}
    }

  /* Sort the repeated factor vector by (a) increasing occurrence count,
     and (b) decreasing rank.  */
  repeat_factor_vec.qsort (compare_repeat_factors);

  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_pow opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions:

	t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = t1 * t3
	result = t4 * z

     If we instead ignored the cached y * z and first multiplied by
     the __builtin_pow opportunity z * z, we would get the inferior:

	t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = z * z
	t5 = t3 * t4
	result = t5 * y  */

  vec_len = repeat_factor_vec.length ();

  /* Repeatedly look for opportunities to create a builtin_powi call.  */
  while (true)
    {
      HOST_WIDE_INT power;

      /* First look for the largest cached product of factors from
	 preceding iterations.  If found, create a builtin_powi for
	 it if the minimum occurrence count for its factors is at
	 least 2, or just use this cached product as our next
	 multiplicand if the minimum occurrence count is 1.  */
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	{
	  if (rf1->repr && rf1->count > 0)
	    break;
	}

      if (j < vec_len)
	{
	  power = rf1->count;

	  if (power == 1)
	    {
	      iter_result = rf1->repr;

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor *rf;
		  fputs ("Multiplying by cached product ", dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = &repeat_factor_vec[elt];
		      print_generic_expr (dump_file, rf->factor);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fputs ("\n", dump_file);
		}
	    }
	  else
	    {
	      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
					    build_int_cst (integer_type_node,
							   power));
	      gimple_call_set_lhs (pow_stmt, iter_result);
	      gimple_set_location (pow_stmt, gimple_location (stmt));
	      gimple_set_uid (pow_stmt, gimple_uid (stmt));
	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor *rf;
		  fputs ("Building __builtin_pow call for cached product (",
			 dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = &repeat_factor_vec[elt];
		      print_generic_expr (dump_file, rf->factor);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n",
			   power);
		}
	    }
	}
      else
	{
	  /* Otherwise, find the first factor in the repeated factor
	     vector whose occurrence count is at least 2.  If no such
	     factor exists, there are no builtin_powi opportunities
	     remaining.  */
	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	    {
	      if (rf1->count >= 2)
		break;
	    }

	  if (j >= vec_len)
	    break;

	  power = rf1->count;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      unsigned elt;
	      repeat_factor *rf;
	      fputs ("Building __builtin_pow call for (", dump_file);
	      for (elt = j; elt < vec_len; elt++)
		{
		  rf = &repeat_factor_vec[elt];
		  print_generic_expr (dump_file, rf->factor);
		  if (elt < vec_len - 1)
		    fputs (" * ", dump_file);
		}
	      fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n", power);
	    }

	  reassociate_stats.pows_created++;

	  /* Visit each element of the vector in reverse order (so that
	     high-occurrence elements are visited first, and within the
	     same occurrence count, lower-ranked elements are visited
	     first).  Form a linear product of all elements in this order
	     whose occurrence count is at least that of element J.
	     Record the SSA name representing the product of each element
	     with all subsequent elements in the vector.  */
	  if (j == vec_len - 1)
	    rf1->repr = rf1->factor;
	  else
	    {
	      for (ii = vec_len - 2; ii >= (int)j; ii--)
		{
		  tree op1, op2;

		  rf1 = &repeat_factor_vec[ii];
		  rf2 = &repeat_factor_vec[ii + 1];

		  /* Init the last factor's representative to be itself.  */
		  if (!rf2->repr)
		    rf2->repr = rf2->factor;

		  op1 = rf1->factor;
		  op2 = rf2->repr;

		  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
		  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
						  op1, op2);
		  gimple_set_location (mul_stmt, gimple_location (stmt));
		  gimple_set_uid (mul_stmt, gimple_uid (stmt));
		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
		  rf1->repr = target_ssa;

		  /* Don't reprocess the multiply we just introduced.  */
		  gimple_set_visited (mul_stmt, true);
		}
	    }

	  /* Form a call to __builtin_powi for the maximum product
	     just formed, raised to the power obtained earlier.  */
	  rf1 = &repeat_factor_vec[j];
	  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
	  pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
					build_int_cst (integer_type_node,
						       power));
	  gimple_call_set_lhs (pow_stmt, iter_result);
	  gimple_set_location (pow_stmt, gimple_location (stmt));
	  gimple_set_uid (pow_stmt, gimple_uid (stmt));
	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
	}

      /* If we previously formed at least one other builtin_powi call,
	 form the product of this one and those others.  */
      if (result)
	{
	  tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
	  mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
					  result, iter_result);
	  gimple_set_location (mul_stmt, gimple_location (stmt));
	  gimple_set_uid (mul_stmt, gimple_uid (stmt));
	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
	  gimple_set_visited (mul_stmt, true);
	  result = new_result;
	}
      else
	result = iter_result;

      /* Decrement the occurrence count of each element in the product
	 by the count found above, and remove this many copies of each
	 factor from OPS.  */
      for (i = j; i < vec_len; i++)
	{
	  unsigned k = power;
	  unsigned n;

	  rf1 = &repeat_factor_vec[i];
	  rf1->count -= power;

	  FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
	    {
	      if (oe->op == rf1->factor)
		{
		  if (oe->count <= k)
		    {
		      ops->ordered_remove (n);
		      k -= oe->count;

		      if (k == 0)
			break;
		    }
		  else
		    {
		      oe->count -= k;
		      break;
		    }
		}
	    }
	}
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}

/* Attempt to optimize
   CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
   CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */

static void
attempt_builtin_copysign (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  unsigned int i;
  unsigned int length = ops->length ();
  tree cst = ops->last ()->op;

  if (length == 1 || TREE_CODE (cst) != REAL_CST)
    return;

  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME
	  && has_single_use (oe->op))
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
	  if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
	    {
	      tree arg0, arg1;
	      switch (gimple_call_combined_fn (old_call))
		{
		CASE_CFN_COPYSIGN:
		CASE_CFN_COPYSIGN_FN:
		  arg0 = gimple_call_arg (old_call, 0);
		  arg1 = gimple_call_arg (old_call, 1);
		  /* The first argument of copysign must be a constant,
		     otherwise there's nothing to do.  */
		  if (TREE_CODE (arg0) == REAL_CST)
		    {
		      tree type = TREE_TYPE (arg0);
		      tree mul = const_binop (MULT_EXPR, type, cst, arg0);
		      /* If we couldn't fold to a single constant, skip it.
			 That happens e.g. for inexact multiplication when
			 -frounding-math.  */
		      if (mul == NULL_TREE)
			break;
		      /* Instead of adjusting OLD_CALL, let's build a new
			 call to not leak the LHS and prevent keeping bogus
			 debug statements.  DCE will clean up the old call.  */
		      gcall *new_call;
		      if (gimple_call_internal_p (old_call))
			new_call = gimple_build_call_internal
			  (IFN_COPYSIGN, 2, mul, arg1);
		      else
			new_call = gimple_build_call
			  (gimple_call_fndecl (old_call), 2, mul, arg1);
		      tree lhs = make_ssa_name (type);
		      gimple_call_set_lhs (new_call, lhs);
		      gimple_set_location (new_call,
					   gimple_location (old_call));
		      insert_stmt_after (new_call, old_call);
		      /* We've used the constant, get rid of it.  */
		      ops->pop ();
		      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
		      /* Handle the CST1 < 0 case by negating the result.  */
		      if (cst1_neg)
			{
			  tree negrhs = make_ssa_name (TREE_TYPE (lhs));
			  gimple *negate_stmt
			    = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
			  insert_stmt_after (negate_stmt, new_call);
			  oe->op = negrhs;
			}
		      else
			oe->op = lhs;
		      if (dump_file && (dump_flags & TDF_DETAILS))
			{
			  fprintf (dump_file, "Optimizing copysign: ");
			  print_generic_expr (dump_file, cst);
			  fprintf (dump_file, " * COPYSIGN (");
			  print_generic_expr (dump_file, arg0);
			  fprintf (dump_file, ", ");
			  print_generic_expr (dump_file, arg1);
			  fprintf (dump_file, ") into %sCOPYSIGN (",
				   cst1_neg ? "-" : "");
			  print_generic_expr (dump_file, mul);
			  fprintf (dump_file, ", ");
			  print_generic_expr (dump_file, arg1);
			  fprintf (dump_file, ")\n");
			}
		      return;
		    }
		  break;
		default:
		  break;
		}
	    }
	}
    }
}

/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */

static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
{
  tree rhs1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}

/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
			    tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}

/* Reassociate expressions in basic block BB and its post-dominator as
   children.

   Bubble up return status from maybe_optimize_range_tests.  */

static bool
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple *stmt = last_stmt (bb);
  bool cfg_cleanup_needed = false;

  if (stmt && !gimple_visited_p (stmt))
    cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);

  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
	  && !stmt_could_throw_p (cfun, stmt))
	{
	  tree lhs, rhs1, rhs2;
	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

	  /* If this is not a gimple binary expression, there is
	     nothing for us to do with it.  */
	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
	    continue;

	  /* If this was part of an already processed statement,
	     we don't need to touch it again.  */
	  if (gimple_visited_p (stmt))
	    {
	      /* This statement might have become dead because of previous
		 reassociations.  */
	      if (has_zero_uses (gimple_get_lhs (stmt)))
		{
		  reassoc_remove_stmt (&gsi);
		  release_defs (stmt);
		  /* We might end up removing the last stmt above which
		     places the iterator to the end of the sequence.
		     Reset it to the last stmt in this case which might
		     be the end of the sequence as well if we removed
		     the last statement of the sequence.  In which case
		     we need to bail out.  */
		  if (gsi_end_p (gsi))
		    {
		      gsi = gsi_last_bb (bb);
		      if (gsi_end_p (gsi))
			break;
		    }
		}
	      continue;
	    }

	  lhs = gimple_assign_lhs (stmt);
	  rhs1 = gimple_assign_rhs1 (stmt);
	  rhs2 = gimple_assign_rhs2 (stmt);

	  /* For non-bit or min/max operations we can't associate
	     all types.  Verify that here.  */
	  if (rhs_code != BIT_IOR_EXPR
	      && rhs_code != BIT_AND_EXPR
	      && rhs_code != BIT_XOR_EXPR
	      && rhs_code != MIN_EXPR
	      && rhs_code != MAX_EXPR
	      && (!can_reassociate_p (lhs)
		  || !can_reassociate_p (rhs1)
		  || !can_reassociate_p (rhs2)))
	    continue;

	  if (associative_tree_code (rhs_code))
	    {
	      auto_vec<operand_entry *> ops;
	      tree powi_result = NULL_TREE;
	      bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));

	      /* There may be no immediate uses left by the time we
		 get here because we may have eliminated them all.  */
	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
		continue;

	      gimple_set_visited (stmt, true);
	      linearize_expr_tree (&ops, stmt, true, true);
	      ops.qsort (sort_by_operand_rank);
	      int orig_len = ops.length ();
	      optimize_ops_list (rhs_code, &ops);
	      if (undistribute_ops_list (rhs_code, &ops,
					 loop_containing_stmt (stmt)))
		{
		  ops.qsort (sort_by_operand_rank);
		  optimize_ops_list (rhs_code, &ops);
		}

	      if (rhs_code == PLUS_EXPR
		  && transform_add_to_multiply (&ops))
		ops.qsort (sort_by_operand_rank);

	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
		{
		  if (is_vector)
		    optimize_vec_cond_expr (rhs_code, &ops);
		  else
		    optimize_range_tests (rhs_code, &ops, NULL);
		}

	      if (rhs_code == MULT_EXPR && !is_vector)
		{
		  attempt_builtin_copysign (&ops);

		  if (reassoc_insert_powi_p
		      && flag_unsafe_math_optimizations)
		    powi_result = attempt_builtin_powi (stmt, &ops);
		}

	      operand_entry *last;
	      bool negate_result = false;
	      if (ops.length () > 1
		  && rhs_code == MULT_EXPR)
		{
		  last = ops.last ();
		  if ((integer_minus_onep (last->op)
		       || real_minus_onep (last->op))
		      && !HONOR_SNANS (TREE_TYPE (lhs))
		      && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
			  || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
		    {
		      ops.pop ();
		      negate_result = true;
		    }
		}

	      tree new_lhs = lhs;
	      /* If the operand vector is now empty, all operands were
		 consumed by the __builtin_powi optimization.  */
	      if (ops.length () == 0)
		transform_stmt_to_copy (&gsi, stmt, powi_result);
	      else if (ops.length () == 1)
		{
		  tree last_op = ops.last ()->op;

		  /* If the stmt that defines operand has to be inserted, insert it
		     before the use.  */
		  if (ops.last ()->stmt_to_insert)
		    insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
		  if (powi_result)
		    transform_stmt_to_multiply (&gsi, stmt, last_op,
						powi_result);
		  else
		    transform_stmt_to_copy (&gsi, stmt, last_op);
		}
	      else
		{
		  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
		  int ops_num = ops.length ();
		  int width;

		  /* For binary bit operations, if there are at least 3
		     operands and the last operand in OPS is a constant,
		     move it to the front.  This helps ensure that we generate
		     (X & Y) & C rather than (X & C) & Y.  The former will
		     often match a canonical bit test when we get to RTL.  */
		  if (ops.length () > 2
		      && (rhs_code == BIT_AND_EXPR
			  || rhs_code == BIT_IOR_EXPR
			  || rhs_code == BIT_XOR_EXPR)
		      && TREE_CODE (ops.last ()->op) == INTEGER_CST)
		    std::swap (*ops[0], *ops[ops_num - 1]);

		  /* Only rewrite the expression tree to parallel in the
		     last reassoc pass to avoid useless work back-and-forth
		     with initial linearization.  */
		  if (!reassoc_insert_powi_p
		      && ops.length () > 3
		      && (width = get_reassociation_width (ops_num, rhs_code,
							   mode)) > 1)
		    {
		      if (dump_file && (dump_flags & TDF_DETAILS))
			fprintf (dump_file,
				 "Width = %d was chosen for reassociation\n",
				 width);
		      rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
						  width, ops);
		    }
		  else
		    {
		      /* When there are three operands left, we want
			 to make sure the ones that get the double
			 binary op are chosen wisely.  */
		      int len = ops.length ();
		      if (len >= 3)
			swap_ops_for_binary_stmt (ops, len - 3, stmt);

		      new_lhs = rewrite_expr_tree (stmt, 0, ops,
						   powi_result != NULL
						   || negate_result,
						   len != orig_len);
		    }

		  /* If we combined some repeated factors into a
		     __builtin_powi call, multiply that result by the
		     reassociated operands.  */
		  if (powi_result)
		    {
		      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
		      tree type = TREE_TYPE (lhs);
		      tree target_ssa = make_temp_ssa_name (type, NULL,
							    "reassocpow");
		      gimple_set_lhs (lhs_stmt, target_ssa);
		      update_stmt (lhs_stmt);
		      if (lhs != new_lhs)
			{
			  target_ssa = new_lhs;
			  new_lhs = lhs;
			}
		      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
						      powi_result, target_ssa);
		      gimple_set_location (mul_stmt, gimple_location (stmt));
		      gimple_set_uid (mul_stmt, gimple_uid (stmt));
		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
		    }
		}

	      if (negate_result)
		{
		  stmt = SSA_NAME_DEF_STMT (lhs);
		  tree tmp = make_ssa_name (TREE_TYPE (lhs));
		  gimple_set_lhs (stmt, tmp);
		  if (lhs != new_lhs)
		    tmp = new_lhs;
		  gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
							   tmp);
		  gimple_set_uid (neg_stmt, gimple_uid (stmt));
		  gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
		  update_stmt (stmt);
		}
	    }
	}
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    cfg_cleanup_needed |= reassociate_bb (son);

  return cfg_cleanup_needed;
}

/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...; // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...; // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI<1(l1), OTHERVAR(l2)>;  */

static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (var);
      gimple *use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      gcc_assert (ok
		  && is_gimple_assign (use_stmt)
		  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
		  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple *g = gimple_build_cond (NE_EXPR, var,
				     build_zero_cst (TREE_TYPE (var)),
				     NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = profile_probability::even ();
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      then_bb->count -= etrue->count ();

      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
	othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
	othervar = gimple_assign_rhs1 (use_stmt);
      else
	gcc_unreachable ();
      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}

void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
void debug_ops_vector (vec<operand_entry *> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry *> ops)
{
  operand_entry *oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op);
      fprintf (file, "\n");
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry *> ops)
{
  dump_ops_vector (stderr, ops);
}

/* Bubble up return status from reassociate_bb.  */

static bool
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}

/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, long>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
	insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB.  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}

/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
			    reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
			    reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
			    reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
			    reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
			    reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
			    reassociate_stats.pows_created);

  delete operand_rank;
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}

/* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
   insertion of __builtin_powi calls.

   Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
   optimization of a gimple conditional.  Otherwise returns zero.  */

static unsigned int
execute_reassoc (bool insert_powi_p)
{
  reassoc_insert_powi_p = insert_powi_p;

  init_reassoc ();

  bool cfg_cleanup_needed;
  cfg_cleanup_needed = do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
}

namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      insert_powi_p = param;
    }
  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
  virtual unsigned int execute (function *)
    { return execute_reassoc (insert_powi_p); }

 private:
  /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
     point 3a in the pass header comment.  */
  bool insert_powi_p;
}; // class pass_reassoc

} // anon namespace

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}