1 /* Reassociation for trees.
2 Copyright (C) 2005-2018 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
41 #include "gimple-fold.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
46 #include "tree-ssa-loop.h"
49 #include "langhooks.h"
54 #include "case-cfn-macros.h"
56 /* This is a simple global reassociation pass. It is, in part, based
57 on the LLVM pass of the same name (They do some things more/less
58 than we do, in different orders, etc).
60 It consists of five steps:
62 1. Breaking up subtract operations into addition + negate, where
63 it would promote the reassociation of adds.
65 2. Left linearization of the expression trees, so that (A+B)+(C+D)
66 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
67 During linearization, we place the operands of the binary
68 expressions into a vector of operand_entry_*
70 3. Optimization of the operand lists, eliminating things like a +
73 3a. Combine repeated factors with the same occurrence counts
74 into a __builtin_powi call that will later be optimized into
75 an optimal number of multiplies.
77 4. Rewrite the expression trees we linearized and optimized so
78 they are in proper rank order.
80 5. Repropagate negates, as nothing else will clean it up ATM.
82 A bit of theory on #4, since nobody seems to write anything down
83 about why it makes sense to do it the way they do it:
85 We could do this much nicer theoretically, but don't (for reasons
86 explained after how to do it theoretically nice :P).
88 In order to promote the most redundancy elimination, you want
89 binary expressions whose operands are the same rank (or
90 preferably, the same value) exposed to the redundancy eliminator,
91 for possible elimination.
93 So the way to do this if we really cared, is to build the new op
94 tree from the leaves to the roots, merging as you go, and putting the
95 new op on the end of the worklist, until you are left with one
96 thing on the worklist.
98 IE if you have to rewrite the following set of operands (listed with
99 rank in parentheses), with opcode PLUS_EXPR:
101 a (1), b (1), c (1), d (2), e (2)
104 We start with our merge worklist empty, and the ops list with all of
107 You want to first merge all leaves of the same rank, as much as
110 So first build a binary op of
112 mergetmp = a + b, and put "mergetmp" on the merge worklist.
114 Because there is no three operand form of PLUS_EXPR, c is not going to
115 be exposed to redundancy elimination as a rank 1 operand.
117 So you might as well throw it on the merge worklist (you could also
118 consider it to now be a rank two operand, and merge it with d and e,
119 but in this case, you then have evicted e from a binary op. So at
120 least in this situation, you can't win.)
122 Then build a binary op of d + e
125 and put mergetmp2 on the merge worklist.
127 so merge worklist = {mergetmp, c, mergetmp2}
129 Continue building binary ops of these operations until you have only
130 one operation left on the worklist.
135 mergetmp3 = mergetmp + c
137 worklist = {mergetmp2, mergetmp3}
139 mergetmp4 = mergetmp2 + mergetmp3
141 worklist = {mergetmp4}
143 because we have one operation left, we can now just set the original
144 statement equal to the result of that operation.
146 This will at least expose a + b and d + e to redundancy elimination
147 as binary operations.
149 For extra points, you can reuse the old statements to build the
150 mergetmps, since you shouldn't run out.
152 So why don't we do this?
154 Because it's expensive, and rarely will help. Most trees we are
155 reassociating have 3 or less ops. If they have 2 ops, they already
156 will be written into a nice single binary op. If you have 3 ops, a
157 single simple check suffices to tell you whether the first two are of the
158 same rank. If so, you know to order it
161 newstmt = mergetmp + op3
165 newstmt = mergetmp + op1
167 If all three are of the same rank, you can't expose them all in a
168 single binary operator anyway, so the above is *still* the best you
171 Thus, this is what we do. When we have three ops left, we check to see
172 what order to put them in, and call it a day. As a nod to vector sum
173 reduction, we check if any of the ops are really a phi node that is a
174 destructive update for the associating op, and keep the destructive
175 update together for vector sum reduction recognition. */
177 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
178 point 3a in the pass header comment. */
179 static bool reassoc_insert_powi_p
;
185 int constants_eliminated
;
188 int pows_encountered
;
192 /* Operator, rank pair. */
199 gimple
*stmt_to_insert
;
202 static object_allocator
<operand_entry
> operand_entry_pool
203 ("operand entry pool");
205 /* This is used to assign a unique ID to each struct operand_entry
206 so that qsort results are identical on different hosts. */
207 static unsigned int next_operand_entry_id
;
209 /* Starting rank number for a given basic block, so that we can rank
210 operations using unmovable instructions in that BB based on the bb
212 static long *bb_rank
;
214 /* Operand->rank hashtable. */
215 static hash_map
<tree
, long> *operand_rank
;
217 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
218 all basic blocks the CFG should be adjusted - basic blocks
219 split right after that SSA_NAME's definition statement and before
220 the only use, which must be a bit ior. */
221 static vec
<tree
> reassoc_branch_fixups
;
224 static long get_rank (tree
);
225 static bool reassoc_stmt_dominates_stmt_p (gimple
*, gimple
*);
227 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
228 possibly added by gsi_remove. */
231 reassoc_remove_stmt (gimple_stmt_iterator
*gsi
)
233 gimple
*stmt
= gsi_stmt (*gsi
);
235 if (!MAY_HAVE_DEBUG_BIND_STMTS
|| gimple_code (stmt
) == GIMPLE_PHI
)
236 return gsi_remove (gsi
, true);
238 gimple_stmt_iterator prev
= *gsi
;
240 unsigned uid
= gimple_uid (stmt
);
241 basic_block bb
= gimple_bb (stmt
);
242 bool ret
= gsi_remove (gsi
, true);
243 if (!gsi_end_p (prev
))
246 prev
= gsi_start_bb (bb
);
247 gimple
*end_stmt
= gsi_stmt (*gsi
);
248 while ((stmt
= gsi_stmt (prev
)) != end_stmt
)
250 gcc_assert (stmt
&& is_gimple_debug (stmt
) && gimple_uid (stmt
) == 0);
251 gimple_set_uid (stmt
, uid
);
257 /* Bias amount for loop-carried phis. We want this to be larger than
258 the depth of any reassociation tree we can see, but not larger than
259 the rank difference between two blocks. */
260 #define PHI_LOOP_BIAS (1 << 15)
262 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
263 an innermost loop, and the phi has only a single use which is inside
264 the loop, then the rank is the block rank of the loop latch plus an
265 extra bias for the loop-carried dependence. This causes expressions
266 calculated into an accumulator variable to be independent for each
267 iteration of the loop. If STMT is some other phi, the rank is the
268 block rank of its containing block. */
270 phi_rank (gimple
*stmt
)
272 basic_block bb
= gimple_bb (stmt
);
273 struct loop
*father
= bb
->loop_father
;
279 /* We only care about real loops (those with a latch). */
281 return bb_rank
[bb
->index
];
283 /* Interesting phis must be in headers of innermost loops. */
284 if (bb
!= father
->header
286 return bb_rank
[bb
->index
];
288 /* Ignore virtual SSA_NAMEs. */
289 res
= gimple_phi_result (stmt
);
290 if (virtual_operand_p (res
))
291 return bb_rank
[bb
->index
];
293 /* The phi definition must have a single use, and that use must be
294 within the loop. Otherwise this isn't an accumulator pattern. */
295 if (!single_imm_use (res
, &use
, &use_stmt
)
296 || gimple_bb (use_stmt
)->loop_father
!= father
)
297 return bb_rank
[bb
->index
];
299 /* Look for phi arguments from within the loop. If found, bias this phi. */
300 for (i
= 0; i
< gimple_phi_num_args (stmt
); i
++)
302 tree arg
= gimple_phi_arg_def (stmt
, i
);
303 if (TREE_CODE (arg
) == SSA_NAME
304 && !SSA_NAME_IS_DEFAULT_DEF (arg
))
306 gimple
*def_stmt
= SSA_NAME_DEF_STMT (arg
);
307 if (gimple_bb (def_stmt
)->loop_father
== father
)
308 return bb_rank
[father
->latch
->index
] + PHI_LOOP_BIAS
;
312 /* Must be an uninteresting phi. */
313 return bb_rank
[bb
->index
];
316 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
317 loop-carried dependence of an innermost loop, return TRUE; else
320 loop_carried_phi (tree exp
)
325 if (TREE_CODE (exp
) != SSA_NAME
326 || SSA_NAME_IS_DEFAULT_DEF (exp
))
329 phi_stmt
= SSA_NAME_DEF_STMT (exp
);
331 if (gimple_code (SSA_NAME_DEF_STMT (exp
)) != GIMPLE_PHI
)
334 /* Non-loop-carried phis have block rank. Loop-carried phis have
335 an additional bias added in. If this phi doesn't have block rank,
336 it's biased and should not be propagated. */
337 block_rank
= bb_rank
[gimple_bb (phi_stmt
)->index
];
339 if (phi_rank (phi_stmt
) != block_rank
)
345 /* Return the maximum of RANK and the rank that should be propagated
346 from expression OP. For most operands, this is just the rank of OP.
347 For loop-carried phis, the value is zero to avoid undoing the bias
348 in favor of the phi. */
350 propagate_rank (long rank
, tree op
)
354 if (loop_carried_phi (op
))
357 op_rank
= get_rank (op
);
359 return MAX (rank
, op_rank
);
362 /* Look up the operand rank structure for expression E. */
365 find_operand_rank (tree e
)
367 long *slot
= operand_rank
->get (e
);
368 return slot
? *slot
: -1;
371 /* Insert {E,RANK} into the operand rank hashtable. */
374 insert_operand_rank (tree e
, long rank
)
376 gcc_assert (rank
> 0);
377 gcc_assert (!operand_rank
->put (e
, rank
));
380 /* Given an expression E, return the rank of the expression. */
385 /* SSA_NAME's have the rank of the expression they are the result
387 For globals and uninitialized values, the rank is 0.
388 For function arguments, use the pre-setup rank.
389 For PHI nodes, stores, asm statements, etc, we use the rank of
391 For simple operations, the rank is the maximum rank of any of
392 its operands, or the bb_rank, whichever is less.
393 I make no claims that this is optimal, however, it gives good
396 /* We make an exception to the normal ranking system to break
397 dependences of accumulator variables in loops. Suppose we
398 have a simple one-block loop containing:
405 As shown, each iteration of the calculation into x is fully
406 dependent upon the iteration before it. We would prefer to
407 see this in the form:
414 If the loop is unrolled, the calculations of b and c from
415 different iterations can be interleaved.
417 To obtain this result during reassociation, we bias the rank
418 of the phi definition x_1 upward, when it is recognized as an
419 accumulator pattern. The artificial rank causes it to be
420 added last, providing the desired independence. */
422 if (TREE_CODE (e
) == SSA_NAME
)
429 if (SSA_NAME_IS_DEFAULT_DEF (e
))
430 return find_operand_rank (e
);
432 stmt
= SSA_NAME_DEF_STMT (e
);
433 if (gimple_code (stmt
) == GIMPLE_PHI
)
434 return phi_rank (stmt
);
436 if (!is_gimple_assign (stmt
))
437 return bb_rank
[gimple_bb (stmt
)->index
];
439 /* If we already have a rank for this expression, use that. */
440 rank
= find_operand_rank (e
);
444 /* Otherwise, find the maximum rank for the operands. As an
445 exception, remove the bias from loop-carried phis when propagating
446 the rank so that dependent operations are not also biased. */
447 /* Simply walk over all SSA uses - this takes advantage of the
448 fact that non-SSA operands are is_gimple_min_invariant and
451 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
452 rank
= propagate_rank (rank
, op
);
454 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
456 fprintf (dump_file
, "Rank for ");
457 print_generic_expr (dump_file
, e
);
458 fprintf (dump_file
, " is %ld\n", (rank
+ 1));
461 /* Note the rank in the hashtable so we don't recompute it. */
462 insert_operand_rank (e
, (rank
+ 1));
466 /* Constants, globals, etc., are rank 0 */
471 /* We want integer ones to end up last no matter what, since they are
472 the ones we can do the most with. */
473 #define INTEGER_CONST_TYPE 1 << 4
474 #define FLOAT_ONE_CONST_TYPE 1 << 3
475 #define FLOAT_CONST_TYPE 1 << 2
476 #define OTHER_CONST_TYPE 1 << 1
478 /* Classify an invariant tree into integer, float, or other, so that
479 we can sort them to be near other constants of the same type. */
481 constant_type (tree t
)
483 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
484 return INTEGER_CONST_TYPE
;
485 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
487 /* Sort -1.0 and 1.0 constants last, while in some cases
488 const_binop can't optimize some inexact operations, multiplication
489 by -1.0 or 1.0 can be always merged with others. */
490 if (real_onep (t
) || real_minus_onep (t
))
491 return FLOAT_ONE_CONST_TYPE
;
492 return FLOAT_CONST_TYPE
;
495 return OTHER_CONST_TYPE
;
498 /* qsort comparison function to sort operand entries PA and PB by rank
499 so that the sorted array is ordered by rank in decreasing order. */
501 sort_by_operand_rank (const void *pa
, const void *pb
)
503 const operand_entry
*oea
= *(const operand_entry
*const *)pa
;
504 const operand_entry
*oeb
= *(const operand_entry
*const *)pb
;
506 if (oeb
->rank
!= oea
->rank
)
507 return oeb
->rank
> oea
->rank
? 1 : -1;
509 /* It's nicer for optimize_expression if constants that are likely
510 to fold when added/multiplied/whatever are put next to each
511 other. Since all constants have rank 0, order them by type. */
514 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
515 return constant_type (oea
->op
) - constant_type (oeb
->op
);
517 /* To make sorting result stable, we use unique IDs to determine
519 return oeb
->id
> oea
->id
? 1 : -1;
522 if (TREE_CODE (oea
->op
) != SSA_NAME
)
524 if (TREE_CODE (oeb
->op
) != SSA_NAME
)
525 return oeb
->id
> oea
->id
? 1 : -1;
529 else if (TREE_CODE (oeb
->op
) != SSA_NAME
)
532 /* Lastly, make sure the versions that are the same go next to each
534 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
536 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
537 versions of removed SSA_NAMEs, so if possible, prefer to sort
538 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
540 gimple
*stmta
= SSA_NAME_DEF_STMT (oea
->op
);
541 gimple
*stmtb
= SSA_NAME_DEF_STMT (oeb
->op
);
542 basic_block bba
= gimple_bb (stmta
);
543 basic_block bbb
= gimple_bb (stmtb
);
546 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
547 but the other might not. */
552 /* If neither is, compare bb_rank. */
553 if (bb_rank
[bbb
->index
] != bb_rank
[bba
->index
])
554 return (bb_rank
[bbb
->index
] >> 16) - (bb_rank
[bba
->index
] >> 16);
557 bool da
= reassoc_stmt_dominates_stmt_p (stmta
, stmtb
);
558 bool db
= reassoc_stmt_dominates_stmt_p (stmtb
, stmta
);
562 return SSA_NAME_VERSION (oeb
->op
) > SSA_NAME_VERSION (oea
->op
) ? 1 : -1;
565 return oeb
->id
> oea
->id
? 1 : -1;
568 /* Add an operand entry to *OPS for the tree operand OP. */
571 add_to_ops_vec (vec
<operand_entry
*> *ops
, tree op
, gimple
*stmt_to_insert
= NULL
)
573 operand_entry
*oe
= operand_entry_pool
.allocate ();
576 oe
->rank
= get_rank (op
);
577 oe
->id
= next_operand_entry_id
++;
579 oe
->stmt_to_insert
= stmt_to_insert
;
583 /* Add an operand entry to *OPS for the tree operand OP with repeat
587 add_repeat_to_ops_vec (vec
<operand_entry
*> *ops
, tree op
,
588 HOST_WIDE_INT repeat
)
590 operand_entry
*oe
= operand_entry_pool
.allocate ();
593 oe
->rank
= get_rank (op
);
594 oe
->id
= next_operand_entry_id
++;
596 oe
->stmt_to_insert
= NULL
;
599 reassociate_stats
.pows_encountered
++;
602 /* Return true if STMT is reassociable operation containing a binary
603 operation with tree code CODE, and is inside LOOP. */
606 is_reassociable_op (gimple
*stmt
, enum tree_code code
, struct loop
*loop
)
608 basic_block bb
= gimple_bb (stmt
);
610 if (gimple_bb (stmt
) == NULL
)
613 if (!flow_bb_inside_loop_p (loop
, bb
))
616 if (is_gimple_assign (stmt
)
617 && gimple_assign_rhs_code (stmt
) == code
618 && has_single_use (gimple_assign_lhs (stmt
)))
620 tree rhs1
= gimple_assign_rhs1 (stmt
);
621 tree rhs2
= gimple_assign_rhs1 (stmt
);
622 if (TREE_CODE (rhs1
) == SSA_NAME
623 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1
))
626 && TREE_CODE (rhs2
) == SSA_NAME
627 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2
))
636 /* Return true if STMT is a nop-conversion. */
639 gimple_nop_conversion_p (gimple
*stmt
)
641 if (gassign
*ass
= dyn_cast
<gassign
*> (stmt
))
643 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass
))
644 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass
)),
645 TREE_TYPE (gimple_assign_rhs1 (ass
))))
651 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
652 operand of the negate operation. Otherwise, return NULL. */
655 get_unary_op (tree name
, enum tree_code opcode
)
657 gimple
*stmt
= SSA_NAME_DEF_STMT (name
);
659 /* Look through nop conversions (sign changes). */
660 if (gimple_nop_conversion_p (stmt
)
661 && TREE_CODE (gimple_assign_rhs1 (stmt
)) == SSA_NAME
)
662 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
664 if (!is_gimple_assign (stmt
))
667 if (gimple_assign_rhs_code (stmt
) == opcode
)
668 return gimple_assign_rhs1 (stmt
);
672 /* Return true if OP1 and OP2 have the same value if casted to either type. */
675 ops_equal_values_p (tree op1
, tree op2
)
681 if (TREE_CODE (op1
) == SSA_NAME
)
683 gimple
*stmt
= SSA_NAME_DEF_STMT (op1
);
684 if (gimple_nop_conversion_p (stmt
))
686 op1
= gimple_assign_rhs1 (stmt
);
692 if (TREE_CODE (op2
) == SSA_NAME
)
694 gimple
*stmt
= SSA_NAME_DEF_STMT (op2
);
695 if (gimple_nop_conversion_p (stmt
))
697 op2
= gimple_assign_rhs1 (stmt
);
708 /* If CURR and LAST are a pair of ops that OPCODE allows us to
709 eliminate through equivalences, do so, remove them from OPS, and
710 return true. Otherwise, return false. */
713 eliminate_duplicate_pair (enum tree_code opcode
,
714 vec
<operand_entry
*> *ops
,
721 /* If we have two of the same op, and the opcode is & |, min, or max,
722 we can eliminate one of them.
723 If we have two of the same op, and the opcode is ^, we can
724 eliminate both of them. */
726 if (last
&& last
->op
== curr
->op
)
734 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
736 fprintf (dump_file
, "Equivalence: ");
737 print_generic_expr (dump_file
, curr
->op
);
738 fprintf (dump_file
, " [&|minmax] ");
739 print_generic_expr (dump_file
, last
->op
);
740 fprintf (dump_file
, " -> ");
741 print_generic_stmt (dump_file
, last
->op
);
744 ops
->ordered_remove (i
);
745 reassociate_stats
.ops_eliminated
++;
750 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
752 fprintf (dump_file
, "Equivalence: ");
753 print_generic_expr (dump_file
, curr
->op
);
754 fprintf (dump_file
, " ^ ");
755 print_generic_expr (dump_file
, last
->op
);
756 fprintf (dump_file
, " -> nothing\n");
759 reassociate_stats
.ops_eliminated
+= 2;
761 if (ops
->length () == 2)
764 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
769 ops
->ordered_remove (i
-1);
770 ops
->ordered_remove (i
-1);
782 static vec
<tree
> plus_negates
;
784 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
785 expression, look in OPS for a corresponding positive operation to cancel
786 it out. If we find one, remove the other from OPS, replace
787 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
791 eliminate_plus_minus_pair (enum tree_code opcode
,
792 vec
<operand_entry
*> *ops
,
793 unsigned int currindex
,
801 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
804 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
805 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
806 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
809 /* Any non-negated version will have a rank that is one less than
810 the current rank. So once we hit those ranks, if we don't find
813 for (i
= currindex
+ 1;
814 ops
->iterate (i
, &oe
)
815 && oe
->rank
>= curr
->rank
- 1 ;
819 && ops_equal_values_p (oe
->op
, negateop
))
821 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
823 fprintf (dump_file
, "Equivalence: ");
824 print_generic_expr (dump_file
, negateop
);
825 fprintf (dump_file
, " + -");
826 print_generic_expr (dump_file
, oe
->op
);
827 fprintf (dump_file
, " -> 0\n");
830 ops
->ordered_remove (i
);
831 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
832 ops
->ordered_remove (currindex
);
833 reassociate_stats
.ops_eliminated
++;
838 && ops_equal_values_p (oe
->op
, notop
))
840 tree op_type
= TREE_TYPE (oe
->op
);
842 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
844 fprintf (dump_file
, "Equivalence: ");
845 print_generic_expr (dump_file
, notop
);
846 fprintf (dump_file
, " + ~");
847 print_generic_expr (dump_file
, oe
->op
);
848 fprintf (dump_file
, " -> -1\n");
851 ops
->ordered_remove (i
);
852 add_to_ops_vec (ops
, build_all_ones_cst (op_type
));
853 ops
->ordered_remove (currindex
);
854 reassociate_stats
.ops_eliminated
++;
860 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
861 save it for later inspection in repropagate_negates(). */
862 if (negateop
!= NULL_TREE
863 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr
->op
)) == NEGATE_EXPR
)
864 plus_negates
.safe_push (curr
->op
);
869 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
870 bitwise not expression, look in OPS for a corresponding operand to
871 cancel it out. If we find one, remove the other from OPS, replace
872 OPS[CURRINDEX] with 0, and return true. Otherwise, return
876 eliminate_not_pairs (enum tree_code opcode
,
877 vec
<operand_entry
*> *ops
,
878 unsigned int currindex
,
885 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
886 || TREE_CODE (curr
->op
) != SSA_NAME
)
889 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
890 if (notop
== NULL_TREE
)
893 /* Any non-not version will have a rank that is one less than
894 the current rank. So once we hit those ranks, if we don't find
897 for (i
= currindex
+ 1;
898 ops
->iterate (i
, &oe
)
899 && oe
->rank
>= curr
->rank
- 1;
904 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
906 fprintf (dump_file
, "Equivalence: ");
907 print_generic_expr (dump_file
, notop
);
908 if (opcode
== BIT_AND_EXPR
)
909 fprintf (dump_file
, " & ~");
910 else if (opcode
== BIT_IOR_EXPR
)
911 fprintf (dump_file
, " | ~");
912 print_generic_expr (dump_file
, oe
->op
);
913 if (opcode
== BIT_AND_EXPR
)
914 fprintf (dump_file
, " -> 0\n");
915 else if (opcode
== BIT_IOR_EXPR
)
916 fprintf (dump_file
, " -> -1\n");
919 if (opcode
== BIT_AND_EXPR
)
920 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
921 else if (opcode
== BIT_IOR_EXPR
)
922 oe
->op
= build_all_ones_cst (TREE_TYPE (oe
->op
));
924 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
926 ops
->quick_push (oe
);
934 /* Use constant value that may be present in OPS to try to eliminate
935 operands. Note that this function is only really used when we've
936 eliminated ops for other reasons, or merged constants. Across
937 single statements, fold already does all of this, plus more. There
938 is little point in duplicating logic, so I've only included the
939 identities that I could ever construct testcases to trigger. */
942 eliminate_using_constants (enum tree_code opcode
,
943 vec
<operand_entry
*> *ops
)
945 operand_entry
*oelast
= ops
->last ();
946 tree type
= TREE_TYPE (oelast
->op
);
948 if (oelast
->rank
== 0
949 && (ANY_INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
954 if (integer_zerop (oelast
->op
))
956 if (ops
->length () != 1)
958 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
959 fprintf (dump_file
, "Found & 0, removing all other ops\n");
961 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
964 ops
->quick_push (oelast
);
968 else if (integer_all_onesp (oelast
->op
))
970 if (ops
->length () != 1)
972 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
973 fprintf (dump_file
, "Found & -1, removing\n");
975 reassociate_stats
.ops_eliminated
++;
980 if (integer_all_onesp (oelast
->op
))
982 if (ops
->length () != 1)
984 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
985 fprintf (dump_file
, "Found | -1, removing all other ops\n");
987 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
990 ops
->quick_push (oelast
);
994 else if (integer_zerop (oelast
->op
))
996 if (ops
->length () != 1)
998 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
999 fprintf (dump_file
, "Found | 0, removing\n");
1001 reassociate_stats
.ops_eliminated
++;
1006 if (integer_zerop (oelast
->op
)
1007 || (FLOAT_TYPE_P (type
)
1008 && !HONOR_NANS (type
)
1009 && !HONOR_SIGNED_ZEROS (type
)
1010 && real_zerop (oelast
->op
)))
1012 if (ops
->length () != 1)
1014 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1015 fprintf (dump_file
, "Found * 0, removing all other ops\n");
1017 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
1019 ops
->quick_push (oelast
);
1023 else if (integer_onep (oelast
->op
)
1024 || (FLOAT_TYPE_P (type
)
1025 && !HONOR_SNANS (type
)
1026 && real_onep (oelast
->op
)))
1028 if (ops
->length () != 1)
1030 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1031 fprintf (dump_file
, "Found * 1, removing\n");
1033 reassociate_stats
.ops_eliminated
++;
1041 if (integer_zerop (oelast
->op
)
1042 || (FLOAT_TYPE_P (type
)
1043 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
1044 && fold_real_zero_addition_p (type
, oelast
->op
,
1045 opcode
== MINUS_EXPR
)))
1047 if (ops
->length () != 1)
1049 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1050 fprintf (dump_file
, "Found [|^+] 0, removing\n");
1052 reassociate_stats
.ops_eliminated
++;
1064 static void linearize_expr_tree (vec
<operand_entry
*> *, gimple
*,
1067 /* Structure for tracking and counting operands. */
1071 enum tree_code oecode
;
1076 /* The heap for the oecount hashtable and the sorted list of operands. */
1077 static vec
<oecount
> cvec
;
1080 /* Oecount hashtable helpers. */
1082 struct oecount_hasher
: int_hash
<int, 0, 1>
1084 static inline hashval_t
hash (int);
1085 static inline bool equal (int, int);
1088 /* Hash function for oecount. */
1091 oecount_hasher::hash (int p
)
1093 const oecount
*c
= &cvec
[p
- 42];
1094 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
1097 /* Comparison function for oecount. */
1100 oecount_hasher::equal (int p1
, int p2
)
1102 const oecount
*c1
= &cvec
[p1
- 42];
1103 const oecount
*c2
= &cvec
[p2
- 42];
1104 return c1
->oecode
== c2
->oecode
&& c1
->op
== c2
->op
;
1107 /* Comparison function for qsort sorting oecount elements by count. */
1110 oecount_cmp (const void *p1
, const void *p2
)
1112 const oecount
*c1
= (const oecount
*)p1
;
1113 const oecount
*c2
= (const oecount
*)p2
;
1114 if (c1
->cnt
!= c2
->cnt
)
1115 return c1
->cnt
> c2
->cnt
? 1 : -1;
1117 /* If counts are identical, use unique IDs to stabilize qsort. */
1118 return c1
->id
> c2
->id
? 1 : -1;
1121 /* Return TRUE iff STMT represents a builtin call that raises OP
1122 to some exponent. */
1125 stmt_is_power_of_op (gimple
*stmt
, tree op
)
1127 if (!is_gimple_call (stmt
))
1130 switch (gimple_call_combined_fn (stmt
))
1134 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1141 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1142 in place and return the result. Assumes that stmt_is_power_of_op
1143 was previously called for STMT and returned TRUE. */
1145 static HOST_WIDE_INT
1146 decrement_power (gimple
*stmt
)
1148 REAL_VALUE_TYPE c
, cint
;
1149 HOST_WIDE_INT power
;
1152 switch (gimple_call_combined_fn (stmt
))
1155 arg1
= gimple_call_arg (stmt
, 1);
1156 c
= TREE_REAL_CST (arg1
);
1157 power
= real_to_integer (&c
) - 1;
1158 real_from_integer (&cint
, VOIDmode
, power
, SIGNED
);
1159 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1163 arg1
= gimple_call_arg (stmt
, 1);
1164 power
= TREE_INT_CST_LOW (arg1
) - 1;
1165 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1173 /* Replace SSA defined by STMT and replace all its uses with new
1174 SSA. Also return the new SSA. */
1177 make_new_ssa_for_def (gimple
*stmt
, enum tree_code opcode
, tree op
)
1181 imm_use_iterator iter
;
1182 tree new_lhs
, new_debug_lhs
= NULL_TREE
;
1183 tree lhs
= gimple_get_lhs (stmt
);
1185 new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
1186 gimple_set_lhs (stmt
, new_lhs
);
1188 /* Also need to update GIMPLE_DEBUGs. */
1189 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
1191 tree repl
= new_lhs
;
1192 if (is_gimple_debug (use_stmt
))
1194 if (new_debug_lhs
== NULL_TREE
)
1196 new_debug_lhs
= make_node (DEBUG_EXPR_DECL
);
1198 = gimple_build_debug_bind (new_debug_lhs
,
1199 build2 (opcode
, TREE_TYPE (lhs
),
1202 DECL_ARTIFICIAL (new_debug_lhs
) = 1;
1203 TREE_TYPE (new_debug_lhs
) = TREE_TYPE (lhs
);
1204 SET_DECL_MODE (new_debug_lhs
, TYPE_MODE (TREE_TYPE (lhs
)));
1205 gimple_set_uid (def_temp
, gimple_uid (stmt
));
1206 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1207 gsi_insert_after (&gsi
, def_temp
, GSI_SAME_STMT
);
1209 repl
= new_debug_lhs
;
1211 FOR_EACH_IMM_USE_ON_STMT (use
, iter
)
1212 SET_USE (use
, repl
);
1213 update_stmt (use_stmt
);
1218 /* Replace all SSAs defined in STMTS_TO_FIX and replace its
1219 uses with new SSAs. Also do this for the stmt that defines DEF
1220 if *DEF is not OP. */
1223 make_new_ssa_for_all_defs (tree
*def
, enum tree_code opcode
, tree op
,
1224 vec
<gimple
*> &stmts_to_fix
)
1230 && TREE_CODE (*def
) == SSA_NAME
1231 && (stmt
= SSA_NAME_DEF_STMT (*def
))
1232 && gimple_code (stmt
) != GIMPLE_NOP
)
1233 *def
= make_new_ssa_for_def (stmt
, opcode
, op
);
1235 FOR_EACH_VEC_ELT (stmts_to_fix
, i
, stmt
)
1236 make_new_ssa_for_def (stmt
, opcode
, op
);
1239 /* Find the single immediate use of STMT's LHS, and replace it
1240 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1241 replace *DEF with OP as well. */
1244 propagate_op_to_single_use (tree op
, gimple
*stmt
, tree
*def
)
1249 gimple_stmt_iterator gsi
;
1251 if (is_gimple_call (stmt
))
1252 lhs
= gimple_call_lhs (stmt
);
1254 lhs
= gimple_assign_lhs (stmt
);
1256 gcc_assert (has_single_use (lhs
));
1257 single_imm_use (lhs
, &use
, &use_stmt
);
1261 if (TREE_CODE (op
) != SSA_NAME
)
1262 update_stmt (use_stmt
);
1263 gsi
= gsi_for_stmt (stmt
);
1264 unlink_stmt_vdef (stmt
);
1265 reassoc_remove_stmt (&gsi
);
1266 release_defs (stmt
);
1269 /* Walks the linear chain with result *DEF searching for an operation
1270 with operand OP and code OPCODE removing that from the chain. *DEF
1271 is updated if there is only one operand but no operation left. */
1274 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1276 tree orig_def
= *def
;
1277 gimple
*stmt
= SSA_NAME_DEF_STMT (*def
);
1278 /* PR72835 - Record the stmt chain that has to be updated such that
1279 we don't use the same LHS when the values computed are different. */
1280 auto_vec
<gimple
*, 64> stmts_to_fix
;
1286 if (opcode
== MULT_EXPR
)
1288 if (stmt_is_power_of_op (stmt
, op
))
1290 if (decrement_power (stmt
) == 1)
1292 if (stmts_to_fix
.length () > 0)
1293 stmts_to_fix
.pop ();
1294 propagate_op_to_single_use (op
, stmt
, def
);
1298 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
)
1300 if (gimple_assign_rhs1 (stmt
) == op
)
1302 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
1303 if (stmts_to_fix
.length () > 0)
1304 stmts_to_fix
.pop ();
1305 propagate_op_to_single_use (cst
, stmt
, def
);
1308 else if (integer_minus_onep (op
)
1309 || real_minus_onep (op
))
1311 gimple_assign_set_rhs_code
1312 (stmt
, TREE_CODE (gimple_assign_rhs1 (stmt
)));
1318 name
= gimple_assign_rhs1 (stmt
);
1320 /* If this is the operation we look for and one of the operands
1321 is ours simply propagate the other operand into the stmts
1323 if (gimple_assign_rhs_code (stmt
) == opcode
1325 || gimple_assign_rhs2 (stmt
) == op
))
1328 name
= gimple_assign_rhs2 (stmt
);
1329 if (stmts_to_fix
.length () > 0)
1330 stmts_to_fix
.pop ();
1331 propagate_op_to_single_use (name
, stmt
, def
);
1335 /* We might have a multiply of two __builtin_pow* calls, and
1336 the operand might be hiding in the rightmost one. Likewise
1337 this can happen for a negate. */
1338 if (opcode
== MULT_EXPR
1339 && gimple_assign_rhs_code (stmt
) == opcode
1340 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1341 && has_single_use (gimple_assign_rhs2 (stmt
)))
1343 gimple
*stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1344 if (stmt_is_power_of_op (stmt2
, op
))
1346 if (decrement_power (stmt2
) == 1)
1347 propagate_op_to_single_use (op
, stmt2
, def
);
1349 stmts_to_fix
.safe_push (stmt2
);
1352 else if (is_gimple_assign (stmt2
)
1353 && gimple_assign_rhs_code (stmt2
) == NEGATE_EXPR
)
1355 if (gimple_assign_rhs1 (stmt2
) == op
)
1357 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
1358 propagate_op_to_single_use (cst
, stmt2
, def
);
1361 else if (integer_minus_onep (op
)
1362 || real_minus_onep (op
))
1364 stmts_to_fix
.safe_push (stmt2
);
1365 gimple_assign_set_rhs_code
1366 (stmt2
, TREE_CODE (gimple_assign_rhs1 (stmt2
)));
1372 /* Continue walking the chain. */
1373 gcc_assert (name
!= op
1374 && TREE_CODE (name
) == SSA_NAME
);
1375 stmt
= SSA_NAME_DEF_STMT (name
);
1376 stmts_to_fix
.safe_push (stmt
);
1380 if (stmts_to_fix
.length () > 0 || *def
== orig_def
)
1381 make_new_ssa_for_all_defs (def
, opcode
, op
, stmts_to_fix
);
1384 /* Returns true if statement S1 dominates statement S2. Like
1385 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1388 reassoc_stmt_dominates_stmt_p (gimple
*s1
, gimple
*s2
)
1390 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1392 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1393 SSA_NAME. Assume it lives at the beginning of function and
1394 thus dominates everything. */
1395 if (!bb1
|| s1
== s2
)
1398 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1404 /* PHIs in the same basic block are assumed to be
1405 executed all in parallel, if only one stmt is a PHI,
1406 it dominates the other stmt in the same basic block. */
1407 if (gimple_code (s1
) == GIMPLE_PHI
)
1410 if (gimple_code (s2
) == GIMPLE_PHI
)
1413 gcc_assert (gimple_uid (s1
) && gimple_uid (s2
));
1415 if (gimple_uid (s1
) < gimple_uid (s2
))
1418 if (gimple_uid (s1
) > gimple_uid (s2
))
1421 gimple_stmt_iterator gsi
= gsi_for_stmt (s1
);
1422 unsigned int uid
= gimple_uid (s1
);
1423 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1425 gimple
*s
= gsi_stmt (gsi
);
1426 if (gimple_uid (s
) != uid
)
1435 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
1438 /* Insert STMT after INSERT_POINT. */
1441 insert_stmt_after (gimple
*stmt
, gimple
*insert_point
)
1443 gimple_stmt_iterator gsi
;
1446 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1447 bb
= gimple_bb (insert_point
);
1448 else if (!stmt_ends_bb_p (insert_point
))
1450 gsi
= gsi_for_stmt (insert_point
);
1451 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1452 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1456 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1457 thus if it must end a basic block, it should be a call that can
1458 throw, or some assignment that can throw. If it throws, the LHS
1459 of it will not be initialized though, so only valid places using
1460 the SSA_NAME should be dominated by the fallthru edge. */
1461 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
1462 gsi
= gsi_after_labels (bb
);
1463 if (gsi_end_p (gsi
))
1465 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1466 gimple_set_uid (stmt
,
1467 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1470 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1471 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1474 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1475 the result. Places the statement after the definition of either
1476 OP1 or OP2. Returns the new statement. */
1479 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
1481 gimple
*op1def
= NULL
, *op2def
= NULL
;
1482 gimple_stmt_iterator gsi
;
1486 /* Create the addition statement. */
1487 op
= make_ssa_name (type
);
1488 sum
= gimple_build_assign (op
, opcode
, op1
, op2
);
1490 /* Find an insertion place and insert. */
1491 if (TREE_CODE (op1
) == SSA_NAME
)
1492 op1def
= SSA_NAME_DEF_STMT (op1
);
1493 if (TREE_CODE (op2
) == SSA_NAME
)
1494 op2def
= SSA_NAME_DEF_STMT (op2
);
1495 if ((!op1def
|| gimple_nop_p (op1def
))
1496 && (!op2def
|| gimple_nop_p (op2def
)))
1498 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1499 if (gsi_end_p (gsi
))
1501 gimple_stmt_iterator gsi2
1502 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1503 gimple_set_uid (sum
,
1504 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1507 gimple_set_uid (sum
, gimple_uid (gsi_stmt (gsi
)));
1508 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1512 gimple
*insert_point
;
1513 if ((!op1def
|| gimple_nop_p (op1def
))
1514 || (op2def
&& !gimple_nop_p (op2def
)
1515 && reassoc_stmt_dominates_stmt_p (op1def
, op2def
)))
1516 insert_point
= op2def
;
1518 insert_point
= op1def
;
1519 insert_stmt_after (sum
, insert_point
);
1526 /* Perform un-distribution of divisions and multiplications.
1527 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1528 to (A + B) / X for real X.
1530 The algorithm is organized as follows.
1532 - First we walk the addition chain *OPS looking for summands that
1533 are defined by a multiplication or a real division. This results
1534 in the candidates bitmap with relevant indices into *OPS.
1536 - Second we build the chains of multiplications or divisions for
1537 these candidates, counting the number of occurrences of (operand, code)
1538 pairs in all of the candidates chains.
1540 - Third we sort the (operand, code) pairs by number of occurrence and
1541 process them starting with the pair with the most uses.
1543 * For each such pair we walk the candidates again to build a
1544 second candidate bitmap noting all multiplication/division chains
1545 that have at least one occurrence of (operand, code).
1547 * We build an alternate addition chain only covering these
1548 candidates with one (operand, code) operation removed from their
1549 multiplication/division chain.
1551 * The first candidate gets replaced by the alternate addition chain
1552 multiplied/divided by the operand.
1554 * All candidate chains get disabled for further processing and
1555 processing of (operand, code) pairs continues.
1557 The alternate addition chains built are re-processed by the main
1558 reassociation algorithm which allows optimizing a * x * y + b * y * x
1559 to (a + b ) * x * y in one invocation of the reassociation pass. */
1562 undistribute_ops_list (enum tree_code opcode
,
1563 vec
<operand_entry
*> *ops
, struct loop
*loop
)
1565 unsigned int length
= ops
->length ();
1568 unsigned nr_candidates
, nr_candidates2
;
1569 sbitmap_iterator sbi0
;
1570 vec
<operand_entry
*> *subops
;
1571 bool changed
= false;
1572 unsigned int next_oecount_id
= 0;
1575 || opcode
!= PLUS_EXPR
)
1578 /* Build a list of candidates to process. */
1579 auto_sbitmap
candidates (length
);
1580 bitmap_clear (candidates
);
1582 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1584 enum tree_code dcode
;
1587 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1589 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1590 if (!is_gimple_assign (oe1def
))
1592 dcode
= gimple_assign_rhs_code (oe1def
);
1593 if ((dcode
!= MULT_EXPR
1594 && dcode
!= RDIV_EXPR
)
1595 || !is_reassociable_op (oe1def
, dcode
, loop
))
1598 bitmap_set_bit (candidates
, i
);
1602 if (nr_candidates
< 2)
1605 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1607 fprintf (dump_file
, "searching for un-distribute opportunities ");
1608 print_generic_expr (dump_file
,
1609 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, 0);
1610 fprintf (dump_file
, " %d\n", nr_candidates
);
1613 /* Build linearized sub-operand lists and the counting table. */
1616 hash_table
<oecount_hasher
> ctable (15);
1618 /* ??? Macro arguments cannot have multi-argument template types in
1619 them. This typedef is needed to workaround that limitation. */
1620 typedef vec
<operand_entry
*> vec_operand_entry_t_heap
;
1621 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1622 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1625 enum tree_code oecode
;
1628 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1629 oecode
= gimple_assign_rhs_code (oedef
);
1630 linearize_expr_tree (&subops
[i
], oedef
,
1631 associative_tree_code (oecode
), false);
1633 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1640 c
.id
= next_oecount_id
++;
1643 idx
= cvec
.length () + 41;
1644 slot
= ctable
.find_slot (idx
, INSERT
);
1652 cvec
[*slot
- 42].cnt
++;
1657 /* Sort the counting table. */
1658 cvec
.qsort (oecount_cmp
);
1660 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1663 fprintf (dump_file
, "Candidates:\n");
1664 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1666 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1667 c
->oecode
== MULT_EXPR
1668 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1669 print_generic_expr (dump_file
, c
->op
);
1670 fprintf (dump_file
, "\n");
1674 /* Process the (operand, code) pairs in order of most occurrence. */
1675 auto_sbitmap
candidates2 (length
);
1676 while (!cvec
.is_empty ())
1678 oecount
*c
= &cvec
.last ();
1682 /* Now collect the operands in the outer chain that contain
1683 the common operand in their inner chain. */
1684 bitmap_clear (candidates2
);
1686 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1689 enum tree_code oecode
;
1691 tree op
= (*ops
)[i
]->op
;
1693 /* If we undistributed in this chain already this may be
1695 if (TREE_CODE (op
) != SSA_NAME
)
1698 oedef
= SSA_NAME_DEF_STMT (op
);
1699 oecode
= gimple_assign_rhs_code (oedef
);
1700 if (oecode
!= c
->oecode
)
1703 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1705 if (oe1
->op
== c
->op
)
1707 bitmap_set_bit (candidates2
, i
);
1714 if (nr_candidates2
>= 2)
1716 operand_entry
*oe1
, *oe2
;
1718 int first
= bitmap_first_set_bit (candidates2
);
1720 /* Build the new addition chain. */
1721 oe1
= (*ops
)[first
];
1722 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1724 fprintf (dump_file
, "Building (");
1725 print_generic_expr (dump_file
, oe1
->op
);
1727 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1728 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1732 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1734 fprintf (dump_file
, " + ");
1735 print_generic_expr (dump_file
, oe2
->op
);
1737 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1738 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1739 oe1
->op
, oe2
->op
, opcode
);
1740 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1742 oe1
->op
= gimple_get_lhs (sum
);
1745 /* Apply the multiplication/division. */
1746 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1747 oe1
->op
, c
->op
, c
->oecode
);
1748 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1750 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1751 print_generic_expr (dump_file
, c
->op
);
1752 fprintf (dump_file
, "\n");
1755 /* Record it in the addition chain and disable further
1756 undistribution with this op. */
1757 oe1
->op
= gimple_assign_lhs (prod
);
1758 oe1
->rank
= get_rank (oe1
->op
);
1759 subops
[first
].release ();
1767 for (i
= 0; i
< ops
->length (); ++i
)
1768 subops
[i
].release ();
1775 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1776 expression, examine the other OPS to see if any of them are comparisons
1777 of the same values, which we may be able to combine or eliminate.
1778 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1781 eliminate_redundant_comparison (enum tree_code opcode
,
1782 vec
<operand_entry
*> *ops
,
1783 unsigned int currindex
,
1784 operand_entry
*curr
)
1787 enum tree_code lcode
, rcode
;
1788 gimple
*def1
, *def2
;
1792 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
1795 /* Check that CURR is a comparison. */
1796 if (TREE_CODE (curr
->op
) != SSA_NAME
)
1798 def1
= SSA_NAME_DEF_STMT (curr
->op
);
1799 if (!is_gimple_assign (def1
))
1801 lcode
= gimple_assign_rhs_code (def1
);
1802 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
1804 op1
= gimple_assign_rhs1 (def1
);
1805 op2
= gimple_assign_rhs2 (def1
);
1807 /* Now look for a similar comparison in the remaining OPS. */
1808 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
1812 if (TREE_CODE (oe
->op
) != SSA_NAME
)
1814 def2
= SSA_NAME_DEF_STMT (oe
->op
);
1815 if (!is_gimple_assign (def2
))
1817 rcode
= gimple_assign_rhs_code (def2
);
1818 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
1821 /* If we got here, we have a match. See if we can combine the
1823 if (opcode
== BIT_IOR_EXPR
)
1824 t
= maybe_fold_or_comparisons (lcode
, op1
, op2
,
1825 rcode
, gimple_assign_rhs1 (def2
),
1826 gimple_assign_rhs2 (def2
));
1828 t
= maybe_fold_and_comparisons (lcode
, op1
, op2
,
1829 rcode
, gimple_assign_rhs1 (def2
),
1830 gimple_assign_rhs2 (def2
));
1834 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1835 always give us a boolean_type_node value back. If the original
1836 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1837 we need to convert. */
1838 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
1839 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
1841 if (TREE_CODE (t
) != INTEGER_CST
1842 && !operand_equal_p (t
, curr
->op
, 0))
1844 enum tree_code subcode
;
1845 tree newop1
, newop2
;
1846 if (!COMPARISON_CLASS_P (t
))
1848 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1849 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1850 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1851 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
1855 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1857 fprintf (dump_file
, "Equivalence: ");
1858 print_generic_expr (dump_file
, curr
->op
);
1859 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
1860 print_generic_expr (dump_file
, oe
->op
);
1861 fprintf (dump_file
, " -> ");
1862 print_generic_expr (dump_file
, t
);
1863 fprintf (dump_file
, "\n");
1866 /* Now we can delete oe, as it has been subsumed by the new combined
1868 ops
->ordered_remove (i
);
1869 reassociate_stats
.ops_eliminated
++;
1871 /* If t is the same as curr->op, we're done. Otherwise we must
1872 replace curr->op with t. Special case is if we got a constant
1873 back, in which case we add it to the end instead of in place of
1874 the current entry. */
1875 if (TREE_CODE (t
) == INTEGER_CST
)
1877 ops
->ordered_remove (currindex
);
1878 add_to_ops_vec (ops
, t
);
1880 else if (!operand_equal_p (t
, curr
->op
, 0))
1883 enum tree_code subcode
;
1886 gcc_assert (COMPARISON_CLASS_P (t
));
1887 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1888 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1889 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1890 gcc_checking_assert (is_gimple_val (newop1
)
1891 && is_gimple_val (newop2
));
1892 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
1893 curr
->op
= gimple_get_lhs (sum
);
1902 /* Transform repeated addition of same values into multiply with
1905 transform_add_to_multiply (vec
<operand_entry
*> *ops
)
1908 tree op
= NULL_TREE
;
1910 int i
, start
= -1, end
= 0, count
= 0;
1911 auto_vec
<std::pair
<int, int> > indxs
;
1912 bool changed
= false;
1914 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops
)[0]->op
))
1915 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops
)[0]->op
))
1916 || !flag_unsafe_math_optimizations
))
1919 /* Look for repeated operands. */
1920 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
1928 else if (operand_equal_p (oe
->op
, op
, 0))
1936 indxs
.safe_push (std::make_pair (start
, end
));
1944 indxs
.safe_push (std::make_pair (start
, end
));
1946 for (j
= indxs
.length () - 1; j
>= 0; --j
)
1948 /* Convert repeated operand addition to multiplication. */
1949 start
= indxs
[j
].first
;
1950 end
= indxs
[j
].second
;
1951 op
= (*ops
)[start
]->op
;
1952 count
= end
- start
+ 1;
1953 for (i
= end
; i
>= start
; --i
)
1954 ops
->unordered_remove (i
);
1955 tree tmp
= make_ssa_name (TREE_TYPE (op
));
1956 tree cst
= build_int_cst (integer_type_node
, count
);
1958 = gimple_build_assign (tmp
, MULT_EXPR
,
1959 op
, fold_convert (TREE_TYPE (op
), cst
));
1960 gimple_set_visited (mul_stmt
, true);
1961 add_to_ops_vec (ops
, tmp
, mul_stmt
);
1969 /* Perform various identities and other optimizations on the list of
1970 operand entries, stored in OPS. The tree code for the binary
1971 operation between all the operands is OPCODE. */
1974 optimize_ops_list (enum tree_code opcode
,
1975 vec
<operand_entry
*> *ops
)
1977 unsigned int length
= ops
->length ();
1980 operand_entry
*oelast
= NULL
;
1981 bool iterate
= false;
1986 oelast
= ops
->last ();
1988 /* If the last two are constants, pop the constants off, merge them
1989 and try the next two. */
1990 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
1992 operand_entry
*oelm1
= (*ops
)[length
- 2];
1994 if (oelm1
->rank
== 0
1995 && is_gimple_min_invariant (oelm1
->op
)
1996 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
1997 TREE_TYPE (oelast
->op
)))
1999 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
2000 oelm1
->op
, oelast
->op
);
2002 if (folded
&& is_gimple_min_invariant (folded
))
2004 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2005 fprintf (dump_file
, "Merging constants\n");
2010 add_to_ops_vec (ops
, folded
);
2011 reassociate_stats
.constants_eliminated
++;
2013 optimize_ops_list (opcode
, ops
);
2019 eliminate_using_constants (opcode
, ops
);
2022 for (i
= 0; ops
->iterate (i
, &oe
);)
2026 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
2028 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
2029 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
2030 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
2042 length
= ops
->length ();
2043 oelast
= ops
->last ();
2046 optimize_ops_list (opcode
, ops
);
2049 /* The following functions are subroutines to optimize_range_tests and allow
2050 it to try to change a logical combination of comparisons into a range
2054 X == 2 || X == 5 || X == 3 || X == 4
2058 (unsigned) (X - 2) <= 3
2060 For more information see comments above fold_test_range in fold-const.c,
2061 this implementation is for GIMPLE. */
2069 bool strict_overflow_p
;
2070 unsigned int idx
, next
;
2073 /* This is similar to make_range in fold-const.c, but on top of
2074 GIMPLE instead of trees. If EXP is non-NULL, it should be
2075 an SSA_NAME and STMT argument is ignored, otherwise STMT
2076 argument should be a GIMPLE_COND. */
2079 init_range_entry (struct range_entry
*r
, tree exp
, gimple
*stmt
)
2083 bool is_bool
, strict_overflow_p
;
2087 r
->strict_overflow_p
= false;
2089 r
->high
= NULL_TREE
;
2090 if (exp
!= NULL_TREE
2091 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
2094 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2095 and see if we can refine the range. Some of the cases below may not
2096 happen, but it doesn't seem worth worrying about this. We "continue"
2097 the outer loop when we've changed something; otherwise we "break"
2098 the switch, which will "break" the while. */
2099 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
2102 strict_overflow_p
= false;
2104 if (exp
== NULL_TREE
)
2106 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
2108 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
2113 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
2118 enum tree_code code
;
2119 tree arg0
, arg1
, exp_type
;
2123 if (exp
!= NULL_TREE
)
2125 if (TREE_CODE (exp
) != SSA_NAME
2126 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
))
2129 stmt
= SSA_NAME_DEF_STMT (exp
);
2130 if (!is_gimple_assign (stmt
))
2133 code
= gimple_assign_rhs_code (stmt
);
2134 arg0
= gimple_assign_rhs1 (stmt
);
2135 arg1
= gimple_assign_rhs2 (stmt
);
2136 exp_type
= TREE_TYPE (exp
);
2140 code
= gimple_cond_code (stmt
);
2141 arg0
= gimple_cond_lhs (stmt
);
2142 arg1
= gimple_cond_rhs (stmt
);
2143 exp_type
= boolean_type_node
;
2146 if (TREE_CODE (arg0
) != SSA_NAME
)
2148 loc
= gimple_location (stmt
);
2152 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
2153 /* Ensure the range is either +[-,0], +[0,0],
2154 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2155 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2156 or similar expression of unconditional true or
2157 false, it should not be negated. */
2158 && ((high
&& integer_zerop (high
))
2159 || (low
&& integer_onep (low
))))
2172 if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
2174 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
2179 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
2194 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
2196 &strict_overflow_p
);
2197 if (nexp
!= NULL_TREE
)
2200 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2213 r
->strict_overflow_p
= strict_overflow_p
;
2217 /* Comparison function for qsort. Sort entries
2218 without SSA_NAME exp first, then with SSA_NAMEs sorted
2219 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2220 by increasing ->low and if ->low is the same, by increasing
2221 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2225 range_entry_cmp (const void *a
, const void *b
)
2227 const struct range_entry
*p
= (const struct range_entry
*) a
;
2228 const struct range_entry
*q
= (const struct range_entry
*) b
;
2230 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
2232 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2234 /* Group range_entries for the same SSA_NAME together. */
2235 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
2237 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
2239 /* If ->low is different, NULL low goes first, then by
2241 if (p
->low
!= NULL_TREE
)
2243 if (q
->low
!= NULL_TREE
)
2245 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2247 if (tem
&& integer_onep (tem
))
2249 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2251 if (tem
&& integer_onep (tem
))
2257 else if (q
->low
!= NULL_TREE
)
2259 /* If ->high is different, NULL high goes last, before that by
2261 if (p
->high
!= NULL_TREE
)
2263 if (q
->high
!= NULL_TREE
)
2265 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2267 if (tem
&& integer_onep (tem
))
2269 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2271 if (tem
&& integer_onep (tem
))
2277 else if (q
->high
!= NULL_TREE
)
2279 /* If both ranges are the same, sort below by ascending idx. */
2284 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2287 if (p
->idx
< q
->idx
)
2291 gcc_checking_assert (p
->idx
> q
->idx
);
2296 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2297 insert needed statements BEFORE or after GSI. */
2300 force_into_ssa_name (gimple_stmt_iterator
*gsi
, tree expr
, bool before
)
2302 enum gsi_iterator_update m
= before
? GSI_SAME_STMT
: GSI_CONTINUE_LINKING
;
2303 tree ret
= force_gimple_operand_gsi (gsi
, expr
, true, NULL_TREE
, before
, m
);
2304 if (TREE_CODE (ret
) != SSA_NAME
)
2306 gimple
*g
= gimple_build_assign (make_ssa_name (TREE_TYPE (ret
)), ret
);
2308 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
2310 gsi_insert_after (gsi
, g
, GSI_CONTINUE_LINKING
);
2311 ret
= gimple_assign_lhs (g
);
2316 /* Helper routine of optimize_range_test.
2317 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2318 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2319 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2320 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2321 an array of COUNT pointers to other ranges. Return
2322 true if the range merge has been successful.
2323 If OPCODE is ERROR_MARK, this is called from within
2324 maybe_optimize_range_tests and is performing inter-bb range optimization.
2325 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2329 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2330 struct range_entry
**otherrangep
,
2331 unsigned int count
, enum tree_code opcode
,
2332 vec
<operand_entry
*> *ops
, tree exp
, gimple_seq seq
,
2333 bool in_p
, tree low
, tree high
, bool strict_overflow_p
)
2335 operand_entry
*oe
= (*ops
)[range
->idx
];
2337 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
2338 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2339 location_t loc
= gimple_location (stmt
);
2340 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2341 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2343 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2344 gimple_stmt_iterator gsi
;
2345 unsigned int i
, uid
;
2347 if (tem
== NULL_TREE
)
2350 /* If op is default def SSA_NAME, there is no place to insert the
2351 new comparison. Give up, unless we can use OP itself as the
2353 if (op
&& SSA_NAME_IS_DEFAULT_DEF (op
))
2355 if (op
== range
->exp
2356 && ((TYPE_PRECISION (optype
) == 1 && TYPE_UNSIGNED (optype
))
2357 || TREE_CODE (optype
) == BOOLEAN_TYPE
)
2359 || (TREE_CODE (tem
) == EQ_EXPR
2360 && TREE_OPERAND (tem
, 0) == op
2361 && integer_onep (TREE_OPERAND (tem
, 1))))
2362 && opcode
!= BIT_IOR_EXPR
2363 && (opcode
!= ERROR_MARK
|| oe
->rank
!= BIT_IOR_EXPR
))
2372 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2373 warning_at (loc
, OPT_Wstrict_overflow
,
2374 "assuming signed overflow does not occur "
2375 "when simplifying range test");
2377 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2379 struct range_entry
*r
;
2380 fprintf (dump_file
, "Optimizing range tests ");
2381 print_generic_expr (dump_file
, range
->exp
);
2382 fprintf (dump_file
, " %c[", range
->in_p
? '+' : '-');
2383 print_generic_expr (dump_file
, range
->low
);
2384 fprintf (dump_file
, ", ");
2385 print_generic_expr (dump_file
, range
->high
);
2386 fprintf (dump_file
, "]");
2387 for (i
= 0; i
< count
; i
++)
2394 && r
->exp
!= range
->exp
2395 && TREE_CODE (r
->exp
) == SSA_NAME
)
2397 fprintf (dump_file
, " and ");
2398 print_generic_expr (dump_file
, r
->exp
);
2401 fprintf (dump_file
, " and");
2402 fprintf (dump_file
, " %c[", r
->in_p
? '+' : '-');
2403 print_generic_expr (dump_file
, r
->low
);
2404 fprintf (dump_file
, ", ");
2405 print_generic_expr (dump_file
, r
->high
);
2406 fprintf (dump_file
, "]");
2408 fprintf (dump_file
, "\n into ");
2409 print_generic_expr (dump_file
, tem
);
2410 fprintf (dump_file
, "\n");
2413 if (opcode
== BIT_IOR_EXPR
2414 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2415 tem
= invert_truthvalue_loc (loc
, tem
);
2417 tem
= fold_convert_loc (loc
, optype
, tem
);
2420 gsi
= gsi_for_stmt (stmt
);
2421 uid
= gimple_uid (stmt
);
2429 gcc_checking_assert (tem
== op
);
2430 /* In rare cases range->exp can be equal to lhs of stmt.
2431 In that case we have to insert after the stmt rather then before
2432 it. If stmt is a PHI, insert it at the start of the basic block. */
2433 else if (op
!= range
->exp
)
2435 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2436 tem
= force_into_ssa_name (&gsi
, tem
, true);
2439 else if (gimple_code (stmt
) != GIMPLE_PHI
)
2441 gsi_insert_seq_after (&gsi
, seq
, GSI_CONTINUE_LINKING
);
2442 tem
= force_into_ssa_name (&gsi
, tem
, false);
2446 gsi
= gsi_after_labels (gimple_bb (stmt
));
2447 if (!gsi_end_p (gsi
))
2448 uid
= gimple_uid (gsi_stmt (gsi
));
2451 gsi
= gsi_start_bb (gimple_bb (stmt
));
2453 while (!gsi_end_p (gsi
))
2455 uid
= gimple_uid (gsi_stmt (gsi
));
2459 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2460 tem
= force_into_ssa_name (&gsi
, tem
, true);
2461 if (gsi_end_p (gsi
))
2462 gsi
= gsi_last_bb (gimple_bb (stmt
));
2466 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2467 if (gimple_uid (gsi_stmt (gsi
)))
2470 gimple_set_uid (gsi_stmt (gsi
), uid
);
2477 range
->strict_overflow_p
= false;
2479 for (i
= 0; i
< count
; i
++)
2482 range
= otherrange
+ i
;
2484 range
= otherrangep
[i
];
2485 oe
= (*ops
)[range
->idx
];
2486 /* Now change all the other range test immediate uses, so that
2487 those tests will be optimized away. */
2488 if (opcode
== ERROR_MARK
)
2491 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
2492 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
2494 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
2495 ? boolean_false_node
: boolean_true_node
);
2498 oe
->op
= error_mark_node
;
2499 range
->exp
= NULL_TREE
;
2500 range
->low
= NULL_TREE
;
2501 range
->high
= NULL_TREE
;
2506 /* Optimize X == CST1 || X == CST2
2507 if popcount (CST1 ^ CST2) == 1 into
2508 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2509 Similarly for ranges. E.g.
2510 X != 2 && X != 3 && X != 10 && X != 11
2511 will be transformed by the previous optimization into
2512 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2513 and this loop can transform that into
2514 !(((X & ~8) - 2U) <= 1U). */
2517 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
2518 tree lowi
, tree lowj
, tree highi
, tree highj
,
2519 vec
<operand_entry
*> *ops
,
2520 struct range_entry
*rangei
,
2521 struct range_entry
*rangej
)
2523 tree lowxor
, highxor
, tem
, exp
;
2524 /* Check lowi ^ lowj == highi ^ highj and
2525 popcount (lowi ^ lowj) == 1. */
2526 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
2527 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
2529 if (!integer_pow2p (lowxor
))
2531 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
2532 if (!tree_int_cst_equal (lowxor
, highxor
))
2535 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
2536 exp
= fold_build2 (BIT_AND_EXPR
, type
, rangei
->exp
, tem
);
2537 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
2538 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
2539 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, exp
,
2540 NULL
, rangei
->in_p
, lowj
, highj
,
2541 rangei
->strict_overflow_p
2542 || rangej
->strict_overflow_p
))
2547 /* Optimize X == CST1 || X == CST2
2548 if popcount (CST2 - CST1) == 1 into
2549 ((X - CST1) & ~(CST2 - CST1)) == 0.
2550 Similarly for ranges. E.g.
2551 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2552 || X == 75 || X == 45
2553 will be transformed by the previous optimization into
2554 (X - 43U) <= 3U || (X - 75U) <= 3U
2555 and this loop can transform that into
2556 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2558 optimize_range_tests_diff (enum tree_code opcode
, tree type
,
2559 tree lowi
, tree lowj
, tree highi
, tree highj
,
2560 vec
<operand_entry
*> *ops
,
2561 struct range_entry
*rangei
,
2562 struct range_entry
*rangej
)
2564 tree tem1
, tem2
, mask
;
2565 /* Check highi - lowi == highj - lowj. */
2566 tem1
= fold_binary (MINUS_EXPR
, type
, highi
, lowi
);
2567 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2569 tem2
= fold_binary (MINUS_EXPR
, type
, highj
, lowj
);
2570 if (!tree_int_cst_equal (tem1
, tem2
))
2572 /* Check popcount (lowj - lowi) == 1. */
2573 tem1
= fold_binary (MINUS_EXPR
, type
, lowj
, lowi
);
2574 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2576 if (!integer_pow2p (tem1
))
2579 type
= unsigned_type_for (type
);
2580 tem1
= fold_convert (type
, tem1
);
2581 tem2
= fold_convert (type
, tem2
);
2582 lowi
= fold_convert (type
, lowi
);
2583 mask
= fold_build1 (BIT_NOT_EXPR
, type
, tem1
);
2584 tem1
= fold_build2 (MINUS_EXPR
, type
,
2585 fold_convert (type
, rangei
->exp
), lowi
);
2586 tem1
= fold_build2 (BIT_AND_EXPR
, type
, tem1
, mask
);
2587 lowj
= build_int_cst (type
, 0);
2588 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, tem1
,
2589 NULL
, rangei
->in_p
, lowj
, tem2
,
2590 rangei
->strict_overflow_p
2591 || rangej
->strict_overflow_p
))
2596 /* It does some common checks for function optimize_range_tests_xor and
2597 optimize_range_tests_diff.
2598 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2599 Else it calls optimize_range_tests_diff. */
2602 optimize_range_tests_1 (enum tree_code opcode
, int first
, int length
,
2603 bool optimize_xor
, vec
<operand_entry
*> *ops
,
2604 struct range_entry
*ranges
)
2607 bool any_changes
= false;
2608 for (i
= first
; i
< length
; i
++)
2610 tree lowi
, highi
, lowj
, highj
, type
, tem
;
2612 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2614 type
= TREE_TYPE (ranges
[i
].exp
);
2615 if (!INTEGRAL_TYPE_P (type
))
2617 lowi
= ranges
[i
].low
;
2618 if (lowi
== NULL_TREE
)
2619 lowi
= TYPE_MIN_VALUE (type
);
2620 highi
= ranges
[i
].high
;
2621 if (highi
== NULL_TREE
)
2623 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
2626 if (ranges
[i
].exp
!= ranges
[j
].exp
|| ranges
[j
].in_p
)
2628 lowj
= ranges
[j
].low
;
2629 if (lowj
== NULL_TREE
)
2631 highj
= ranges
[j
].high
;
2632 if (highj
== NULL_TREE
)
2633 highj
= TYPE_MAX_VALUE (type
);
2634 /* Check lowj > highi. */
2635 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2637 if (tem
== NULL_TREE
|| !integer_onep (tem
))
2640 changes
= optimize_range_tests_xor (opcode
, type
, lowi
, lowj
,
2642 ranges
+ i
, ranges
+ j
);
2644 changes
= optimize_range_tests_diff (opcode
, type
, lowi
, lowj
,
2646 ranges
+ i
, ranges
+ j
);
2657 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2658 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2659 EXP on success, NULL otherwise. */
2662 extract_bit_test_mask (tree exp
, int prec
, tree totallow
, tree low
, tree high
,
2663 wide_int
*mask
, tree
*totallowp
)
2665 tree tem
= int_const_binop (MINUS_EXPR
, high
, low
);
2666 if (tem
== NULL_TREE
2667 || TREE_CODE (tem
) != INTEGER_CST
2668 || TREE_OVERFLOW (tem
)
2669 || tree_int_cst_sgn (tem
) == -1
2670 || compare_tree_int (tem
, prec
) != -1)
2673 unsigned HOST_WIDE_INT max
= tree_to_uhwi (tem
) + 1;
2674 *mask
= wi::shifted_mask (0, max
, false, prec
);
2675 if (TREE_CODE (exp
) == BIT_AND_EXPR
2676 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
2678 widest_int msk
= wi::to_widest (TREE_OPERAND (exp
, 1));
2679 msk
= wi::zext (~msk
, TYPE_PRECISION (TREE_TYPE (exp
)));
2680 if (wi::popcount (msk
) == 1
2681 && wi::ltu_p (msk
, prec
- max
))
2683 *mask
|= wi::shifted_mask (msk
.to_uhwi (), max
, false, prec
);
2684 max
+= msk
.to_uhwi ();
2685 exp
= TREE_OPERAND (exp
, 0);
2686 if (integer_zerop (low
)
2687 && TREE_CODE (exp
) == PLUS_EXPR
2688 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
2690 tree ret
= TREE_OPERAND (exp
, 0);
2693 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp
, 1)),
2694 TYPE_PRECISION (TREE_TYPE (low
))));
2695 tree tbias
= wide_int_to_tree (TREE_TYPE (ret
), bias
);
2701 else if (!tree_int_cst_lt (totallow
, tbias
))
2703 bias
= wi::to_widest (tbias
);
2704 bias
-= wi::to_widest (totallow
);
2705 if (bias
>= 0 && bias
< prec
- max
)
2707 *mask
= wi::lshift (*mask
, bias
);
2715 if (!tree_int_cst_lt (totallow
, low
))
2717 tem
= int_const_binop (MINUS_EXPR
, low
, totallow
);
2718 if (tem
== NULL_TREE
2719 || TREE_CODE (tem
) != INTEGER_CST
2720 || TREE_OVERFLOW (tem
)
2721 || compare_tree_int (tem
, prec
- max
) == 1)
2724 *mask
= wi::lshift (*mask
, wi::to_widest (tem
));
2728 /* Attempt to optimize small range tests using bit test.
2730 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2731 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2732 has been by earlier optimizations optimized into:
2733 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2734 As all the 43 through 82 range is less than 64 numbers,
2735 for 64-bit word targets optimize that into:
2736 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
2739 optimize_range_tests_to_bit_test (enum tree_code opcode
, int first
, int length
,
2740 vec
<operand_entry
*> *ops
,
2741 struct range_entry
*ranges
)
2744 bool any_changes
= false;
2745 int prec
= GET_MODE_BITSIZE (word_mode
);
2746 auto_vec
<struct range_entry
*, 64> candidates
;
2748 for (i
= first
; i
< length
- 2; i
++)
2750 tree lowi
, highi
, lowj
, highj
, type
;
2752 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2754 type
= TREE_TYPE (ranges
[i
].exp
);
2755 if (!INTEGRAL_TYPE_P (type
))
2757 lowi
= ranges
[i
].low
;
2758 if (lowi
== NULL_TREE
)
2759 lowi
= TYPE_MIN_VALUE (type
);
2760 highi
= ranges
[i
].high
;
2761 if (highi
== NULL_TREE
)
2764 tree exp
= extract_bit_test_mask (ranges
[i
].exp
, prec
, lowi
, lowi
,
2765 highi
, &mask
, &lowi
);
2766 if (exp
== NULL_TREE
)
2768 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2769 candidates
.truncate (0);
2770 int end
= MIN (i
+ 64, length
);
2771 for (j
= i
+ 1; j
< end
; j
++)
2774 if (ranges
[j
].exp
== NULL_TREE
|| ranges
[j
].in_p
)
2776 if (ranges
[j
].exp
== exp
)
2778 else if (TREE_CODE (ranges
[j
].exp
) == BIT_AND_EXPR
)
2780 exp2
= TREE_OPERAND (ranges
[j
].exp
, 0);
2783 else if (TREE_CODE (exp2
) == PLUS_EXPR
)
2785 exp2
= TREE_OPERAND (exp2
, 0);
2795 lowj
= ranges
[j
].low
;
2796 if (lowj
== NULL_TREE
)
2798 highj
= ranges
[j
].high
;
2799 if (highj
== NULL_TREE
)
2800 highj
= TYPE_MAX_VALUE (type
);
2802 exp2
= extract_bit_test_mask (ranges
[j
].exp
, prec
, lowi
, lowj
,
2803 highj
, &mask2
, NULL
);
2807 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2808 candidates
.safe_push (&ranges
[j
]);
2811 /* If we need otherwise 3 or more comparisons, use a bit test. */
2812 if (candidates
.length () >= 2)
2814 tree high
= wide_int_to_tree (TREE_TYPE (lowi
),
2815 wi::to_widest (lowi
)
2816 + prec
- 1 - wi::clz (mask
));
2817 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
2819 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
2820 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2821 location_t loc
= gimple_location (stmt
);
2822 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2824 /* See if it isn't cheaper to pretend the minimum value of the
2825 range is 0, if maximum value is small enough.
2826 We can avoid then subtraction of the minimum value, but the
2827 mask constant could be perhaps more expensive. */
2828 if (compare_tree_int (lowi
, 0) > 0
2829 && compare_tree_int (high
, prec
) < 0)
2832 HOST_WIDE_INT m
= tree_to_uhwi (lowi
);
2833 rtx reg
= gen_raw_REG (word_mode
, 10000);
2834 bool speed_p
= optimize_bb_for_speed_p (gimple_bb (stmt
));
2835 cost_diff
= set_rtx_cost (gen_rtx_PLUS (word_mode
, reg
,
2836 GEN_INT (-m
)), speed_p
);
2837 rtx r
= immed_wide_int_const (mask
, word_mode
);
2838 cost_diff
+= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
2839 word_mode
, speed_p
);
2840 r
= immed_wide_int_const (wi::lshift (mask
, m
), word_mode
);
2841 cost_diff
-= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
2842 word_mode
, speed_p
);
2845 mask
= wi::lshift (mask
, m
);
2846 lowi
= build_zero_cst (TREE_TYPE (lowi
));
2850 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2852 if (tem
== NULL_TREE
|| is_gimple_val (tem
))
2854 tree etype
= unsigned_type_for (TREE_TYPE (exp
));
2855 exp
= fold_build2_loc (loc
, MINUS_EXPR
, etype
,
2856 fold_convert_loc (loc
, etype
, exp
),
2857 fold_convert_loc (loc
, etype
, lowi
));
2858 exp
= fold_convert_loc (loc
, integer_type_node
, exp
);
2859 tree word_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
2860 exp
= fold_build2_loc (loc
, LSHIFT_EXPR
, word_type
,
2861 build_int_cst (word_type
, 1), exp
);
2862 exp
= fold_build2_loc (loc
, BIT_AND_EXPR
, word_type
, exp
,
2863 wide_int_to_tree (word_type
, mask
));
2864 exp
= fold_build2_loc (loc
, EQ_EXPR
, optype
, exp
,
2865 build_zero_cst (word_type
));
2866 if (is_gimple_val (exp
))
2869 /* The shift might have undefined behavior if TEM is true,
2870 but reassociate_bb isn't prepared to have basic blocks
2871 split when it is running. So, temporarily emit a code
2872 with BIT_IOR_EXPR instead of &&, and fix it up in
2875 tem
= force_gimple_operand (tem
, &seq
, true, NULL_TREE
);
2876 gcc_assert (TREE_CODE (tem
) == SSA_NAME
);
2877 gimple_set_visited (SSA_NAME_DEF_STMT (tem
), true);
2879 exp
= force_gimple_operand (exp
, &seq2
, true, NULL_TREE
);
2880 gimple_seq_add_seq_without_update (&seq
, seq2
);
2881 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2882 gimple_set_visited (SSA_NAME_DEF_STMT (exp
), true);
2883 gimple
*g
= gimple_build_assign (make_ssa_name (optype
),
2884 BIT_IOR_EXPR
, tem
, exp
);
2885 gimple_set_location (g
, loc
);
2886 gimple_seq_add_stmt_without_update (&seq
, g
);
2887 exp
= gimple_assign_lhs (g
);
2888 tree val
= build_zero_cst (optype
);
2889 if (update_range_test (&ranges
[i
], NULL
, candidates
.address (),
2890 candidates
.length (), opcode
, ops
, exp
,
2891 seq
, false, val
, val
, strict_overflow_p
))
2894 reassoc_branch_fixups
.safe_push (tem
);
2897 gimple_seq_discard (seq
);
2903 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
2904 and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1. */
2907 optimize_range_tests_cmp_bitwise (enum tree_code opcode
, int first
, int length
,
2908 vec
<operand_entry
*> *ops
,
2909 struct range_entry
*ranges
)
2913 bool any_changes
= false;
2914 auto_vec
<int, 128> buckets
;
2915 auto_vec
<int, 32> chains
;
2916 auto_vec
<struct range_entry
*, 32> candidates
;
2918 for (i
= first
; i
< length
; i
++)
2920 if (ranges
[i
].exp
== NULL_TREE
2921 || TREE_CODE (ranges
[i
].exp
) != SSA_NAME
2923 || TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) <= 1
2924 || TREE_CODE (TREE_TYPE (ranges
[i
].exp
)) == BOOLEAN_TYPE
2925 || ranges
[i
].low
== NULL_TREE
2926 || ranges
[i
].low
!= ranges
[i
].high
)
2929 bool zero_p
= integer_zerop (ranges
[i
].low
);
2930 if (!zero_p
&& !integer_all_onesp (ranges
[i
].low
))
2933 b
= TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) * 2 + !zero_p
;
2934 if (buckets
.length () <= b
)
2935 buckets
.safe_grow_cleared (b
+ 1);
2936 if (chains
.length () <= (unsigned) i
)
2937 chains
.safe_grow (i
+ 1);
2938 chains
[i
] = buckets
[b
];
2942 FOR_EACH_VEC_ELT (buckets
, b
, i
)
2943 if (i
&& chains
[i
- 1])
2946 for (j
= chains
[i
- 1]; j
; j
= chains
[j
- 1])
2948 gimple
*gk
= SSA_NAME_DEF_STMT (ranges
[k
- 1].exp
);
2949 gimple
*gj
= SSA_NAME_DEF_STMT (ranges
[j
- 1].exp
);
2950 if (reassoc_stmt_dominates_stmt_p (gk
, gj
))
2953 tree type1
= TREE_TYPE (ranges
[k
- 1].exp
);
2954 tree type2
= NULL_TREE
;
2955 bool strict_overflow_p
= false;
2956 candidates
.truncate (0);
2957 for (j
= i
; j
; j
= chains
[j
- 1])
2959 tree type
= TREE_TYPE (ranges
[j
- 1].exp
);
2960 strict_overflow_p
|= ranges
[j
- 1].strict_overflow_p
;
2962 || useless_type_conversion_p (type1
, type
))
2964 else if (type2
== NULL_TREE
2965 || useless_type_conversion_p (type2
, type
))
2967 if (type2
== NULL_TREE
)
2969 candidates
.safe_push (&ranges
[j
- 1]);
2972 unsigned l
= candidates
.length ();
2973 for (j
= i
; j
; j
= chains
[j
- 1])
2975 tree type
= TREE_TYPE (ranges
[j
- 1].exp
);
2978 if (useless_type_conversion_p (type1
, type
))
2980 else if (type2
== NULL_TREE
2981 || useless_type_conversion_p (type2
, type
))
2983 candidates
.safe_push (&ranges
[j
- 1]);
2985 gimple_seq seq
= NULL
;
2986 tree op
= NULL_TREE
;
2988 struct range_entry
*r
;
2989 candidates
.safe_push (&ranges
[k
- 1]);
2990 FOR_EACH_VEC_ELT (candidates
, id
, r
)
3000 g
= gimple_build_assign (make_ssa_name (type1
), NOP_EXPR
, op
);
3001 gimple_seq_add_stmt_without_update (&seq
, g
);
3002 op
= gimple_assign_lhs (g
);
3004 tree type
= TREE_TYPE (r
->exp
);
3006 if (id
>= l
&& !useless_type_conversion_p (type1
, type
))
3008 g
= gimple_build_assign (make_ssa_name (type1
), NOP_EXPR
, exp
);
3009 gimple_seq_add_stmt_without_update (&seq
, g
);
3010 exp
= gimple_assign_lhs (g
);
3012 g
= gimple_build_assign (make_ssa_name (id
>= l
? type1
: type2
),
3013 (b
& 1) ? BIT_AND_EXPR
: BIT_IOR_EXPR
,
3015 gimple_seq_add_stmt_without_update (&seq
, g
);
3016 op
= gimple_assign_lhs (g
);
3019 if (update_range_test (&ranges
[k
- 1], NULL
, candidates
.address (),
3020 candidates
.length (), opcode
, ops
, op
,
3021 seq
, true, ranges
[k
- 1].low
,
3022 ranges
[k
- 1].low
, strict_overflow_p
))
3025 gimple_seq_discard (seq
);
3031 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3032 a >= 0 && a < b into (unsigned) a < (unsigned) b
3033 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
3036 optimize_range_tests_var_bound (enum tree_code opcode
, int first
, int length
,
3037 vec
<operand_entry
*> *ops
,
3038 struct range_entry
*ranges
)
3041 bool any_changes
= false;
3042 hash_map
<tree
, int> *map
= NULL
;
3044 for (i
= first
; i
< length
; i
++)
3046 if (ranges
[i
].exp
== NULL_TREE
3047 || TREE_CODE (ranges
[i
].exp
) != SSA_NAME
3051 tree type
= TREE_TYPE (ranges
[i
].exp
);
3052 if (!INTEGRAL_TYPE_P (type
)
3053 || TYPE_UNSIGNED (type
)
3054 || ranges
[i
].low
== NULL_TREE
3055 || !integer_zerop (ranges
[i
].low
)
3056 || ranges
[i
].high
!= NULL_TREE
)
3058 /* EXP >= 0 here. */
3060 map
= new hash_map
<tree
, int>;
3061 map
->put (ranges
[i
].exp
, i
);
3067 for (i
= 0; i
< length
; i
++)
3069 bool in_p
= ranges
[i
].in_p
;
3070 if (ranges
[i
].low
== NULL_TREE
3071 || ranges
[i
].high
== NULL_TREE
)
3073 if (!integer_zerop (ranges
[i
].low
)
3074 || !integer_zerop (ranges
[i
].high
))
3077 && TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) == 1
3078 && TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3079 && integer_onep (ranges
[i
].low
)
3080 && integer_onep (ranges
[i
].high
))
3091 if (TREE_CODE (ranges
[i
].exp
) != SSA_NAME
)
3093 stmt
= SSA_NAME_DEF_STMT (ranges
[i
].exp
);
3094 if (!is_gimple_assign (stmt
))
3096 ccode
= gimple_assign_rhs_code (stmt
);
3097 rhs1
= gimple_assign_rhs1 (stmt
);
3098 rhs2
= gimple_assign_rhs2 (stmt
);
3102 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3103 stmt
= last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
3104 if (gimple_code (stmt
) != GIMPLE_COND
)
3106 ccode
= gimple_cond_code (stmt
);
3107 rhs1
= gimple_cond_lhs (stmt
);
3108 rhs2
= gimple_cond_rhs (stmt
);
3111 if (TREE_CODE (rhs1
) != SSA_NAME
3112 || rhs2
== NULL_TREE
3113 || TREE_CODE (rhs2
) != SSA_NAME
)
3127 ccode
= invert_tree_comparison (ccode
, false);
3132 std::swap (rhs1
, rhs2
);
3133 ccode
= swap_tree_comparison (ccode
);
3142 int *idx
= map
->get (rhs1
);
3146 wide_int nz
= get_nonzero_bits (rhs2
);
3150 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3151 and RHS2 is known to be RHS2 >= 0. */
3152 tree utype
= unsigned_type_for (TREE_TYPE (rhs1
));
3154 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
3155 if ((ranges
[*idx
].strict_overflow_p
3156 || ranges
[i
].strict_overflow_p
)
3157 && issue_strict_overflow_warning (wc
))
3158 warning_at (gimple_location (stmt
), OPT_Wstrict_overflow
,
3159 "assuming signed overflow does not occur "
3160 "when simplifying range test");
3162 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3164 struct range_entry
*r
= &ranges
[*idx
];
3165 fprintf (dump_file
, "Optimizing range test ");
3166 print_generic_expr (dump_file
, r
->exp
);
3167 fprintf (dump_file
, " +[");
3168 print_generic_expr (dump_file
, r
->low
);
3169 fprintf (dump_file
, ", ");
3170 print_generic_expr (dump_file
, r
->high
);
3171 fprintf (dump_file
, "] and comparison ");
3172 print_generic_expr (dump_file
, rhs1
);
3173 fprintf (dump_file
, " %s ", op_symbol_code (ccode
));
3174 print_generic_expr (dump_file
, rhs2
);
3175 fprintf (dump_file
, "\n into (");
3176 print_generic_expr (dump_file
, utype
);
3177 fprintf (dump_file
, ") ");
3178 print_generic_expr (dump_file
, rhs1
);
3179 fprintf (dump_file
, " %s (", op_symbol_code (ccode
));
3180 print_generic_expr (dump_file
, utype
);
3181 fprintf (dump_file
, ") ");
3182 print_generic_expr (dump_file
, rhs2
);
3183 fprintf (dump_file
, "\n");
3186 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3188 if (opcode
== BIT_IOR_EXPR
3189 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
3192 ccode
= invert_tree_comparison (ccode
, false);
3195 unsigned int uid
= gimple_uid (stmt
);
3196 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3197 gimple
*g
= gimple_build_assign (make_ssa_name (utype
), NOP_EXPR
, rhs1
);
3198 gimple_set_uid (g
, uid
);
3199 rhs1
= gimple_assign_lhs (g
);
3200 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3201 g
= gimple_build_assign (make_ssa_name (utype
), NOP_EXPR
, rhs2
);
3202 gimple_set_uid (g
, uid
);
3203 rhs2
= gimple_assign_lhs (g
);
3204 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3205 if (tree_swap_operands_p (rhs1
, rhs2
))
3207 std::swap (rhs1
, rhs2
);
3208 ccode
= swap_tree_comparison (ccode
);
3210 if (gimple_code (stmt
) == GIMPLE_COND
)
3212 gcond
*c
= as_a
<gcond
*> (stmt
);
3213 gimple_cond_set_code (c
, ccode
);
3214 gimple_cond_set_lhs (c
, rhs1
);
3215 gimple_cond_set_rhs (c
, rhs2
);
3220 tree ctype
= oe
->op
? TREE_TYPE (oe
->op
) : boolean_type_node
;
3221 if (!INTEGRAL_TYPE_P (ctype
)
3222 || (TREE_CODE (ctype
) != BOOLEAN_TYPE
3223 && TYPE_PRECISION (ctype
) != 1))
3224 ctype
= boolean_type_node
;
3225 g
= gimple_build_assign (make_ssa_name (ctype
), ccode
, rhs1
, rhs2
);
3226 gimple_set_uid (g
, uid
);
3227 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3228 if (oe
->op
&& ctype
!= TREE_TYPE (oe
->op
))
3230 g
= gimple_build_assign (make_ssa_name (TREE_TYPE (oe
->op
)),
3231 NOP_EXPR
, gimple_assign_lhs (g
));
3232 gimple_set_uid (g
, uid
);
3233 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3235 ranges
[i
].exp
= gimple_assign_lhs (g
);
3236 oe
->op
= ranges
[i
].exp
;
3237 ranges
[i
].low
= build_zero_cst (TREE_TYPE (ranges
[i
].exp
));
3238 ranges
[i
].high
= ranges
[i
].low
;
3240 ranges
[i
].strict_overflow_p
= false;
3241 oe
= (*ops
)[ranges
[*idx
].idx
];
3242 /* Now change all the other range test immediate uses, so that
3243 those tests will be optimized away. */
3244 if (opcode
== ERROR_MARK
)
3247 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
3248 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
3250 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
3251 ? boolean_false_node
: boolean_true_node
);
3254 oe
->op
= error_mark_node
;
3255 ranges
[*idx
].exp
= NULL_TREE
;
3256 ranges
[*idx
].low
= NULL_TREE
;
3257 ranges
[*idx
].high
= NULL_TREE
;
3265 /* Optimize range tests, similarly how fold_range_test optimizes
3266 it on trees. The tree code for the binary
3267 operation between all the operands is OPCODE.
3268 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3269 maybe_optimize_range_tests for inter-bb range optimization.
3270 In that case if oe->op is NULL, oe->id is bb->index whose
3271 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3272 the actual opcode. */
3275 optimize_range_tests (enum tree_code opcode
,
3276 vec
<operand_entry
*> *ops
)
3278 unsigned int length
= ops
->length (), i
, j
, first
;
3280 struct range_entry
*ranges
;
3281 bool any_changes
= false;
3286 ranges
= XNEWVEC (struct range_entry
, length
);
3287 for (i
= 0; i
< length
; i
++)
3291 init_range_entry (ranges
+ i
, oe
->op
,
3294 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
)));
3295 /* For | invert it now, we will invert it again before emitting
3296 the optimized expression. */
3297 if (opcode
== BIT_IOR_EXPR
3298 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
3299 ranges
[i
].in_p
= !ranges
[i
].in_p
;
3302 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
3303 for (i
= 0; i
< length
; i
++)
3304 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
3307 /* Try to merge ranges. */
3308 for (first
= i
; i
< length
; i
++)
3310 tree low
= ranges
[i
].low
;
3311 tree high
= ranges
[i
].high
;
3312 int in_p
= ranges
[i
].in_p
;
3313 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
3314 int update_fail_count
= 0;
3316 for (j
= i
+ 1; j
< length
; j
++)
3318 if (ranges
[i
].exp
!= ranges
[j
].exp
)
3320 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
3321 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
3323 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
3329 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, NULL
, j
- i
- 1,
3330 opcode
, ops
, ranges
[i
].exp
, NULL
, in_p
,
3331 low
, high
, strict_overflow_p
))
3336 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3337 while update_range_test would fail. */
3338 else if (update_fail_count
== 64)
3341 ++update_fail_count
;
3344 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
3347 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
3348 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
3350 if (lshift_cheap_p (optimize_function_for_speed_p (cfun
)))
3351 any_changes
|= optimize_range_tests_to_bit_test (opcode
, first
, length
,
3353 any_changes
|= optimize_range_tests_cmp_bitwise (opcode
, first
, length
,
3355 any_changes
|= optimize_range_tests_var_bound (opcode
, first
, length
, ops
,
3358 if (any_changes
&& opcode
!= ERROR_MARK
)
3361 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
3363 if (oe
->op
== error_mark_node
)
3372 XDELETEVEC (ranges
);
3376 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3377 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3378 otherwise the comparison code. */
3381 ovce_extract_ops (tree var
, gassign
**rets
, bool *reti
)
3383 if (TREE_CODE (var
) != SSA_NAME
)
3386 gassign
*stmt
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (var
));
3390 /* ??? If we start creating more COND_EXPR, we could perform
3391 this same optimization with them. For now, simplify. */
3392 if (gimple_assign_rhs_code (stmt
) != VEC_COND_EXPR
)
3395 tree cond
= gimple_assign_rhs1 (stmt
);
3396 tree_code cmp
= TREE_CODE (cond
);
3397 if (TREE_CODE_CLASS (cmp
) != tcc_comparison
)
3400 /* ??? For now, allow only canonical true and false result vectors.
3401 We could expand this to other constants should the need arise,
3402 but at the moment we don't create them. */
3403 tree t
= gimple_assign_rhs2 (stmt
);
3404 tree f
= gimple_assign_rhs3 (stmt
);
3406 if (integer_all_onesp (t
))
3408 else if (integer_all_onesp (f
))
3410 cmp
= invert_tree_comparison (cmp
, false);
3415 if (!integer_zerop (f
))
3426 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3427 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
3430 optimize_vec_cond_expr (tree_code opcode
, vec
<operand_entry
*> *ops
)
3432 unsigned int length
= ops
->length (), i
, j
;
3433 bool any_changes
= false;
3438 for (i
= 0; i
< length
; ++i
)
3440 tree elt0
= (*ops
)[i
]->op
;
3444 tree_code cmp0
= ovce_extract_ops (elt0
, &stmt0
, &invert
);
3445 if (cmp0
== ERROR_MARK
)
3448 for (j
= i
+ 1; j
< length
; ++j
)
3450 tree
&elt1
= (*ops
)[j
]->op
;
3453 tree_code cmp1
= ovce_extract_ops (elt1
, &stmt1
, NULL
);
3454 if (cmp1
== ERROR_MARK
)
3457 tree cond0
= gimple_assign_rhs1 (stmt0
);
3458 tree x0
= TREE_OPERAND (cond0
, 0);
3459 tree y0
= TREE_OPERAND (cond0
, 1);
3461 tree cond1
= gimple_assign_rhs1 (stmt1
);
3462 tree x1
= TREE_OPERAND (cond1
, 0);
3463 tree y1
= TREE_OPERAND (cond1
, 1);
3466 if (opcode
== BIT_AND_EXPR
)
3467 comb
= maybe_fold_and_comparisons (cmp0
, x0
, y0
, cmp1
, x1
, y1
);
3468 else if (opcode
== BIT_IOR_EXPR
)
3469 comb
= maybe_fold_or_comparisons (cmp0
, x0
, y0
, cmp1
, x1
, y1
);
3476 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3478 fprintf (dump_file
, "Transforming ");
3479 print_generic_expr (dump_file
, cond0
);
3480 fprintf (dump_file
, " %c ", opcode
== BIT_AND_EXPR
? '&' : '|');
3481 print_generic_expr (dump_file
, cond1
);
3482 fprintf (dump_file
, " into ");
3483 print_generic_expr (dump_file
, comb
);
3484 fputc ('\n', dump_file
);
3487 gimple_assign_set_rhs1 (stmt0
, comb
);
3489 std::swap (*gimple_assign_rhs2_ptr (stmt0
),
3490 *gimple_assign_rhs3_ptr (stmt0
));
3491 update_stmt (stmt0
);
3493 elt1
= error_mark_node
;
3502 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
3504 if (oe
->op
== error_mark_node
)
3516 /* Return true if STMT is a cast like:
3522 # _345 = PHI <_123(N), 1(...), 1(...)>
3523 where _234 has bool type, _123 has single use and
3524 bb N has a single successor M. This is commonly used in
3525 the last block of a range test.
3527 Also Return true if STMT is tcc_compare like:
3533 # _345 = PHI <_234(N), 1(...), 1(...)>
3535 where _234 has booltype, single use and
3536 bb N has a single successor M. This is commonly used in
3537 the last block of a range test. */
3540 final_range_test_p (gimple
*stmt
)
3542 basic_block bb
, rhs_bb
, lhs_bb
;
3545 use_operand_p use_p
;
3548 if (!gimple_assign_cast_p (stmt
)
3549 && (!is_gimple_assign (stmt
)
3550 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
3551 != tcc_comparison
)))
3553 bb
= gimple_bb (stmt
);
3554 if (!single_succ_p (bb
))
3556 e
= single_succ_edge (bb
);
3557 if (e
->flags
& EDGE_COMPLEX
)
3560 lhs
= gimple_assign_lhs (stmt
);
3561 rhs
= gimple_assign_rhs1 (stmt
);
3562 if (gimple_assign_cast_p (stmt
)
3563 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3564 || TREE_CODE (rhs
) != SSA_NAME
3565 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
))
3568 if (!gimple_assign_cast_p (stmt
)
3569 && (TREE_CODE (TREE_TYPE (lhs
)) != BOOLEAN_TYPE
))
3572 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3573 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
3576 if (gimple_code (use_stmt
) != GIMPLE_PHI
3577 || gimple_bb (use_stmt
) != e
->dest
)
3580 /* And that the rhs is defined in the same loop. */
3581 if (gimple_assign_cast_p (stmt
))
3583 if (TREE_CODE (rhs
) != SSA_NAME
3584 || !(rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
)))
3585 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
3590 if (TREE_CODE (lhs
) != SSA_NAME
3591 || !(lhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (lhs
)))
3592 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), lhs_bb
))
3599 /* Return true if BB is suitable basic block for inter-bb range test
3600 optimization. If BACKWARD is true, BB should be the only predecessor
3601 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
3602 or compared with to find a common basic block to which all conditions
3603 branch to if true resp. false. If BACKWARD is false, TEST_BB should
3604 be the only predecessor of BB. */
3607 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
3610 edge_iterator ei
, ei2
;
3614 bool other_edge_seen
= false;
3619 /* Check last stmt first. */
3620 stmt
= last_stmt (bb
);
3622 || (gimple_code (stmt
) != GIMPLE_COND
3623 && (backward
|| !final_range_test_p (stmt
)))
3624 || gimple_visited_p (stmt
)
3625 || stmt_could_throw_p (stmt
)
3628 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
3631 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3632 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3633 to *OTHER_BB (if not set yet, try to find it out). */
3634 if (EDGE_COUNT (bb
->succs
) != 2)
3636 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
3638 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
3640 if (e
->dest
== test_bb
)
3649 if (*other_bb
== NULL
)
3651 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
3652 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
3654 else if (e
->dest
== e2
->dest
)
3655 *other_bb
= e
->dest
;
3656 if (*other_bb
== NULL
)
3659 if (e
->dest
== *other_bb
)
3660 other_edge_seen
= true;
3664 if (*other_bb
== NULL
|| !other_edge_seen
)
3667 else if (single_succ (bb
) != *other_bb
)
3670 /* Now check all PHIs of *OTHER_BB. */
3671 e
= find_edge (bb
, *other_bb
);
3672 e2
= find_edge (test_bb
, *other_bb
);
3673 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3675 gphi
*phi
= gsi
.phi ();
3676 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3677 corresponding to BB and TEST_BB predecessor must be the same. */
3678 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
3679 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
3681 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3682 one of the PHIs should have the lhs of the last stmt in
3683 that block as PHI arg and that PHI should have 0 or 1
3684 corresponding to it in all other range test basic blocks
3688 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
3689 == gimple_assign_lhs (stmt
)
3690 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
3691 || integer_onep (gimple_phi_arg_def (phi
,
3697 gimple
*test_last
= last_stmt (test_bb
);
3698 if (gimple_code (test_last
) != GIMPLE_COND
3699 && gimple_phi_arg_def (phi
, e2
->dest_idx
)
3700 == gimple_assign_lhs (test_last
)
3701 && (integer_zerop (gimple_phi_arg_def (phi
, e
->dest_idx
))
3702 || integer_onep (gimple_phi_arg_def (phi
, e
->dest_idx
))))
3712 /* Return true if BB doesn't have side-effects that would disallow
3713 range test optimization, all SSA_NAMEs set in the bb are consumed
3714 in the bb and there are no PHIs. */
3717 no_side_effect_bb (basic_block bb
)
3719 gimple_stmt_iterator gsi
;
3722 if (!gimple_seq_empty_p (phi_nodes (bb
)))
3724 last
= last_stmt (bb
);
3725 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3727 gimple
*stmt
= gsi_stmt (gsi
);
3729 imm_use_iterator imm_iter
;
3730 use_operand_p use_p
;
3732 if (is_gimple_debug (stmt
))
3734 if (gimple_has_side_effects (stmt
))
3738 if (!is_gimple_assign (stmt
))
3740 lhs
= gimple_assign_lhs (stmt
);
3741 if (TREE_CODE (lhs
) != SSA_NAME
)
3743 if (gimple_assign_rhs_could_trap_p (stmt
))
3745 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
3747 gimple
*use_stmt
= USE_STMT (use_p
);
3748 if (is_gimple_debug (use_stmt
))
3750 if (gimple_bb (use_stmt
) != bb
)
3757 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3758 return true and fill in *OPS recursively. */
3761 get_ops (tree var
, enum tree_code code
, vec
<operand_entry
*> *ops
,
3764 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
3768 if (!is_reassociable_op (stmt
, code
, loop
))
3771 rhs
[0] = gimple_assign_rhs1 (stmt
);
3772 rhs
[1] = gimple_assign_rhs2 (stmt
);
3773 gimple_set_visited (stmt
, true);
3774 for (i
= 0; i
< 2; i
++)
3775 if (TREE_CODE (rhs
[i
]) == SSA_NAME
3776 && !get_ops (rhs
[i
], code
, ops
, loop
)
3777 && has_single_use (rhs
[i
]))
3779 operand_entry
*oe
= operand_entry_pool
.allocate ();
3785 oe
->stmt_to_insert
= NULL
;
3786 ops
->safe_push (oe
);
3791 /* Find the ops that were added by get_ops starting from VAR, see if
3792 they were changed during update_range_test and if yes, create new
3796 update_ops (tree var
, enum tree_code code
, vec
<operand_entry
*> ops
,
3797 unsigned int *pidx
, struct loop
*loop
)
3799 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
3803 if (!is_reassociable_op (stmt
, code
, loop
))
3806 rhs
[0] = gimple_assign_rhs1 (stmt
);
3807 rhs
[1] = gimple_assign_rhs2 (stmt
);
3810 for (i
= 0; i
< 2; i
++)
3811 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
3813 rhs
[2 + i
] = update_ops (rhs
[i
], code
, ops
, pidx
, loop
);
3814 if (rhs
[2 + i
] == NULL_TREE
)
3816 if (has_single_use (rhs
[i
]))
3817 rhs
[2 + i
] = ops
[(*pidx
)++]->op
;
3819 rhs
[2 + i
] = rhs
[i
];
3822 if ((rhs
[2] != rhs
[0] || rhs
[3] != rhs
[1])
3823 && (rhs
[2] != rhs
[1] || rhs
[3] != rhs
[0]))
3825 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3826 var
= make_ssa_name (TREE_TYPE (var
));
3827 gassign
*g
= gimple_build_assign (var
, gimple_assign_rhs_code (stmt
),
3829 gimple_set_uid (g
, gimple_uid (stmt
));
3830 gimple_set_visited (g
, true);
3831 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3836 /* Structure to track the initial value passed to get_ops and
3837 the range in the ops vector for each basic block. */
3839 struct inter_bb_range_test_entry
3842 unsigned int first_idx
, last_idx
;
3845 /* Inter-bb range test optimization.
3847 Returns TRUE if a gimple conditional is optimized to a true/false,
3848 otherwise return FALSE.
3850 This indicates to the caller that it should run a CFG cleanup pass
3851 once reassociation is completed. */
3854 maybe_optimize_range_tests (gimple
*stmt
)
3856 basic_block first_bb
= gimple_bb (stmt
);
3857 basic_block last_bb
= first_bb
;
3858 basic_block other_bb
= NULL
;
3862 auto_vec
<operand_entry
*> ops
;
3863 auto_vec
<inter_bb_range_test_entry
> bbinfo
;
3864 bool any_changes
= false;
3865 bool cfg_cleanup_needed
= false;
3867 /* Consider only basic blocks that end with GIMPLE_COND or
3868 a cast statement satisfying final_range_test_p. All
3869 but the last bb in the first_bb .. last_bb range
3870 should end with GIMPLE_COND. */
3871 if (gimple_code (stmt
) == GIMPLE_COND
)
3873 if (EDGE_COUNT (first_bb
->succs
) != 2)
3874 return cfg_cleanup_needed
;
3876 else if (final_range_test_p (stmt
))
3877 other_bb
= single_succ (first_bb
);
3879 return cfg_cleanup_needed
;
3881 if (stmt_could_throw_p (stmt
))
3882 return cfg_cleanup_needed
;
3884 /* As relative ordering of post-dominator sons isn't fixed,
3885 maybe_optimize_range_tests can be called first on any
3886 bb in the range we want to optimize. So, start searching
3887 backwards, if first_bb can be set to a predecessor. */
3888 while (single_pred_p (first_bb
))
3890 basic_block pred_bb
= single_pred (first_bb
);
3891 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, true))
3893 if (!no_side_effect_bb (first_bb
))
3897 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3898 Before starting forward search in last_bb successors, find
3899 out the other_bb. */
3900 if (first_bb
== last_bb
)
3903 /* As non-GIMPLE_COND last stmt always terminates the range,
3904 if forward search didn't discover anything, just give up. */
3905 if (gimple_code (stmt
) != GIMPLE_COND
)
3906 return cfg_cleanup_needed
;
3907 /* Look at both successors. Either it ends with a GIMPLE_COND
3908 and satisfies suitable_cond_bb, or ends with a cast and
3909 other_bb is that cast's successor. */
3910 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
3911 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
3912 || e
->dest
== first_bb
)
3913 return cfg_cleanup_needed
;
3914 else if (single_pred_p (e
->dest
))
3916 stmt
= last_stmt (e
->dest
);
3918 && gimple_code (stmt
) == GIMPLE_COND
3919 && EDGE_COUNT (e
->dest
->succs
) == 2)
3921 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
, true))
3927 && final_range_test_p (stmt
)
3928 && find_edge (first_bb
, single_succ (e
->dest
)))
3930 other_bb
= single_succ (e
->dest
);
3931 if (other_bb
== first_bb
)
3935 if (other_bb
== NULL
)
3936 return cfg_cleanup_needed
;
3938 /* Now do the forward search, moving last_bb to successor bbs
3939 that aren't other_bb. */
3940 while (EDGE_COUNT (last_bb
->succs
) == 2)
3942 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
3943 if (e
->dest
!= other_bb
)
3947 if (!single_pred_p (e
->dest
))
3949 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, false))
3951 if (!no_side_effect_bb (e
->dest
))
3955 if (first_bb
== last_bb
)
3956 return cfg_cleanup_needed
;
3957 /* Here basic blocks first_bb through last_bb's predecessor
3958 end with GIMPLE_COND, all of them have one of the edges to
3959 other_bb and another to another block in the range,
3960 all blocks except first_bb don't have side-effects and
3961 last_bb ends with either GIMPLE_COND, or cast satisfying
3962 final_range_test_p. */
3963 for (bb
= last_bb
; ; bb
= single_pred (bb
))
3965 enum tree_code code
;
3967 inter_bb_range_test_entry bb_ent
;
3969 bb_ent
.op
= NULL_TREE
;
3970 bb_ent
.first_idx
= ops
.length ();
3971 bb_ent
.last_idx
= bb_ent
.first_idx
;
3972 e
= find_edge (bb
, other_bb
);
3973 stmt
= last_stmt (bb
);
3974 gimple_set_visited (stmt
, true);
3975 if (gimple_code (stmt
) != GIMPLE_COND
)
3977 use_operand_p use_p
;
3982 lhs
= gimple_assign_lhs (stmt
);
3983 rhs
= gimple_assign_rhs1 (stmt
);
3984 gcc_assert (bb
== last_bb
);
3993 # _345 = PHI <_123(N), 1(...), 1(...)>
3995 or 0 instead of 1. If it is 0, the _234
3996 range test is anded together with all the
3997 other range tests, if it is 1, it is ored with
3999 single_imm_use (lhs
, &use_p
, &phi
);
4000 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
4001 e2
= find_edge (first_bb
, other_bb
);
4003 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
4004 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
4005 code
= BIT_AND_EXPR
;
4008 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
4009 code
= BIT_IOR_EXPR
;
4012 /* If _234 SSA_NAME_DEF_STMT is
4014 (or &, corresponding to 1/0 in the phi arguments,
4015 push into ops the individual range test arguments
4016 of the bitwise or resp. and, recursively. */
4017 if (TREE_CODE (rhs
) == SSA_NAME
4018 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4020 && !get_ops (rhs
, code
, &ops
,
4021 loop_containing_stmt (stmt
))
4022 && has_single_use (rhs
))
4024 /* Otherwise, push the _234 range test itself. */
4025 operand_entry
*oe
= operand_entry_pool
.allocate ();
4031 oe
->stmt_to_insert
= NULL
;
4036 else if (is_gimple_assign (stmt
)
4037 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4039 && !get_ops (lhs
, code
, &ops
,
4040 loop_containing_stmt (stmt
))
4041 && has_single_use (lhs
))
4043 operand_entry
*oe
= operand_entry_pool
.allocate ();
4054 bb_ent
.last_idx
= ops
.length ();
4057 bbinfo
.safe_push (bb_ent
);
4060 /* Otherwise stmt is GIMPLE_COND. */
4061 code
= gimple_cond_code (stmt
);
4062 lhs
= gimple_cond_lhs (stmt
);
4063 rhs
= gimple_cond_rhs (stmt
);
4064 if (TREE_CODE (lhs
) == SSA_NAME
4065 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
4066 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
4067 || rhs
!= boolean_false_node
4068 /* Either push into ops the individual bitwise
4069 or resp. and operands, depending on which
4070 edge is other_bb. */
4071 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
4072 ^ (code
== EQ_EXPR
))
4073 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
4074 loop_containing_stmt (stmt
))))
4076 /* Or push the GIMPLE_COND stmt itself. */
4077 operand_entry
*oe
= operand_entry_pool
.allocate ();
4080 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
4081 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
4082 /* oe->op = NULL signs that there is no SSA_NAME
4083 for the range test, and oe->id instead is the
4084 basic block number, at which's end the GIMPLE_COND
4088 oe
->stmt_to_insert
= NULL
;
4093 else if (ops
.length () > bb_ent
.first_idx
)
4096 bb_ent
.last_idx
= ops
.length ();
4098 bbinfo
.safe_push (bb_ent
);
4102 if (ops
.length () > 1)
4103 any_changes
= optimize_range_tests (ERROR_MARK
, &ops
);
4106 unsigned int idx
, max_idx
= 0;
4107 /* update_ops relies on has_single_use predicates returning the
4108 same values as it did during get_ops earlier. Additionally it
4109 never removes statements, only adds new ones and it should walk
4110 from the single imm use and check the predicate already before
4111 making those changes.
4112 On the other side, the handling of GIMPLE_COND directly can turn
4113 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4114 it needs to be done in a separate loop afterwards. */
4115 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
4117 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
4118 && bbinfo
[idx
].op
!= NULL_TREE
)
4123 stmt
= last_stmt (bb
);
4124 new_op
= update_ops (bbinfo
[idx
].op
,
4126 ops
[bbinfo
[idx
].first_idx
]->rank
,
4127 ops
, &bbinfo
[idx
].first_idx
,
4128 loop_containing_stmt (stmt
));
4129 if (new_op
== NULL_TREE
)
4131 gcc_assert (bb
== last_bb
);
4132 new_op
= ops
[bbinfo
[idx
].first_idx
++]->op
;
4134 if (bbinfo
[idx
].op
!= new_op
)
4136 imm_use_iterator iter
;
4137 use_operand_p use_p
;
4138 gimple
*use_stmt
, *cast_or_tcc_cmp_stmt
= NULL
;
4140 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, bbinfo
[idx
].op
)
4141 if (is_gimple_debug (use_stmt
))
4143 else if (gimple_code (use_stmt
) == GIMPLE_COND
4144 || gimple_code (use_stmt
) == GIMPLE_PHI
)
4145 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
4146 SET_USE (use_p
, new_op
);
4147 else if ((is_gimple_assign (use_stmt
)
4149 (gimple_assign_rhs_code (use_stmt
))
4150 == tcc_comparison
)))
4151 cast_or_tcc_cmp_stmt
= use_stmt
;
4152 else if (gimple_assign_cast_p (use_stmt
))
4153 cast_or_tcc_cmp_stmt
= use_stmt
;
4157 if (cast_or_tcc_cmp_stmt
)
4159 gcc_assert (bb
== last_bb
);
4160 tree lhs
= gimple_assign_lhs (cast_or_tcc_cmp_stmt
);
4161 tree new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
4162 enum tree_code rhs_code
4163 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt
)
4164 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt
)
4167 if (is_gimple_min_invariant (new_op
))
4169 new_op
= fold_convert (TREE_TYPE (lhs
), new_op
);
4170 g
= gimple_build_assign (new_lhs
, new_op
);
4173 g
= gimple_build_assign (new_lhs
, rhs_code
, new_op
);
4174 gimple_stmt_iterator gsi
4175 = gsi_for_stmt (cast_or_tcc_cmp_stmt
);
4176 gimple_set_uid (g
, gimple_uid (cast_or_tcc_cmp_stmt
));
4177 gimple_set_visited (g
, true);
4178 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4179 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
4180 if (is_gimple_debug (use_stmt
))
4182 else if (gimple_code (use_stmt
) == GIMPLE_COND
4183 || gimple_code (use_stmt
) == GIMPLE_PHI
)
4184 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
4185 SET_USE (use_p
, new_lhs
);
4194 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
4196 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
4197 && bbinfo
[idx
].op
== NULL_TREE
4198 && ops
[bbinfo
[idx
].first_idx
]->op
!= NULL_TREE
)
4200 gcond
*cond_stmt
= as_a
<gcond
*> (last_stmt (bb
));
4205 /* If we collapse the conditional to a true/false
4206 condition, then bubble that knowledge up to our caller. */
4207 if (integer_zerop (ops
[bbinfo
[idx
].first_idx
]->op
))
4209 gimple_cond_make_false (cond_stmt
);
4210 cfg_cleanup_needed
= true;
4212 else if (integer_onep (ops
[bbinfo
[idx
].first_idx
]->op
))
4214 gimple_cond_make_true (cond_stmt
);
4215 cfg_cleanup_needed
= true;
4219 gimple_cond_set_code (cond_stmt
, NE_EXPR
);
4220 gimple_cond_set_lhs (cond_stmt
,
4221 ops
[bbinfo
[idx
].first_idx
]->op
);
4222 gimple_cond_set_rhs (cond_stmt
, boolean_false_node
);
4224 update_stmt (cond_stmt
);
4230 /* The above changes could result in basic blocks after the first
4231 modified one, up to and including last_bb, to be executed even if
4232 they would not be in the original program. If the value ranges of
4233 assignment lhs' in those bbs were dependent on the conditions
4234 guarding those basic blocks which now can change, the VRs might
4235 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4236 are only used within the same bb, it should be not a big deal if
4237 we just reset all the VRs in those bbs. See PR68671. */
4238 for (bb
= last_bb
, idx
= 0; idx
< max_idx
; bb
= single_pred (bb
), idx
++)
4239 reset_flow_sensitive_info_in_bb (bb
);
4241 return cfg_cleanup_needed
;
4244 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4245 of STMT in it's operands. This is also known as a "destructive
4246 update" operation. */
4249 is_phi_for_stmt (gimple
*stmt
, tree operand
)
4254 use_operand_p arg_p
;
4257 if (TREE_CODE (operand
) != SSA_NAME
)
4260 lhs
= gimple_assign_lhs (stmt
);
4262 def_stmt
= SSA_NAME_DEF_STMT (operand
);
4263 def_phi
= dyn_cast
<gphi
*> (def_stmt
);
4267 FOR_EACH_PHI_ARG (arg_p
, def_phi
, i
, SSA_OP_USE
)
4268 if (lhs
== USE_FROM_PTR (arg_p
))
4273 /* Remove def stmt of VAR if VAR has zero uses and recurse
4274 on rhs1 operand if so. */
4277 remove_visited_stmt_chain (tree var
)
4280 gimple_stmt_iterator gsi
;
4284 if (TREE_CODE (var
) != SSA_NAME
|| !has_zero_uses (var
))
4286 stmt
= SSA_NAME_DEF_STMT (var
);
4287 if (is_gimple_assign (stmt
) && gimple_visited_p (stmt
))
4289 var
= gimple_assign_rhs1 (stmt
);
4290 gsi
= gsi_for_stmt (stmt
);
4291 reassoc_remove_stmt (&gsi
);
4292 release_defs (stmt
);
4299 /* This function checks three consequtive operands in
4300 passed operands vector OPS starting from OPINDEX and
4301 swaps two operands if it is profitable for binary operation
4302 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
4304 We pair ops with the same rank if possible.
4306 The alternative we try is to see if STMT is a destructive
4307 update style statement, which is like:
4310 In that case, we want to use the destructive update form to
4311 expose the possible vectorizer sum reduction opportunity.
4312 In that case, the third operand will be the phi node. This
4313 check is not performed if STMT is null.
4315 We could, of course, try to be better as noted above, and do a
4316 lot of work to try to find these opportunities in >3 operand
4317 cases, but it is unlikely to be worth it. */
4320 swap_ops_for_binary_stmt (vec
<operand_entry
*> ops
,
4321 unsigned int opindex
, gimple
*stmt
)
4323 operand_entry
*oe1
, *oe2
, *oe3
;
4326 oe2
= ops
[opindex
+ 1];
4327 oe3
= ops
[opindex
+ 2];
4329 if ((oe1
->rank
== oe2
->rank
4330 && oe2
->rank
!= oe3
->rank
)
4331 || (stmt
&& is_phi_for_stmt (stmt
, oe3
->op
)
4332 && !is_phi_for_stmt (stmt
, oe1
->op
)
4333 && !is_phi_for_stmt (stmt
, oe2
->op
)))
4334 std::swap (*oe1
, *oe3
);
4335 else if ((oe1
->rank
== oe3
->rank
4336 && oe2
->rank
!= oe3
->rank
)
4337 || (stmt
&& is_phi_for_stmt (stmt
, oe2
->op
)
4338 && !is_phi_for_stmt (stmt
, oe1
->op
)
4339 && !is_phi_for_stmt (stmt
, oe3
->op
)))
4340 std::swap (*oe1
, *oe2
);
4343 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4344 two definitions, otherwise return STMT. */
4346 static inline gimple
*
4347 find_insert_point (gimple
*stmt
, tree rhs1
, tree rhs2
)
4349 if (TREE_CODE (rhs1
) == SSA_NAME
4350 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs1
)))
4351 stmt
= SSA_NAME_DEF_STMT (rhs1
);
4352 if (TREE_CODE (rhs2
) == SSA_NAME
4353 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs2
)))
4354 stmt
= SSA_NAME_DEF_STMT (rhs2
);
4358 /* If the stmt that defines operand has to be inserted, insert it
4361 insert_stmt_before_use (gimple
*stmt
, gimple
*stmt_to_insert
)
4363 gcc_assert (is_gimple_assign (stmt_to_insert
));
4364 tree rhs1
= gimple_assign_rhs1 (stmt_to_insert
);
4365 tree rhs2
= gimple_assign_rhs2 (stmt_to_insert
);
4366 gimple
*insert_point
= find_insert_point (stmt
, rhs1
, rhs2
);
4367 gimple_stmt_iterator gsi
= gsi_for_stmt (insert_point
);
4368 gimple_set_uid (stmt_to_insert
, gimple_uid (insert_point
));
4370 /* If the insert point is not stmt, then insert_point would be
4371 the point where operand rhs1 or rhs2 is defined. In this case,
4372 stmt_to_insert has to be inserted afterwards. This would
4373 only happen when the stmt insertion point is flexible. */
4374 if (stmt
== insert_point
)
4375 gsi_insert_before (&gsi
, stmt_to_insert
, GSI_NEW_STMT
);
4377 insert_stmt_after (stmt_to_insert
, insert_point
);
4381 /* Recursively rewrite our linearized statements so that the operators
4382 match those in OPS[OPINDEX], putting the computation in rank
4383 order. Return new lhs.
4384 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
4385 the current stmt and during recursive invocations.
4386 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
4387 recursive invocations. */
4390 rewrite_expr_tree (gimple
*stmt
, unsigned int opindex
,
4391 vec
<operand_entry
*> ops
, bool changed
, bool next_changed
)
4393 tree rhs1
= gimple_assign_rhs1 (stmt
);
4394 tree rhs2
= gimple_assign_rhs2 (stmt
);
4395 tree lhs
= gimple_assign_lhs (stmt
);
4398 /* The final recursion case for this function is that you have
4399 exactly two operations left.
4400 If we had exactly one op in the entire list to start with, we
4401 would have never called this function, and the tail recursion
4402 rewrites them one at a time. */
4403 if (opindex
+ 2 == ops
.length ())
4405 operand_entry
*oe1
, *oe2
;
4408 oe2
= ops
[opindex
+ 1];
4410 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
4412 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4413 unsigned int uid
= gimple_uid (stmt
);
4415 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4417 fprintf (dump_file
, "Transforming ");
4418 print_gimple_stmt (dump_file
, stmt
, 0);
4421 /* If the stmt that defines operand has to be inserted, insert it
4423 if (oe1
->stmt_to_insert
)
4424 insert_stmt_before_use (stmt
, oe1
->stmt_to_insert
);
4425 if (oe2
->stmt_to_insert
)
4426 insert_stmt_before_use (stmt
, oe2
->stmt_to_insert
);
4427 /* Even when changed is false, reassociation could have e.g. removed
4428 some redundant operations, so unless we are just swapping the
4429 arguments or unless there is no change at all (then we just
4430 return lhs), force creation of a new SSA_NAME. */
4431 if (changed
|| ((rhs1
!= oe2
->op
|| rhs2
!= oe1
->op
) && opindex
))
4433 gimple
*insert_point
4434 = find_insert_point (stmt
, oe1
->op
, oe2
->op
);
4435 lhs
= make_ssa_name (TREE_TYPE (lhs
));
4437 = gimple_build_assign (lhs
, gimple_assign_rhs_code (stmt
),
4439 gimple_set_uid (stmt
, uid
);
4440 gimple_set_visited (stmt
, true);
4441 if (insert_point
== gsi_stmt (gsi
))
4442 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
4444 insert_stmt_after (stmt
, insert_point
);
4448 gcc_checking_assert (find_insert_point (stmt
, oe1
->op
, oe2
->op
)
4450 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
4451 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
4455 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
4456 remove_visited_stmt_chain (rhs1
);
4458 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4460 fprintf (dump_file
, " into ");
4461 print_gimple_stmt (dump_file
, stmt
, 0);
4467 /* If we hit here, we should have 3 or more ops left. */
4468 gcc_assert (opindex
+ 2 < ops
.length ());
4470 /* Rewrite the next operator. */
4473 /* If the stmt that defines operand has to be inserted, insert it
4475 if (oe
->stmt_to_insert
)
4476 insert_stmt_before_use (stmt
, oe
->stmt_to_insert
);
4478 /* Recurse on the LHS of the binary operator, which is guaranteed to
4479 be the non-leaf side. */
4481 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), opindex
+ 1, ops
,
4482 changed
|| oe
->op
!= rhs2
|| next_changed
,
4485 if (oe
->op
!= rhs2
|| new_rhs1
!= rhs1
)
4487 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4489 fprintf (dump_file
, "Transforming ");
4490 print_gimple_stmt (dump_file
, stmt
, 0);
4493 /* If changed is false, this is either opindex == 0
4494 or all outer rhs2's were equal to corresponding oe->op,
4495 and powi_result is NULL.
4496 That means lhs is equivalent before and after reassociation.
4497 Otherwise ensure the old lhs SSA_NAME is not reused and
4498 create a new stmt as well, so that any debug stmts will be
4499 properly adjusted. */
4502 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4503 unsigned int uid
= gimple_uid (stmt
);
4504 gimple
*insert_point
= find_insert_point (stmt
, new_rhs1
, oe
->op
);
4506 lhs
= make_ssa_name (TREE_TYPE (lhs
));
4507 stmt
= gimple_build_assign (lhs
, gimple_assign_rhs_code (stmt
),
4509 gimple_set_uid (stmt
, uid
);
4510 gimple_set_visited (stmt
, true);
4511 if (insert_point
== gsi_stmt (gsi
))
4512 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
4514 insert_stmt_after (stmt
, insert_point
);
4518 gcc_checking_assert (find_insert_point (stmt
, new_rhs1
, oe
->op
)
4520 gimple_assign_set_rhs1 (stmt
, new_rhs1
);
4521 gimple_assign_set_rhs2 (stmt
, oe
->op
);
4525 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4527 fprintf (dump_file
, " into ");
4528 print_gimple_stmt (dump_file
, stmt
, 0);
4534 /* Find out how many cycles we need to compute statements chain.
4535 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
4536 maximum number of independent statements we may execute per cycle. */
4539 get_required_cycles (int ops_num
, int cpu_width
)
4545 /* While we have more than 2 * cpu_width operands
4546 we may reduce number of operands by cpu_width
4548 res
= ops_num
/ (2 * cpu_width
);
4550 /* Remained operands count may be reduced twice per cycle
4551 until we have only one operand. */
4552 rest
= (unsigned)(ops_num
- res
* cpu_width
);
4553 elog
= exact_log2 (rest
);
4557 res
+= floor_log2 (rest
) + 1;
4562 /* Returns an optimal number of registers to use for computation of
4563 given statements. */
4566 get_reassociation_width (int ops_num
, enum tree_code opc
,
4569 int param_width
= PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH
);
4574 if (param_width
> 0)
4575 width
= param_width
;
4577 width
= targetm
.sched
.reassociation_width (opc
, mode
);
4582 /* Get the minimal time required for sequence computation. */
4583 cycles_best
= get_required_cycles (ops_num
, width
);
4585 /* Check if we may use less width and still compute sequence for
4586 the same time. It will allow us to reduce registers usage.
4587 get_required_cycles is monotonically increasing with lower width
4588 so we can perform a binary search for the minimal width that still
4589 results in the optimal cycle count. */
4591 while (width
> width_min
)
4593 int width_mid
= (width
+ width_min
) / 2;
4595 if (get_required_cycles (ops_num
, width_mid
) == cycles_best
)
4597 else if (width_min
< width_mid
)
4598 width_min
= width_mid
;
4606 /* Recursively rewrite our linearized statements so that the operators
4607 match those in OPS[OPINDEX], putting the computation in rank
4608 order and trying to allow operations to be executed in
4612 rewrite_expr_tree_parallel (gassign
*stmt
, int width
,
4613 vec
<operand_entry
*> ops
)
4615 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
4616 int op_num
= ops
.length ();
4617 gcc_assert (op_num
> 0);
4618 int stmt_num
= op_num
- 1;
4619 gimple
**stmts
= XALLOCAVEC (gimple
*, stmt_num
);
4620 int op_index
= op_num
- 1;
4622 int ready_stmts_end
= 0;
4624 gimple
*stmt1
= NULL
, *stmt2
= NULL
;
4625 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
4627 /* We start expression rewriting from the top statements.
4628 So, in this loop we create a full list of statements
4629 we will work with. */
4630 stmts
[stmt_num
- 1] = stmt
;
4631 for (i
= stmt_num
- 2; i
>= 0; i
--)
4632 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
4634 for (i
= 0; i
< stmt_num
; i
++)
4638 /* Determine whether we should use results of
4639 already handled statements or not. */
4640 if (ready_stmts_end
== 0
4641 && (i
- stmt_index
>= width
|| op_index
< 1))
4642 ready_stmts_end
= i
;
4644 /* Now we choose operands for the next statement. Non zero
4645 value in ready_stmts_end means here that we should use
4646 the result of already generated statements as new operand. */
4647 if (ready_stmts_end
> 0)
4649 op1
= gimple_assign_lhs (stmts
[stmt_index
++]);
4650 if (ready_stmts_end
> stmt_index
)
4651 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
4652 else if (op_index
>= 0)
4654 operand_entry
*oe
= ops
[op_index
--];
4655 stmt2
= oe
->stmt_to_insert
;
4660 gcc_assert (stmt_index
< i
);
4661 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
4664 if (stmt_index
>= ready_stmts_end
)
4665 ready_stmts_end
= 0;
4670 swap_ops_for_binary_stmt (ops
, op_index
- 2, NULL
);
4671 operand_entry
*oe2
= ops
[op_index
--];
4672 operand_entry
*oe1
= ops
[op_index
--];
4674 stmt2
= oe2
->stmt_to_insert
;
4676 stmt1
= oe1
->stmt_to_insert
;
4679 /* If we emit the last statement then we should put
4680 operands into the last statement. It will also
4682 if (op_index
< 0 && stmt_index
== i
)
4685 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4687 fprintf (dump_file
, "Transforming ");
4688 print_gimple_stmt (dump_file
, stmts
[i
], 0);
4691 /* If the stmt that defines operand has to be inserted, insert it
4694 insert_stmt_before_use (stmts
[i
], stmt1
);
4696 insert_stmt_before_use (stmts
[i
], stmt2
);
4697 stmt1
= stmt2
= NULL
;
4699 /* We keep original statement only for the last one. All
4700 others are recreated. */
4701 if (i
== stmt_num
- 1)
4703 gimple_assign_set_rhs1 (stmts
[i
], op1
);
4704 gimple_assign_set_rhs2 (stmts
[i
], op2
);
4705 update_stmt (stmts
[i
]);
4709 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
), op1
, op2
, opcode
);
4711 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4713 fprintf (dump_file
, " into ");
4714 print_gimple_stmt (dump_file
, stmts
[i
], 0);
4718 remove_visited_stmt_chain (last_rhs1
);
4721 /* Transform STMT, which is really (A +B) + (C + D) into the left
4722 linear form, ((A+B)+C)+D.
4723 Recurse on D if necessary. */
4726 linearize_expr (gimple
*stmt
)
4728 gimple_stmt_iterator gsi
;
4729 gimple
*binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
4730 gimple
*binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
4731 gimple
*oldbinrhs
= binrhs
;
4732 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
4733 gimple
*newbinrhs
= NULL
;
4734 struct loop
*loop
= loop_containing_stmt (stmt
);
4735 tree lhs
= gimple_assign_lhs (stmt
);
4737 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
4738 && is_reassociable_op (binrhs
, rhscode
, loop
));
4740 gsi
= gsi_for_stmt (stmt
);
4742 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
4743 binrhs
= gimple_build_assign (make_ssa_name (TREE_TYPE (lhs
)),
4744 gimple_assign_rhs_code (binrhs
),
4745 gimple_assign_lhs (binlhs
),
4746 gimple_assign_rhs2 (binrhs
));
4747 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
4748 gsi_insert_before (&gsi
, binrhs
, GSI_SAME_STMT
);
4749 gimple_set_uid (binrhs
, gimple_uid (stmt
));
4751 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
4752 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
4754 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4756 fprintf (dump_file
, "Linearized: ");
4757 print_gimple_stmt (dump_file
, stmt
, 0);
4760 reassociate_stats
.linearized
++;
4763 gsi
= gsi_for_stmt (oldbinrhs
);
4764 reassoc_remove_stmt (&gsi
);
4765 release_defs (oldbinrhs
);
4767 gimple_set_visited (stmt
, true);
4768 gimple_set_visited (binlhs
, true);
4769 gimple_set_visited (binrhs
, true);
4771 /* Tail recurse on the new rhs if it still needs reassociation. */
4772 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
4773 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4774 want to change the algorithm while converting to tuples. */
4775 linearize_expr (stmt
);
4778 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4779 it. Otherwise, return NULL. */
4782 get_single_immediate_use (tree lhs
)
4784 use_operand_p immuse
;
4787 if (TREE_CODE (lhs
) == SSA_NAME
4788 && single_imm_use (lhs
, &immuse
, &immusestmt
)
4789 && is_gimple_assign (immusestmt
))
4795 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4796 representing the negated value. Insertions of any necessary
4797 instructions go before GSI.
4798 This function is recursive in that, if you hand it "a_5" as the
4799 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4800 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
4803 negate_value (tree tonegate
, gimple_stmt_iterator
*gsip
)
4805 gimple
*negatedefstmt
= NULL
;
4806 tree resultofnegate
;
4807 gimple_stmt_iterator gsi
;
4810 /* If we are trying to negate a name, defined by an add, negate the
4811 add operands instead. */
4812 if (TREE_CODE (tonegate
) == SSA_NAME
)
4813 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
4814 if (TREE_CODE (tonegate
) == SSA_NAME
4815 && is_gimple_assign (negatedefstmt
)
4816 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
4817 && has_single_use (gimple_assign_lhs (negatedefstmt
))
4818 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
4820 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
4821 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
4822 tree lhs
= gimple_assign_lhs (negatedefstmt
);
4825 gsi
= gsi_for_stmt (negatedefstmt
);
4826 rhs1
= negate_value (rhs1
, &gsi
);
4828 gsi
= gsi_for_stmt (negatedefstmt
);
4829 rhs2
= negate_value (rhs2
, &gsi
);
4831 gsi
= gsi_for_stmt (negatedefstmt
);
4832 lhs
= make_ssa_name (TREE_TYPE (lhs
));
4833 gimple_set_visited (negatedefstmt
, true);
4834 g
= gimple_build_assign (lhs
, PLUS_EXPR
, rhs1
, rhs2
);
4835 gimple_set_uid (g
, gimple_uid (negatedefstmt
));
4836 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4840 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
4841 resultofnegate
= force_gimple_operand_gsi (gsip
, tonegate
, true,
4842 NULL_TREE
, true, GSI_SAME_STMT
);
4844 uid
= gimple_uid (gsi_stmt (gsi
));
4845 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
4847 gimple
*stmt
= gsi_stmt (gsi
);
4848 if (gimple_uid (stmt
) != 0)
4850 gimple_set_uid (stmt
, uid
);
4852 return resultofnegate
;
4855 /* Return true if we should break up the subtract in STMT into an add
4856 with negate. This is true when we the subtract operands are really
4857 adds, or the subtract itself is used in an add expression. In
4858 either case, breaking up the subtract into an add with negate
4859 exposes the adds to reassociation. */
4862 should_break_up_subtract (gimple
*stmt
)
4864 tree lhs
= gimple_assign_lhs (stmt
);
4865 tree binlhs
= gimple_assign_rhs1 (stmt
);
4866 tree binrhs
= gimple_assign_rhs2 (stmt
);
4868 struct loop
*loop
= loop_containing_stmt (stmt
);
4870 if (TREE_CODE (binlhs
) == SSA_NAME
4871 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs
), PLUS_EXPR
, loop
))
4874 if (TREE_CODE (binrhs
) == SSA_NAME
4875 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
), PLUS_EXPR
, loop
))
4878 if (TREE_CODE (lhs
) == SSA_NAME
4879 && (immusestmt
= get_single_immediate_use (lhs
))
4880 && is_gimple_assign (immusestmt
)
4881 && (gimple_assign_rhs_code (immusestmt
) == PLUS_EXPR
4882 || (gimple_assign_rhs_code (immusestmt
) == MINUS_EXPR
4883 && gimple_assign_rhs1 (immusestmt
) == lhs
)
4884 || gimple_assign_rhs_code (immusestmt
) == MULT_EXPR
))
4889 /* Transform STMT from A - B into A + -B. */
4892 break_up_subtract (gimple
*stmt
, gimple_stmt_iterator
*gsip
)
4894 tree rhs1
= gimple_assign_rhs1 (stmt
);
4895 tree rhs2
= gimple_assign_rhs2 (stmt
);
4897 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4899 fprintf (dump_file
, "Breaking up subtract ");
4900 print_gimple_stmt (dump_file
, stmt
, 0);
4903 rhs2
= negate_value (rhs2
, gsip
);
4904 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
4908 /* Determine whether STMT is a builtin call that raises an SSA name
4909 to an integer power and has only one use. If so, and this is early
4910 reassociation and unsafe math optimizations are permitted, place
4911 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4912 If any of these conditions does not hold, return FALSE. */
4915 acceptable_pow_call (gcall
*stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
4918 REAL_VALUE_TYPE c
, cint
;
4920 switch (gimple_call_combined_fn (stmt
))
4923 if (flag_errno_math
)
4926 *base
= gimple_call_arg (stmt
, 0);
4927 arg1
= gimple_call_arg (stmt
, 1);
4929 if (TREE_CODE (arg1
) != REAL_CST
)
4932 c
= TREE_REAL_CST (arg1
);
4934 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
4937 *exponent
= real_to_integer (&c
);
4938 real_from_integer (&cint
, VOIDmode
, *exponent
, SIGNED
);
4939 if (!real_identical (&c
, &cint
))
4945 *base
= gimple_call_arg (stmt
, 0);
4946 arg1
= gimple_call_arg (stmt
, 1);
4948 if (!tree_fits_shwi_p (arg1
))
4951 *exponent
= tree_to_shwi (arg1
);
4958 /* Expanding negative exponents is generally unproductive, so we don't
4959 complicate matters with those. Exponents of zero and one should
4960 have been handled by expression folding. */
4961 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
4967 /* Try to derive and add operand entry for OP to *OPS. Return false if
4971 try_special_add_to_ops (vec
<operand_entry
*> *ops
,
4972 enum tree_code code
,
4973 tree op
, gimple
* def_stmt
)
4975 tree base
= NULL_TREE
;
4976 HOST_WIDE_INT exponent
= 0;
4978 if (TREE_CODE (op
) != SSA_NAME
4979 || ! has_single_use (op
))
4982 if (code
== MULT_EXPR
4983 && reassoc_insert_powi_p
4984 && flag_unsafe_math_optimizations
4985 && is_gimple_call (def_stmt
)
4986 && acceptable_pow_call (as_a
<gcall
*> (def_stmt
), &base
, &exponent
))
4988 add_repeat_to_ops_vec (ops
, base
, exponent
);
4989 gimple_set_visited (def_stmt
, true);
4992 else if (code
== MULT_EXPR
4993 && is_gimple_assign (def_stmt
)
4994 && gimple_assign_rhs_code (def_stmt
) == NEGATE_EXPR
4995 && !HONOR_SNANS (TREE_TYPE (op
))
4996 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op
))
4997 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op
))))
4999 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
5000 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
5001 add_to_ops_vec (ops
, rhs1
);
5002 add_to_ops_vec (ops
, cst
);
5003 gimple_set_visited (def_stmt
, true);
5010 /* Recursively linearize a binary expression that is the RHS of STMT.
5011 Place the operands of the expression tree in the vector named OPS. */
5014 linearize_expr_tree (vec
<operand_entry
*> *ops
, gimple
*stmt
,
5015 bool is_associative
, bool set_visited
)
5017 tree binlhs
= gimple_assign_rhs1 (stmt
);
5018 tree binrhs
= gimple_assign_rhs2 (stmt
);
5019 gimple
*binlhsdef
= NULL
, *binrhsdef
= NULL
;
5020 bool binlhsisreassoc
= false;
5021 bool binrhsisreassoc
= false;
5022 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
5023 struct loop
*loop
= loop_containing_stmt (stmt
);
5026 gimple_set_visited (stmt
, true);
5028 if (TREE_CODE (binlhs
) == SSA_NAME
)
5030 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
5031 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
5032 && !stmt_could_throw_p (binlhsdef
));
5035 if (TREE_CODE (binrhs
) == SSA_NAME
)
5037 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
5038 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
5039 && !stmt_could_throw_p (binrhsdef
));
5042 /* If the LHS is not reassociable, but the RHS is, we need to swap
5043 them. If neither is reassociable, there is nothing we can do, so
5044 just put them in the ops vector. If the LHS is reassociable,
5045 linearize it. If both are reassociable, then linearize the RHS
5048 if (!binlhsisreassoc
)
5050 /* If this is not a associative operation like division, give up. */
5051 if (!is_associative
)
5053 add_to_ops_vec (ops
, binrhs
);
5057 if (!binrhsisreassoc
)
5059 if (!try_special_add_to_ops (ops
, rhscode
, binrhs
, binrhsdef
))
5060 add_to_ops_vec (ops
, binrhs
);
5062 if (!try_special_add_to_ops (ops
, rhscode
, binlhs
, binlhsdef
))
5063 add_to_ops_vec (ops
, binlhs
);
5068 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5070 fprintf (dump_file
, "swapping operands of ");
5071 print_gimple_stmt (dump_file
, stmt
, 0);
5074 swap_ssa_operands (stmt
,
5075 gimple_assign_rhs1_ptr (stmt
),
5076 gimple_assign_rhs2_ptr (stmt
));
5079 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5081 fprintf (dump_file
, " is now ");
5082 print_gimple_stmt (dump_file
, stmt
, 0);
5085 /* We want to make it so the lhs is always the reassociative op,
5087 std::swap (binlhs
, binrhs
);
5089 else if (binrhsisreassoc
)
5091 linearize_expr (stmt
);
5092 binlhs
= gimple_assign_rhs1 (stmt
);
5093 binrhs
= gimple_assign_rhs2 (stmt
);
5096 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
5097 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
5099 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
5100 is_associative
, set_visited
);
5102 if (!try_special_add_to_ops (ops
, rhscode
, binrhs
, binrhsdef
))
5103 add_to_ops_vec (ops
, binrhs
);
5106 /* Repropagate the negates back into subtracts, since no other pass
5107 currently does it. */
5110 repropagate_negates (void)
5115 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
5117 gimple
*user
= get_single_immediate_use (negate
);
5119 if (!user
|| !is_gimple_assign (user
))
5122 /* The negate operand can be either operand of a PLUS_EXPR
5123 (it can be the LHS if the RHS is a constant for example).
5125 Force the negate operand to the RHS of the PLUS_EXPR, then
5126 transform the PLUS_EXPR into a MINUS_EXPR. */
5127 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
5129 /* If the negated operand appears on the LHS of the
5130 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5131 to force the negated operand to the RHS of the PLUS_EXPR. */
5132 if (gimple_assign_rhs1 (user
) == negate
)
5134 swap_ssa_operands (user
,
5135 gimple_assign_rhs1_ptr (user
),
5136 gimple_assign_rhs2_ptr (user
));
5139 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5140 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5141 if (gimple_assign_rhs2 (user
) == negate
)
5143 tree rhs1
= gimple_assign_rhs1 (user
);
5144 tree rhs2
= gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate
));
5145 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
5146 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
, rhs2
);
5150 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
5152 if (gimple_assign_rhs1 (user
) == negate
)
5157 which we transform into
5160 This pushes down the negate which we possibly can merge
5161 into some other operation, hence insert it into the
5162 plus_negates vector. */
5163 gimple
*feed
= SSA_NAME_DEF_STMT (negate
);
5164 tree a
= gimple_assign_rhs1 (feed
);
5165 tree b
= gimple_assign_rhs2 (user
);
5166 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
);
5167 gimple_stmt_iterator gsi2
= gsi_for_stmt (user
);
5168 tree x
= make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed
)));
5169 gimple
*g
= gimple_build_assign (x
, PLUS_EXPR
, a
, b
);
5170 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
5171 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, x
);
5172 user
= gsi_stmt (gsi2
);
5174 reassoc_remove_stmt (&gsi
);
5175 release_defs (feed
);
5176 plus_negates
.safe_push (gimple_assign_lhs (user
));
5180 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5181 rid of one operation. */
5182 gimple
*feed
= SSA_NAME_DEF_STMT (negate
);
5183 tree a
= gimple_assign_rhs1 (feed
);
5184 tree rhs1
= gimple_assign_rhs1 (user
);
5185 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
5186 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, a
);
5187 update_stmt (gsi_stmt (gsi
));
5193 /* Returns true if OP is of a type for which we can do reassociation.
5194 That is for integral or non-saturating fixed-point types, and for
5195 floating point type when associative-math is enabled. */
5198 can_reassociate_p (tree op
)
5200 tree type
= TREE_TYPE (op
);
5201 if (TREE_CODE (op
) == SSA_NAME
&& SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op
))
5203 if ((ANY_INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_WRAPS (type
))
5204 || NON_SAT_FIXED_POINT_TYPE_P (type
)
5205 || (flag_associative_math
&& FLOAT_TYPE_P (type
)))
5210 /* Break up subtract operations in block BB.
5212 We do this top down because we don't know whether the subtract is
5213 part of a possible chain of reassociation except at the top.
5222 we want to break up k = t - q, but we won't until we've transformed q
5223 = b - r, which won't be broken up until we transform b = c - d.
5225 En passant, clear the GIMPLE visited flag on every statement
5226 and set UIDs within each basic block. */
5229 break_up_subtract_bb (basic_block bb
)
5231 gimple_stmt_iterator gsi
;
5233 unsigned int uid
= 1;
5235 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
5237 gimple
*stmt
= gsi_stmt (gsi
);
5238 gimple_set_visited (stmt
, false);
5239 gimple_set_uid (stmt
, uid
++);
5241 if (!is_gimple_assign (stmt
)
5242 || !can_reassociate_p (gimple_assign_lhs (stmt
)))
5245 /* Look for simple gimple subtract operations. */
5246 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
5248 if (!can_reassociate_p (gimple_assign_rhs1 (stmt
))
5249 || !can_reassociate_p (gimple_assign_rhs2 (stmt
)))
5252 /* Check for a subtract used only in an addition. If this
5253 is the case, transform it into add of a negate for better
5254 reassociation. IE transform C = A-B into C = A + -B if C
5255 is only used in an addition. */
5256 if (should_break_up_subtract (stmt
))
5257 break_up_subtract (stmt
, &gsi
);
5259 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
5260 && can_reassociate_p (gimple_assign_rhs1 (stmt
)))
5261 plus_negates
.safe_push (gimple_assign_lhs (stmt
));
5263 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
5265 son
= next_dom_son (CDI_DOMINATORS
, son
))
5266 break_up_subtract_bb (son
);
5269 /* Used for repeated factor analysis. */
5270 struct repeat_factor
5272 /* An SSA name that occurs in a multiply chain. */
5275 /* Cached rank of the factor. */
5278 /* Number of occurrences of the factor in the chain. */
5279 HOST_WIDE_INT count
;
5281 /* An SSA name representing the product of this factor and
5282 all factors appearing later in the repeated factor vector. */
5287 static vec
<repeat_factor
> repeat_factor_vec
;
5289 /* Used for sorting the repeat factor vector. Sort primarily by
5290 ascending occurrence count, secondarily by descending rank. */
5293 compare_repeat_factors (const void *x1
, const void *x2
)
5295 const repeat_factor
*rf1
= (const repeat_factor
*) x1
;
5296 const repeat_factor
*rf2
= (const repeat_factor
*) x2
;
5298 if (rf1
->count
!= rf2
->count
)
5299 return rf1
->count
- rf2
->count
;
5301 return rf2
->rank
- rf1
->rank
;
5304 /* Look for repeated operands in OPS in the multiply tree rooted at
5305 STMT. Replace them with an optimal sequence of multiplies and powi
5306 builtin calls, and remove the used operands from OPS. Return an
5307 SSA name representing the value of the replacement sequence. */
5310 attempt_builtin_powi (gimple
*stmt
, vec
<operand_entry
*> *ops
)
5312 unsigned i
, j
, vec_len
;
5315 repeat_factor
*rf1
, *rf2
;
5316 repeat_factor rfnew
;
5317 tree result
= NULL_TREE
;
5318 tree target_ssa
, iter_result
;
5319 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
5320 tree powi_fndecl
= mathfn_built_in (type
, BUILT_IN_POWI
);
5321 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
5322 gimple
*mul_stmt
, *pow_stmt
;
5324 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5329 /* Allocate the repeated factor vector. */
5330 repeat_factor_vec
.create (10);
5332 /* Scan the OPS vector for all SSA names in the product and build
5333 up a vector of occurrence counts for each factor. */
5334 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
5336 if (TREE_CODE (oe
->op
) == SSA_NAME
)
5338 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
5340 if (rf1
->factor
== oe
->op
)
5342 rf1
->count
+= oe
->count
;
5347 if (j
>= repeat_factor_vec
.length ())
5349 rfnew
.factor
= oe
->op
;
5350 rfnew
.rank
= oe
->rank
;
5351 rfnew
.count
= oe
->count
;
5352 rfnew
.repr
= NULL_TREE
;
5353 repeat_factor_vec
.safe_push (rfnew
);
5358 /* Sort the repeated factor vector by (a) increasing occurrence count,
5359 and (b) decreasing rank. */
5360 repeat_factor_vec
.qsort (compare_repeat_factors
);
5362 /* It is generally best to combine as many base factors as possible
5363 into a product before applying __builtin_powi to the result.
5364 However, the sort order chosen for the repeated factor vector
5365 allows us to cache partial results for the product of the base
5366 factors for subsequent use. When we already have a cached partial
5367 result from a previous iteration, it is best to make use of it
5368 before looking for another __builtin_pow opportunity.
5370 As an example, consider x * x * y * y * y * z * z * z * z.
5371 We want to first compose the product x * y * z, raise it to the
5372 second power, then multiply this by y * z, and finally multiply
5373 by z. This can be done in 5 multiplies provided we cache y * z
5374 for use in both expressions:
5382 If we instead ignored the cached y * z and first multiplied by
5383 the __builtin_pow opportunity z * z, we would get the inferior:
5392 vec_len
= repeat_factor_vec
.length ();
5394 /* Repeatedly look for opportunities to create a builtin_powi call. */
5397 HOST_WIDE_INT power
;
5399 /* First look for the largest cached product of factors from
5400 preceding iterations. If found, create a builtin_powi for
5401 it if the minimum occurrence count for its factors is at
5402 least 2, or just use this cached product as our next
5403 multiplicand if the minimum occurrence count is 1. */
5404 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
5406 if (rf1
->repr
&& rf1
->count
> 0)
5416 iter_result
= rf1
->repr
;
5418 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5422 fputs ("Multiplying by cached product ", dump_file
);
5423 for (elt
= j
; elt
< vec_len
; elt
++)
5425 rf
= &repeat_factor_vec
[elt
];
5426 print_generic_expr (dump_file
, rf
->factor
);
5427 if (elt
< vec_len
- 1)
5428 fputs (" * ", dump_file
);
5430 fputs ("\n", dump_file
);
5435 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
5436 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
5437 build_int_cst (integer_type_node
,
5439 gimple_call_set_lhs (pow_stmt
, iter_result
);
5440 gimple_set_location (pow_stmt
, gimple_location (stmt
));
5441 gimple_set_uid (pow_stmt
, gimple_uid (stmt
));
5442 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
5444 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5448 fputs ("Building __builtin_pow call for cached product (",
5450 for (elt
= j
; elt
< vec_len
; elt
++)
5452 rf
= &repeat_factor_vec
[elt
];
5453 print_generic_expr (dump_file
, rf
->factor
);
5454 if (elt
< vec_len
- 1)
5455 fputs (" * ", dump_file
);
5457 fprintf (dump_file
, ")^" HOST_WIDE_INT_PRINT_DEC
"\n",
5464 /* Otherwise, find the first factor in the repeated factor
5465 vector whose occurrence count is at least 2. If no such
5466 factor exists, there are no builtin_powi opportunities
5468 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
5470 if (rf1
->count
>= 2)
5479 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5483 fputs ("Building __builtin_pow call for (", dump_file
);
5484 for (elt
= j
; elt
< vec_len
; elt
++)
5486 rf
= &repeat_factor_vec
[elt
];
5487 print_generic_expr (dump_file
, rf
->factor
);
5488 if (elt
< vec_len
- 1)
5489 fputs (" * ", dump_file
);
5491 fprintf (dump_file
, ")^" HOST_WIDE_INT_PRINT_DEC
"\n", power
);
5494 reassociate_stats
.pows_created
++;
5496 /* Visit each element of the vector in reverse order (so that
5497 high-occurrence elements are visited first, and within the
5498 same occurrence count, lower-ranked elements are visited
5499 first). Form a linear product of all elements in this order
5500 whose occurrencce count is at least that of element J.
5501 Record the SSA name representing the product of each element
5502 with all subsequent elements in the vector. */
5503 if (j
== vec_len
- 1)
5504 rf1
->repr
= rf1
->factor
;
5507 for (ii
= vec_len
- 2; ii
>= (int)j
; ii
--)
5511 rf1
= &repeat_factor_vec
[ii
];
5512 rf2
= &repeat_factor_vec
[ii
+ 1];
5514 /* Init the last factor's representative to be itself. */
5516 rf2
->repr
= rf2
->factor
;
5521 target_ssa
= make_temp_ssa_name (type
, NULL
, "reassocpow");
5522 mul_stmt
= gimple_build_assign (target_ssa
, MULT_EXPR
,
5524 gimple_set_location (mul_stmt
, gimple_location (stmt
));
5525 gimple_set_uid (mul_stmt
, gimple_uid (stmt
));
5526 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
5527 rf1
->repr
= target_ssa
;
5529 /* Don't reprocess the multiply we just introduced. */
5530 gimple_set_visited (mul_stmt
, true);
5534 /* Form a call to __builtin_powi for the maximum product
5535 just formed, raised to the power obtained earlier. */
5536 rf1
= &repeat_factor_vec
[j
];
5537 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
5538 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
5539 build_int_cst (integer_type_node
,
5541 gimple_call_set_lhs (pow_stmt
, iter_result
);
5542 gimple_set_location (pow_stmt
, gimple_location (stmt
));
5543 gimple_set_uid (pow_stmt
, gimple_uid (stmt
));
5544 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
5547 /* If we previously formed at least one other builtin_powi call,
5548 form the product of this one and those others. */
5551 tree new_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
5552 mul_stmt
= gimple_build_assign (new_result
, MULT_EXPR
,
5553 result
, iter_result
);
5554 gimple_set_location (mul_stmt
, gimple_location (stmt
));
5555 gimple_set_uid (mul_stmt
, gimple_uid (stmt
));
5556 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
5557 gimple_set_visited (mul_stmt
, true);
5558 result
= new_result
;
5561 result
= iter_result
;
5563 /* Decrement the occurrence count of each element in the product
5564 by the count found above, and remove this many copies of each
5566 for (i
= j
; i
< vec_len
; i
++)
5571 rf1
= &repeat_factor_vec
[i
];
5572 rf1
->count
-= power
;
5574 FOR_EACH_VEC_ELT_REVERSE (*ops
, n
, oe
)
5576 if (oe
->op
== rf1
->factor
)
5580 ops
->ordered_remove (n
);
5596 /* At this point all elements in the repeated factor vector have a
5597 remaining occurrence count of 0 or 1, and those with a count of 1
5598 don't have cached representatives. Re-sort the ops vector and
5600 ops
->qsort (sort_by_operand_rank
);
5601 repeat_factor_vec
.release ();
5603 /* Return the final product computed herein. Note that there may
5604 still be some elements with single occurrence count left in OPS;
5605 those will be handled by the normal reassociation logic. */
5609 /* Attempt to optimize
5610 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5611 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
5614 attempt_builtin_copysign (vec
<operand_entry
*> *ops
)
5618 unsigned int length
= ops
->length ();
5619 tree cst
= ops
->last ()->op
;
5621 if (length
== 1 || TREE_CODE (cst
) != REAL_CST
)
5624 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
5626 if (TREE_CODE (oe
->op
) == SSA_NAME
5627 && has_single_use (oe
->op
))
5629 gimple
*def_stmt
= SSA_NAME_DEF_STMT (oe
->op
);
5630 if (gcall
*old_call
= dyn_cast
<gcall
*> (def_stmt
))
5633 switch (gimple_call_combined_fn (old_call
))
5636 CASE_CFN_COPYSIGN_FN
:
5637 arg0
= gimple_call_arg (old_call
, 0);
5638 arg1
= gimple_call_arg (old_call
, 1);
5639 /* The first argument of copysign must be a constant,
5640 otherwise there's nothing to do. */
5641 if (TREE_CODE (arg0
) == REAL_CST
)
5643 tree type
= TREE_TYPE (arg0
);
5644 tree mul
= const_binop (MULT_EXPR
, type
, cst
, arg0
);
5645 /* If we couldn't fold to a single constant, skip it.
5646 That happens e.g. for inexact multiplication when
5648 if (mul
== NULL_TREE
)
5650 /* Instead of adjusting OLD_CALL, let's build a new
5651 call to not leak the LHS and prevent keeping bogus
5652 debug statements. DCE will clean up the old call. */
5654 if (gimple_call_internal_p (old_call
))
5655 new_call
= gimple_build_call_internal
5656 (IFN_COPYSIGN
, 2, mul
, arg1
);
5658 new_call
= gimple_build_call
5659 (gimple_call_fndecl (old_call
), 2, mul
, arg1
);
5660 tree lhs
= make_ssa_name (type
);
5661 gimple_call_set_lhs (new_call
, lhs
);
5662 gimple_set_location (new_call
,
5663 gimple_location (old_call
));
5664 insert_stmt_after (new_call
, old_call
);
5665 /* We've used the constant, get rid of it. */
5667 bool cst1_neg
= real_isneg (TREE_REAL_CST_PTR (cst
));
5668 /* Handle the CST1 < 0 case by negating the result. */
5671 tree negrhs
= make_ssa_name (TREE_TYPE (lhs
));
5673 = gimple_build_assign (negrhs
, NEGATE_EXPR
, lhs
);
5674 insert_stmt_after (negate_stmt
, new_call
);
5679 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5681 fprintf (dump_file
, "Optimizing copysign: ");
5682 print_generic_expr (dump_file
, cst
);
5683 fprintf (dump_file
, " * COPYSIGN (");
5684 print_generic_expr (dump_file
, arg0
);
5685 fprintf (dump_file
, ", ");
5686 print_generic_expr (dump_file
, arg1
);
5687 fprintf (dump_file
, ") into %sCOPYSIGN (",
5688 cst1_neg
? "-" : "");
5689 print_generic_expr (dump_file
, mul
);
5690 fprintf (dump_file
, ", ");
5691 print_generic_expr (dump_file
, arg1
);
5692 fprintf (dump_file
, "\n");
5705 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
5708 transform_stmt_to_copy (gimple_stmt_iterator
*gsi
, gimple
*stmt
, tree new_rhs
)
5712 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5714 fprintf (dump_file
, "Transforming ");
5715 print_gimple_stmt (dump_file
, stmt
, 0);
5718 rhs1
= gimple_assign_rhs1 (stmt
);
5719 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
5721 remove_visited_stmt_chain (rhs1
);
5723 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5725 fprintf (dump_file
, " into ");
5726 print_gimple_stmt (dump_file
, stmt
, 0);
5730 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
5733 transform_stmt_to_multiply (gimple_stmt_iterator
*gsi
, gimple
*stmt
,
5734 tree rhs1
, tree rhs2
)
5736 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5738 fprintf (dump_file
, "Transforming ");
5739 print_gimple_stmt (dump_file
, stmt
, 0);
5742 gimple_assign_set_rhs_with_ops (gsi
, MULT_EXPR
, rhs1
, rhs2
);
5743 update_stmt (gsi_stmt (*gsi
));
5744 remove_visited_stmt_chain (rhs1
);
5746 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5748 fprintf (dump_file
, " into ");
5749 print_gimple_stmt (dump_file
, stmt
, 0);
5753 /* Reassociate expressions in basic block BB and its post-dominator as
5756 Bubble up return status from maybe_optimize_range_tests. */
5759 reassociate_bb (basic_block bb
)
5761 gimple_stmt_iterator gsi
;
5763 gimple
*stmt
= last_stmt (bb
);
5764 bool cfg_cleanup_needed
= false;
5766 if (stmt
&& !gimple_visited_p (stmt
))
5767 cfg_cleanup_needed
|= maybe_optimize_range_tests (stmt
);
5769 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
5771 stmt
= gsi_stmt (gsi
);
5773 if (is_gimple_assign (stmt
)
5774 && !stmt_could_throw_p (stmt
))
5776 tree lhs
, rhs1
, rhs2
;
5777 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
5779 /* If this is not a gimple binary expression, there is
5780 nothing for us to do with it. */
5781 if (get_gimple_rhs_class (rhs_code
) != GIMPLE_BINARY_RHS
)
5784 /* If this was part of an already processed statement,
5785 we don't need to touch it again. */
5786 if (gimple_visited_p (stmt
))
5788 /* This statement might have become dead because of previous
5790 if (has_zero_uses (gimple_get_lhs (stmt
)))
5792 reassoc_remove_stmt (&gsi
);
5793 release_defs (stmt
);
5794 /* We might end up removing the last stmt above which
5795 places the iterator to the end of the sequence.
5796 Reset it to the last stmt in this case which might
5797 be the end of the sequence as well if we removed
5798 the last statement of the sequence. In which case
5799 we need to bail out. */
5800 if (gsi_end_p (gsi
))
5802 gsi
= gsi_last_bb (bb
);
5803 if (gsi_end_p (gsi
))
5810 lhs
= gimple_assign_lhs (stmt
);
5811 rhs1
= gimple_assign_rhs1 (stmt
);
5812 rhs2
= gimple_assign_rhs2 (stmt
);
5814 /* For non-bit or min/max operations we can't associate
5815 all types. Verify that here. */
5816 if (rhs_code
!= BIT_IOR_EXPR
5817 && rhs_code
!= BIT_AND_EXPR
5818 && rhs_code
!= BIT_XOR_EXPR
5819 && rhs_code
!= MIN_EXPR
5820 && rhs_code
!= MAX_EXPR
5821 && (!can_reassociate_p (lhs
)
5822 || !can_reassociate_p (rhs1
)
5823 || !can_reassociate_p (rhs2
)))
5826 if (associative_tree_code (rhs_code
))
5828 auto_vec
<operand_entry
*> ops
;
5829 tree powi_result
= NULL_TREE
;
5830 bool is_vector
= VECTOR_TYPE_P (TREE_TYPE (lhs
));
5832 /* There may be no immediate uses left by the time we
5833 get here because we may have eliminated them all. */
5834 if (TREE_CODE (lhs
) == SSA_NAME
&& has_zero_uses (lhs
))
5837 gimple_set_visited (stmt
, true);
5838 linearize_expr_tree (&ops
, stmt
, true, true);
5839 ops
.qsort (sort_by_operand_rank
);
5840 int orig_len
= ops
.length ();
5841 optimize_ops_list (rhs_code
, &ops
);
5842 if (undistribute_ops_list (rhs_code
, &ops
,
5843 loop_containing_stmt (stmt
)))
5845 ops
.qsort (sort_by_operand_rank
);
5846 optimize_ops_list (rhs_code
, &ops
);
5849 if (rhs_code
== PLUS_EXPR
5850 && transform_add_to_multiply (&ops
))
5851 ops
.qsort (sort_by_operand_rank
);
5853 if (rhs_code
== BIT_IOR_EXPR
|| rhs_code
== BIT_AND_EXPR
)
5856 optimize_vec_cond_expr (rhs_code
, &ops
);
5858 optimize_range_tests (rhs_code
, &ops
);
5861 if (rhs_code
== MULT_EXPR
&& !is_vector
)
5863 attempt_builtin_copysign (&ops
);
5865 if (reassoc_insert_powi_p
5866 && flag_unsafe_math_optimizations
)
5867 powi_result
= attempt_builtin_powi (stmt
, &ops
);
5870 operand_entry
*last
;
5871 bool negate_result
= false;
5872 if (ops
.length () > 1
5873 && rhs_code
== MULT_EXPR
)
5876 if ((integer_minus_onep (last
->op
)
5877 || real_minus_onep (last
->op
))
5878 && !HONOR_SNANS (TREE_TYPE (lhs
))
5879 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs
))
5880 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs
))))
5883 negate_result
= true;
5888 /* If the operand vector is now empty, all operands were
5889 consumed by the __builtin_powi optimization. */
5890 if (ops
.length () == 0)
5891 transform_stmt_to_copy (&gsi
, stmt
, powi_result
);
5892 else if (ops
.length () == 1)
5894 tree last_op
= ops
.last ()->op
;
5896 /* If the stmt that defines operand has to be inserted, insert it
5898 if (ops
.last ()->stmt_to_insert
)
5899 insert_stmt_before_use (stmt
, ops
.last ()->stmt_to_insert
);
5901 transform_stmt_to_multiply (&gsi
, stmt
, last_op
,
5904 transform_stmt_to_copy (&gsi
, stmt
, last_op
);
5908 machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
5909 int ops_num
= ops
.length ();
5910 int width
= get_reassociation_width (ops_num
, rhs_code
, mode
);
5912 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5914 "Width = %d was chosen for reassociation\n", width
);
5917 /* For binary bit operations, if there are at least 3
5918 operands and the last last operand in OPS is a constant,
5919 move it to the front. This helps ensure that we generate
5920 (X & Y) & C rather than (X & C) & Y. The former will
5921 often match a canonical bit test when we get to RTL. */
5922 if (ops
.length () > 2
5923 && (rhs_code
== BIT_AND_EXPR
5924 || rhs_code
== BIT_IOR_EXPR
5925 || rhs_code
== BIT_XOR_EXPR
)
5926 && TREE_CODE (ops
.last ()->op
) == INTEGER_CST
)
5927 std::swap (*ops
[0], *ops
[ops_num
- 1]);
5930 && ops
.length () > 3)
5931 rewrite_expr_tree_parallel (as_a
<gassign
*> (stmt
),
5935 /* When there are three operands left, we want
5936 to make sure the ones that get the double
5937 binary op are chosen wisely. */
5938 int len
= ops
.length ();
5940 swap_ops_for_binary_stmt (ops
, len
- 3, stmt
);
5942 new_lhs
= rewrite_expr_tree (stmt
, 0, ops
,
5948 /* If we combined some repeated factors into a
5949 __builtin_powi call, multiply that result by the
5950 reassociated operands. */
5953 gimple
*mul_stmt
, *lhs_stmt
= SSA_NAME_DEF_STMT (lhs
);
5954 tree type
= TREE_TYPE (lhs
);
5955 tree target_ssa
= make_temp_ssa_name (type
, NULL
,
5957 gimple_set_lhs (lhs_stmt
, target_ssa
);
5958 update_stmt (lhs_stmt
);
5961 target_ssa
= new_lhs
;
5964 mul_stmt
= gimple_build_assign (lhs
, MULT_EXPR
,
5965 powi_result
, target_ssa
);
5966 gimple_set_location (mul_stmt
, gimple_location (stmt
));
5967 gimple_set_uid (mul_stmt
, gimple_uid (stmt
));
5968 gsi_insert_after (&gsi
, mul_stmt
, GSI_NEW_STMT
);
5974 stmt
= SSA_NAME_DEF_STMT (lhs
);
5975 tree tmp
= make_ssa_name (TREE_TYPE (lhs
));
5976 gimple_set_lhs (stmt
, tmp
);
5979 gassign
*neg_stmt
= gimple_build_assign (lhs
, NEGATE_EXPR
,
5981 gimple_set_uid (neg_stmt
, gimple_uid (stmt
));
5982 gsi_insert_after (&gsi
, neg_stmt
, GSI_NEW_STMT
);
5988 for (son
= first_dom_son (CDI_POST_DOMINATORS
, bb
);
5990 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
5991 cfg_cleanup_needed
|= reassociate_bb (son
);
5993 return cfg_cleanup_needed
;
5996 /* Add jumps around shifts for range tests turned into bit tests.
5997 For each SSA_NAME VAR we have code like:
5998 VAR = ...; // final stmt of range comparison
5999 // bit test here...;
6000 OTHERVAR = ...; // final stmt of the bit test sequence
6001 RES = VAR | OTHERVAR;
6002 Turn the above into:
6009 // bit test here...;
6012 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6020 FOR_EACH_VEC_ELT (reassoc_branch_fixups
, i
, var
)
6022 gimple
*def_stmt
= SSA_NAME_DEF_STMT (var
);
6025 bool ok
= single_imm_use (var
, &use
, &use_stmt
);
6027 && is_gimple_assign (use_stmt
)
6028 && gimple_assign_rhs_code (use_stmt
) == BIT_IOR_EXPR
6029 && gimple_bb (def_stmt
) == gimple_bb (use_stmt
));
6031 basic_block cond_bb
= gimple_bb (def_stmt
);
6032 basic_block then_bb
= split_block (cond_bb
, def_stmt
)->dest
;
6033 basic_block merge_bb
= split_block (then_bb
, use_stmt
)->dest
;
6035 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
6036 gimple
*g
= gimple_build_cond (NE_EXPR
, var
,
6037 build_zero_cst (TREE_TYPE (var
)),
6038 NULL_TREE
, NULL_TREE
);
6039 location_t loc
= gimple_location (use_stmt
);
6040 gimple_set_location (g
, loc
);
6041 gsi_insert_after (&gsi
, g
, GSI_NEW_STMT
);
6043 edge etrue
= make_edge (cond_bb
, merge_bb
, EDGE_TRUE_VALUE
);
6044 etrue
->probability
= profile_probability::even ();
6045 edge efalse
= find_edge (cond_bb
, then_bb
);
6046 efalse
->flags
= EDGE_FALSE_VALUE
;
6047 efalse
->probability
-= etrue
->probability
;
6048 then_bb
->count
-= etrue
->count ();
6050 tree othervar
= NULL_TREE
;
6051 if (gimple_assign_rhs1 (use_stmt
) == var
)
6052 othervar
= gimple_assign_rhs2 (use_stmt
);
6053 else if (gimple_assign_rhs2 (use_stmt
) == var
)
6054 othervar
= gimple_assign_rhs1 (use_stmt
);
6057 tree lhs
= gimple_assign_lhs (use_stmt
);
6058 gphi
*phi
= create_phi_node (lhs
, merge_bb
);
6059 add_phi_arg (phi
, build_one_cst (TREE_TYPE (lhs
)), etrue
, loc
);
6060 add_phi_arg (phi
, othervar
, single_succ_edge (then_bb
), loc
);
6061 gsi
= gsi_for_stmt (use_stmt
);
6062 gsi_remove (&gsi
, true);
6064 set_immediate_dominator (CDI_DOMINATORS
, merge_bb
, cond_bb
);
6065 set_immediate_dominator (CDI_POST_DOMINATORS
, cond_bb
, merge_bb
);
6067 reassoc_branch_fixups
.release ();
6070 void dump_ops_vector (FILE *file
, vec
<operand_entry
*> ops
);
6071 void debug_ops_vector (vec
<operand_entry
*> ops
);
6073 /* Dump the operand entry vector OPS to FILE. */
6076 dump_ops_vector (FILE *file
, vec
<operand_entry
*> ops
)
6081 FOR_EACH_VEC_ELT (ops
, i
, oe
)
6083 fprintf (file
, "Op %d -> rank: %d, tree: ", i
, oe
->rank
);
6084 print_generic_expr (file
, oe
->op
);
6085 fprintf (file
, "\n");
6089 /* Dump the operand entry vector OPS to STDERR. */
6092 debug_ops_vector (vec
<operand_entry
*> ops
)
6094 dump_ops_vector (stderr
, ops
);
6097 /* Bubble up return status from reassociate_bb. */
6102 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
6103 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun
));
6106 /* Initialize the reassociation pass. */
6113 int *bbs
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
);
6115 /* Find the loops, so that we can prevent moving calculations in
6117 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
6119 memset (&reassociate_stats
, 0, sizeof (reassociate_stats
));
6121 next_operand_entry_id
= 0;
6123 /* Reverse RPO (Reverse Post Order) will give us something where
6124 deeper loops come later. */
6125 pre_and_rev_post_order_compute (NULL
, bbs
, false);
6126 bb_rank
= XCNEWVEC (long, last_basic_block_for_fn (cfun
));
6127 operand_rank
= new hash_map
<tree
, long>;
6129 /* Give each default definition a distinct rank. This includes
6130 parameters and the static chain. Walk backwards over all
6131 SSA names so that we get proper rank ordering according
6132 to tree_swap_operands_p. */
6133 for (i
= num_ssa_names
- 1; i
> 0; --i
)
6135 tree name
= ssa_name (i
);
6136 if (name
&& SSA_NAME_IS_DEFAULT_DEF (name
))
6137 insert_operand_rank (name
, ++rank
);
6140 /* Set up rank for each BB */
6141 for (i
= 0; i
< n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
; i
++)
6142 bb_rank
[bbs
[i
]] = ++rank
<< 16;
6145 calculate_dominance_info (CDI_POST_DOMINATORS
);
6146 plus_negates
= vNULL
;
6149 /* Cleanup after the reassociation pass, and print stats if
6155 statistics_counter_event (cfun
, "Linearized",
6156 reassociate_stats
.linearized
);
6157 statistics_counter_event (cfun
, "Constants eliminated",
6158 reassociate_stats
.constants_eliminated
);
6159 statistics_counter_event (cfun
, "Ops eliminated",
6160 reassociate_stats
.ops_eliminated
);
6161 statistics_counter_event (cfun
, "Statements rewritten",
6162 reassociate_stats
.rewritten
);
6163 statistics_counter_event (cfun
, "Built-in pow[i] calls encountered",
6164 reassociate_stats
.pows_encountered
);
6165 statistics_counter_event (cfun
, "Built-in powi calls created",
6166 reassociate_stats
.pows_created
);
6168 delete operand_rank
;
6169 operand_entry_pool
.release ();
6171 plus_negates
.release ();
6172 free_dominance_info (CDI_POST_DOMINATORS
);
6173 loop_optimizer_finalize ();
6176 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6177 insertion of __builtin_powi calls.
6179 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6180 optimization of a gimple conditional. Otherwise returns zero. */
6183 execute_reassoc (bool insert_powi_p
)
6185 reassoc_insert_powi_p
= insert_powi_p
;
6189 bool cfg_cleanup_needed
;
6190 cfg_cleanup_needed
= do_reassoc ();
6191 repropagate_negates ();
6195 return cfg_cleanup_needed
? TODO_cleanup_cfg
: 0;
6200 const pass_data pass_data_reassoc
=
6202 GIMPLE_PASS
, /* type */
6203 "reassoc", /* name */
6204 OPTGROUP_NONE
, /* optinfo_flags */
6205 TV_TREE_REASSOC
, /* tv_id */
6206 ( PROP_cfg
| PROP_ssa
), /* properties_required */
6207 0, /* properties_provided */
6208 0, /* properties_destroyed */
6209 0, /* todo_flags_start */
6210 TODO_update_ssa_only_virtuals
, /* todo_flags_finish */
6213 class pass_reassoc
: public gimple_opt_pass
6216 pass_reassoc (gcc::context
*ctxt
)
6217 : gimple_opt_pass (pass_data_reassoc
, ctxt
), insert_powi_p (false)
6220 /* opt_pass methods: */
6221 opt_pass
* clone () { return new pass_reassoc (m_ctxt
); }
6222 void set_pass_param (unsigned int n
, bool param
)
6224 gcc_assert (n
== 0);
6225 insert_powi_p
= param
;
6227 virtual bool gate (function
*) { return flag_tree_reassoc
!= 0; }
6228 virtual unsigned int execute (function
*)
6229 { return execute_reassoc (insert_powi_p
); }
6232 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
6233 point 3a in the pass header comment. */
6235 }; // class pass_reassoc
6240 make_pass_reassoc (gcc::context
*ctxt
)
6242 return new pass_reassoc (ctxt
);