1 /* Reassociation for trees.
2 Copyright (C) 2005-2017 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
41 #include "gimple-fold.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
46 #include "tree-ssa-loop.h"
49 #include "langhooks.h"
54 #include "case-cfn-macros.h"
56 /* This is a simple global reassociation pass. It is, in part, based
57 on the LLVM pass of the same name (they do some things more or less
58 than we do, in different orders, etc.).
60 It consists of five steps:
62 1. Breaking up subtract operations into addition + negate, where
63 it would promote the reassociation of adds.
65 2. Left linearization of the expression trees, so that (A+B)+(C+D)
66 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
67 During linearization, we place the operands of the binary
68 expressions into a vector of operand_entry_*
70 3. Optimization of the operand lists, eliminating things like a + -a, a & a, etc.
73 3a. Combine repeated factors with the same occurrence counts
74 into a __builtin_powi call that will later be optimized into
75 an optimal number of multiplies.
77 4. Rewrite the expression trees we linearized and optimized so
78 they are in proper rank order.
80 5. Repropagate negates, as nothing else will clean it up ATM.
82 A bit of theory on #4, since nobody seems to write anything down
83 about why it makes sense to do it the way they do it:
85 We could do this much nicer theoretically, but don't (for reasons
86 explained after how to do it theoretically nice :P).
88 In order to promote the most redundancy elimination, you want
89 binary expressions whose operands are the same rank (or
90 preferably, the same value) exposed to the redundancy eliminator,
91 for possible elimination.
93 So the way to do this if we really cared, is to build the new op
94 tree from the leaves to the roots, merging as you go, and putting the
95 new op on the end of the worklist, until you are left with one
96 thing on the worklist.
98 IE if you have to rewrite the following set of operands (listed with
99 rank in parentheses), with opcode PLUS_EXPR:
101 a (1), b (1), c (1), d (2), e (2)
104 We start with our merge worklist empty, and the ops list with all of
those on it.
107 You want to first merge all leaves of the same rank, as much as
possible.
110 So first build a binary op of
112 mergetmp = a + b, and put "mergetmp" on the merge worklist.
114 Because there is no three operand form of PLUS_EXPR, c is not going to
115 be exposed to redundancy elimination as a rank 1 operand.
117 So you might as well throw it on the merge worklist (you could also
118 consider it to now be a rank two operand, and merge it with d and e,
119 but in this case, you then have evicted e from a binary op. So at
120 least in this situation, you can't win.)
122 Then build a binary op of d + e,
mergetmp2 = d + e,
125 and put mergetmp2 on the merge worklist.
127 so merge worklist = {mergetmp, c, mergetmp2}
129 Continue building binary ops of these operations until you have only
130 one operation left on the worklist.
135 mergetmp3 = mergetmp + c
137 worklist = {mergetmp2, mergetmp3}
139 mergetmp4 = mergetmp2 + mergetmp3
141 worklist = {mergetmp4}
143 Because we have one operation left, we can now just set the original
144 statement equal to the result of that operation.
146 This will at least expose a + b and d + e to redundancy elimination
147 as binary operations.
149 For extra points, you can reuse the old statements to build the
150 mergetmps, since you shouldn't run out.
152 So why don't we do this?
154 Because it's expensive, and rarely will help. Most trees we are
155 reassociating have 3 or fewer ops. If they have 2 ops, they already
156 will be written into a nice single binary op. If you have 3 ops, a
157 single simple check suffices to tell you whether the first two are of the
158 same rank. If so, you know to order it as
mergetmp = op1 + op2
161 newstmt = mergetmp + op3
instead of
mergetmp = op2 + op3
165 newstmt = mergetmp + op1
167 If all three are of the same rank, you can't expose them all in a
168 single binary operator anyway, so the above is *still* the best you can do.
171 Thus, this is what we do. When we have three ops left, we check to see
172 what order to put them in, and call it a day. As a nod to vector sum
173 reduction, we check if any of the ops are really a phi node that is a
174 destructive update for the associating op, and keep the destructive
175 update together for vector sum reduction recognition. */
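/* Illustrative sketch of the five steps above, added for exposition only;
   the expression and variable names here are hypothetical.

     e = (a + b) - (c - d);

     1) Break up the subtracts:      e = (a + b) + (-(c - d))
                                       = (a + b) + (-c + d)
     2) Left-linearize:              e = ((a + b) + -c) + d
        and collect the operands into a vector: {a, b, -c, d}
     3) Optimize the operand list:   if c happens to be the same value
        as a, the pair a + -c cancels and the list becomes {b, d, 0}.
     3a) Repeated factors in a multiplication chain, e.g. x*x*x*x, would
        instead become __builtin_powi (x, 4) when reassoc_insert_powi_p
        is set.
     4) Rewrite the remaining operands back into binary statements in
        rank order.
     5) Re-propagate any negates that survived step 1.  */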
177 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
178 point 3a in the pass header comment. */
179 static bool reassoc_insert_powi_p;
185 int constants_eliminated;
188 int pows_encountered;
192 /* Operator, rank pair. */
199 gimple *stmt_to_insert;
202 static object_allocator<operand_entry> operand_entry_pool
203 ("operand entry pool");
205 /* This is used to assign a unique ID to each struct operand_entry
206 so that qsort results are identical on different hosts. */
207 static unsigned int next_operand_entry_id;
209 /* Starting rank number for a given basic block, so that we can rank
210 operations using unmovable instructions in that BB based on the bb depth. */
212 static long *bb_rank;
214 /* Operand->rank hashtable. */
215 static hash_map<tree, long> *operand_rank;
217 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
218 all basic blocks, the CFG should be adjusted - the basic block
219 split right after that SSA_NAME's definition statement and before
220 the only use, which must be a bit ior. */
221 static vec<tree> reassoc_branch_fixups;
224 static long get_rank (tree);
225 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
227 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
228 possibly added by gsi_remove. */
231 reassoc_remove_stmt (gimple_stmt_iterator
*gsi
)
233 gimple
*stmt
= gsi_stmt (*gsi
);
235 if (!MAY_HAVE_DEBUG_BIND_STMTS
|| gimple_code (stmt
) == GIMPLE_PHI
)
236 return gsi_remove (gsi
, true);
238 gimple_stmt_iterator prev
= *gsi
;
240 unsigned uid
= gimple_uid (stmt
);
241 basic_block bb
= gimple_bb (stmt
);
242 bool ret
= gsi_remove (gsi
, true);
243 if (!gsi_end_p (prev
))
246 prev
= gsi_start_bb (bb
);
247 gimple
*end_stmt
= gsi_stmt (*gsi
);
248 while ((stmt
= gsi_stmt (prev
)) != end_stmt
)
250 gcc_assert (stmt
&& is_gimple_debug (stmt
) && gimple_uid (stmt
) == 0);
251 gimple_set_uid (stmt
, uid
);
257 /* Bias amount for loop-carried phis. We want this to be larger than
258 the depth of any reassociation tree we can see, but not larger than
259 the rank difference between two blocks. */
260 #define PHI_LOOP_BIAS (1 << 15)
262 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
263 an innermost loop, and the phi has only a single use which is inside
264 the loop, then the rank is the block rank of the loop latch plus an
265 extra bias for the loop-carried dependence. This causes expressions
266 calculated into an accumulator variable to be independent for each
267 iteration of the loop. If STMT is some other phi, the rank is the
268 block rank of its containing block. */
270 phi_rank (gimple
*stmt
)
272 basic_block bb
= gimple_bb (stmt
);
273 struct loop
*father
= bb
->loop_father
;
279 /* We only care about real loops (those with a latch). */
281 return bb_rank
[bb
->index
];
283 /* Interesting phis must be in headers of innermost loops. */
284 if (bb
!= father
->header
286 return bb_rank
[bb
->index
];
288 /* Ignore virtual SSA_NAMEs. */
289 res
= gimple_phi_result (stmt
);
290 if (virtual_operand_p (res
))
291 return bb_rank
[bb
->index
];
293 /* The phi definition must have a single use, and that use must be
294 within the loop. Otherwise this isn't an accumulator pattern. */
295 if (!single_imm_use (res
, &use
, &use_stmt
)
296 || gimple_bb (use_stmt
)->loop_father
!= father
)
297 return bb_rank
[bb
->index
];
299 /* Look for phi arguments from within the loop. If found, bias this phi. */
300 for (i
= 0; i
< gimple_phi_num_args (stmt
); i
++)
302 tree arg
= gimple_phi_arg_def (stmt
, i
);
303 if (TREE_CODE (arg
) == SSA_NAME
304 && !SSA_NAME_IS_DEFAULT_DEF (arg
))
306 gimple
*def_stmt
= SSA_NAME_DEF_STMT (arg
);
307 if (gimple_bb (def_stmt
)->loop_father
== father
)
308 return bb_rank
[father
->latch
->index
] + PHI_LOOP_BIAS
;
312 /* Must be an uninteresting phi. */
313 return bb_rank
[bb
->index
];
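/* Illustrative sketch, for exposition only; the names below are made up.
   For an accumulation loop such as

     double acc = 0.0;
     for (int i = 0; i < n; i++)
       acc = acc + a[i] + b[i];

   the loop-carried phi for acc gets bb_rank of the latch plus
   PHI_LOOP_BIAS, which is larger than the rank of a[i] or b[i].  When
   the addition chain is later rewritten in rank order, the phi result
   is therefore added last, i.e. acc = (a[i] + b[i]) + acc_phi, so the
   a[i] + b[i] part of each iteration no longer depends on the previous
   iteration.  */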
316 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
317 loop-carried dependence of an innermost loop, return TRUE; else return FALSE. */
320 loop_carried_phi (tree exp
)
325 if (TREE_CODE (exp
) != SSA_NAME
326 || SSA_NAME_IS_DEFAULT_DEF (exp
))
329 phi_stmt
= SSA_NAME_DEF_STMT (exp
);
331 if (gimple_code (SSA_NAME_DEF_STMT (exp
)) != GIMPLE_PHI
)
334 /* Non-loop-carried phis have block rank. Loop-carried phis have
335 an additional bias added in. If this phi doesn't have block rank,
336 it's biased and should not be propagated. */
337 block_rank
= bb_rank
[gimple_bb (phi_stmt
)->index
];
339 if (phi_rank (phi_stmt
) != block_rank
)
345 /* Return the maximum of RANK and the rank that should be propagated
346 from expression OP. For most operands, this is just the rank of OP.
347 For loop-carried phis, the value is zero to avoid undoing the bias
348 in favor of the phi. */
350 propagate_rank (long rank
, tree op
)
354 if (loop_carried_phi (op
))
357 op_rank
= get_rank (op
);
359 return MAX (rank
, op_rank
);
362 /* Look up the operand rank structure for expression E. */
365 find_operand_rank (tree e
)
367 long *slot
= operand_rank
->get (e
);
368 return slot
? *slot
: -1;
371 /* Insert {E,RANK} into the operand rank hashtable. */
374 insert_operand_rank (tree e
, long rank
)
376 gcc_assert (rank
> 0);
377 gcc_assert (!operand_rank
->put (e
, rank
));
380 /* Given an expression E, return the rank of the expression. */
385 /* SSA_NAME's have the rank of the expression they are the result of.
387 For globals and uninitialized values, the rank is 0.
388 For function arguments, use the pre-setup rank.
389 For PHI nodes, stores, asm statements, etc, we use the rank of the BB.
391 For simple operations, the rank is the maximum rank of any of
392 its operands, or the bb_rank, whichever is less.
393 I make no claims that this is optimal; however, it gives good results. */
396 /* We make an exception to the normal ranking system to break
397 dependences of accumulator variables in loops. Suppose we
398 have a simple one-block loop containing:
x_1 = phi(x_0, x_2)
b = a + x_1
c = b + d
x_2 = c + e
405 As shown, each iteration of the calculation into x is fully
406 dependent upon the iteration before it. We would prefer to
407 see this in the form:
x_1 = phi(x_0, x_2)
b = a + d
c = b + e
x_2 = c + x_1
414 If the loop is unrolled, the calculations of b and c from
415 different iterations can be interleaved.
417 To obtain this result during reassociation, we bias the rank
418 of the phi definition x_1 upward, when it is recognized as an
419 accumulator pattern. The artificial rank causes it to be
420 added last, providing the desired independence. */
422 if (TREE_CODE (e
) == SSA_NAME
)
429 if (SSA_NAME_IS_DEFAULT_DEF (e
))
430 return find_operand_rank (e
);
432 stmt
= SSA_NAME_DEF_STMT (e
);
433 if (gimple_code (stmt
) == GIMPLE_PHI
)
434 return phi_rank (stmt
);
436 if (!is_gimple_assign (stmt
))
437 return bb_rank
[gimple_bb (stmt
)->index
];
439 /* If we already have a rank for this expression, use that. */
440 rank
= find_operand_rank (e
);
444 /* Otherwise, find the maximum rank for the operands. As an
445 exception, remove the bias from loop-carried phis when propagating
446 the rank so that dependent operations are not also biased. */
447 /* Simply walk over all SSA uses - this takes advantage of the
448 fact that non-SSA operands are is_gimple_min_invariant and thus have rank 0. */
451 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
452 rank
= propagate_rank (rank
, op
);
454 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
456 fprintf (dump_file
, "Rank for ");
457 print_generic_expr (dump_file
, e
);
458 fprintf (dump_file
, " is %ld\n", (rank
+ 1));
461 /* Note the rank in the hashtable so we don't recompute it. */
462 insert_operand_rank (e
, (rank
+ 1));
466 /* Constants, globals, etc., are rank 0 */
471 /* We want integer ones to end up last no matter what, since they are
472 the ones we can do the most with. */
473 #define INTEGER_CONST_TYPE 1 << 3
474 #define FLOAT_CONST_TYPE 1 << 2
475 #define OTHER_CONST_TYPE 1 << 1
477 /* Classify an invariant tree into integer, float, or other, so that
478 we can sort them to be near other constants of the same type. */
480 constant_type (tree t
)
482 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
483 return INTEGER_CONST_TYPE
;
484 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
485 return FLOAT_CONST_TYPE
;
487 return OTHER_CONST_TYPE
;
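/* Illustrative sketch, for exposition only: all constants have rank 0,
   so they sort after the SSA operands, and constant_type groups
   constants of the same kind next to each other, with integer constants
   last.  For an integer MULT_EXPR operand list {a, 3, b, 5} the sorted
   list ends with the two constants adjacent, so optimize_ops_list can
   fold them and the chain becomes a * b * 15.  */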
490 /* qsort comparison function to sort operand entries PA and PB by rank
491 so that the sorted array is ordered by rank in decreasing order. */
493 sort_by_operand_rank (const void *pa
, const void *pb
)
495 const operand_entry
*oea
= *(const operand_entry
*const *)pa
;
496 const operand_entry
*oeb
= *(const operand_entry
*const *)pb
;
498 if (oeb
->rank
!= oea
->rank
)
499 return oeb
->rank
> oea
->rank
? 1 : -1;
501 /* It's nicer for optimize_ops_list if constants that are likely
502 to fold when added/multiplied/whatever are put next to each
503 other. Since all constants have rank 0, order them by type. */
506 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
507 return constant_type (oeb
->op
) - constant_type (oea
->op
);
509 /* To make sorting result stable, we use unique IDs to determine order. */
511 return oeb
->id
> oea
->id
? 1 : -1;
514 if (TREE_CODE (oea
->op
) != SSA_NAME
)
516 if (TREE_CODE (oeb
->op
) != SSA_NAME
)
517 return oeb
->id
> oea
->id
? 1 : -1;
521 else if (TREE_CODE (oeb
->op
) != SSA_NAME
)
524 /* Lastly, make sure the versions that are the same go next to each other. */
526 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
528 /* SSA_NAME_VERSION is assigned pretty randomly because we reuse
529 versions of removed SSA_NAMEs, so if possible, prefer to sort
530 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT. */
532 gimple
*stmta
= SSA_NAME_DEF_STMT (oea
->op
);
533 gimple
*stmtb
= SSA_NAME_DEF_STMT (oeb
->op
);
534 basic_block bba
= gimple_bb (stmta
);
535 basic_block bbb
= gimple_bb (stmtb
);
538 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
539 but the other might not. */
544 /* If neither is, compare bb_rank. */
545 if (bb_rank
[bbb
->index
] != bb_rank
[bba
->index
])
546 return (bb_rank
[bbb
->index
] >> 16) - (bb_rank
[bba
->index
] >> 16);
549 bool da
= reassoc_stmt_dominates_stmt_p (stmta
, stmtb
);
550 bool db
= reassoc_stmt_dominates_stmt_p (stmtb
, stmta
);
554 return SSA_NAME_VERSION (oeb
->op
) > SSA_NAME_VERSION (oea
->op
) ? 1 : -1;
557 return oeb
->id
> oea
->id
? 1 : -1;
560 /* Add an operand entry to *OPS for the tree operand OP. */
563 add_to_ops_vec (vec
<operand_entry
*> *ops
, tree op
, gimple
*stmt_to_insert
= NULL
)
565 operand_entry
*oe
= operand_entry_pool
.allocate ();
568 oe
->rank
= get_rank (op
);
569 oe
->id
= next_operand_entry_id
++;
571 oe
->stmt_to_insert
= stmt_to_insert
;
575 /* Add an operand entry to *OPS for the tree operand OP with repeat count REPEAT. */
579 add_repeat_to_ops_vec (vec
<operand_entry
*> *ops
, tree op
,
580 HOST_WIDE_INT repeat
)
582 operand_entry
*oe
= operand_entry_pool
.allocate ();
585 oe
->rank
= get_rank (op
);
586 oe
->id
= next_operand_entry_id
++;
588 oe
->stmt_to_insert
= NULL
;
591 reassociate_stats
.pows_encountered
++;
594 /* Return true if STMT is a reassociable operation containing a binary
595 operation with tree code CODE, and is inside LOOP. */
598 is_reassociable_op (gimple
*stmt
, enum tree_code code
, struct loop
*loop
)
600 basic_block bb
= gimple_bb (stmt
);
602 if (gimple_bb (stmt
) == NULL
)
605 if (!flow_bb_inside_loop_p (loop
, bb
))
608 if (is_gimple_assign (stmt
)
609 && gimple_assign_rhs_code (stmt
) == code
610 && has_single_use (gimple_assign_lhs (stmt
)))
612 tree rhs1
= gimple_assign_rhs1 (stmt
);
613 tree rhs2 = gimple_assign_rhs2 (stmt);
614 if (TREE_CODE (rhs1
) == SSA_NAME
615 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1
))
618 && TREE_CODE (rhs2
) == SSA_NAME
619 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2
))
628 /* Return true if STMT is a nop-conversion. */
631 gimple_nop_conversion_p (gimple
*stmt
)
633 if (gassign
*ass
= dyn_cast
<gassign
*> (stmt
))
635 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass
))
636 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass
)),
637 TREE_TYPE (gimple_assign_rhs1 (ass
))))
643 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
644 operand of that operation. Otherwise, return NULL. */
647 get_unary_op (tree name
, enum tree_code opcode
)
649 gimple
*stmt
= SSA_NAME_DEF_STMT (name
);
651 /* Look through nop conversions (sign changes). */
652 if (gimple_nop_conversion_p (stmt
)
653 && TREE_CODE (gimple_assign_rhs1 (stmt
)) == SSA_NAME
)
654 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
656 if (!is_gimple_assign (stmt
))
659 if (gimple_assign_rhs_code (stmt
) == opcode
)
660 return gimple_assign_rhs1 (stmt
);
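/* Illustrative sketch, for exposition only; the SSA names are made up.
   For

     t_1 = -x_2;
     u_3 = (int) t_1;      <- sign-changing nop conversion

   get_unary_op (u_3, NEGATE_EXPR) looks through the conversion and
   returns x_2, while get_unary_op (u_3, BIT_NOT_EXPR) returns NULL_TREE.  */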
664 /* Return true if OP1 and OP2 have the same value if casted to either type. */
667 ops_equal_values_p (tree op1
, tree op2
)
673 if (TREE_CODE (op1
) == SSA_NAME
)
675 gimple
*stmt
= SSA_NAME_DEF_STMT (op1
);
676 if (gimple_nop_conversion_p (stmt
))
678 op1
= gimple_assign_rhs1 (stmt
);
684 if (TREE_CODE (op2
) == SSA_NAME
)
686 gimple
*stmt
= SSA_NAME_DEF_STMT (op2
);
687 if (gimple_nop_conversion_p (stmt
))
689 op2
= gimple_assign_rhs1 (stmt
);
700 /* If CURR and LAST are a pair of ops that OPCODE allows us to
701 eliminate through equivalences, do so, remove them from OPS, and
702 return true. Otherwise, return false. */
705 eliminate_duplicate_pair (enum tree_code opcode
,
706 vec
<operand_entry
*> *ops
,
713 /* If we have two of the same op, and the opcode is &, |, min, or max,
714 we can eliminate one of them.
715 If we have two of the same op, and the opcode is ^, we can
716 eliminate both of them. */
718 if (last
&& last
->op
== curr
->op
)
726 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
728 fprintf (dump_file
, "Equivalence: ");
729 print_generic_expr (dump_file
, curr
->op
);
730 fprintf (dump_file
, " [&|minmax] ");
731 print_generic_expr (dump_file
, last
->op
);
732 fprintf (dump_file
, " -> ");
733 print_generic_stmt (dump_file
, last
->op
);
736 ops
->ordered_remove (i
);
737 reassociate_stats
.ops_eliminated
++;
742 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
744 fprintf (dump_file
, "Equivalence: ");
745 print_generic_expr (dump_file
, curr
->op
);
746 fprintf (dump_file
, " ^ ");
747 print_generic_expr (dump_file
, last
->op
);
748 fprintf (dump_file
, " -> nothing\n");
751 reassociate_stats
.ops_eliminated
+= 2;
753 if (ops
->length () == 2)
756 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
761 ops
->ordered_remove (i
-1);
762 ops
->ordered_remove (i
-1);
774 static vec
<tree
> plus_negates
;
776 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
777 expression, look in OPS for a corresponding positive operation to cancel
778 it out. If we find one, remove the other from OPS, replace
779 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise, return false. */
783 eliminate_plus_minus_pair (enum tree_code opcode
,
784 vec
<operand_entry
*> *ops
,
785 unsigned int currindex
,
793 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
796 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
797 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
798 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
801 /* Any non-negated version will have a rank that is one less than
802 the current rank. So once we hit those ranks, if we don't find one, we can stop. */
805 for (i
= currindex
+ 1;
806 ops
->iterate (i
, &oe
)
807 && oe
->rank
>= curr
->rank
- 1 ;
811 && ops_equal_values_p (oe
->op
, negateop
))
813 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
815 fprintf (dump_file
, "Equivalence: ");
816 print_generic_expr (dump_file
, negateop
);
817 fprintf (dump_file
, " + -");
818 print_generic_expr (dump_file
, oe
->op
);
819 fprintf (dump_file
, " -> 0\n");
822 ops
->ordered_remove (i
);
823 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
824 ops
->ordered_remove (currindex
);
825 reassociate_stats
.ops_eliminated
++;
830 && ops_equal_values_p (oe
->op
, notop
))
832 tree op_type
= TREE_TYPE (oe
->op
);
834 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
836 fprintf (dump_file
, "Equivalence: ");
837 print_generic_expr (dump_file
, notop
);
838 fprintf (dump_file
, " + ~");
839 print_generic_expr (dump_file
, oe
->op
);
840 fprintf (dump_file
, " -> -1\n");
843 ops
->ordered_remove (i
);
844 add_to_ops_vec (ops
, build_all_ones_cst (op_type
));
845 ops
->ordered_remove (currindex
);
846 reassociate_stats
.ops_eliminated
++;
852 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
853 save it for later inspection in repropagate_negates(). */
854 if (negateop
!= NULL_TREE
855 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr
->op
)) == NEGATE_EXPR
)
856 plus_negates
.safe_push (curr
->op
);
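/* Illustrative sketch of the two cancellations above, for exposition
   only:

     a + b + (-a)   ->  operand list {b, 0}     (negate pair -> 0)
     a + c + (~a)   ->  operand list {c, -1}    (bitwise-not pair -> -1)

   Surviving negate operands are remembered in plus_negates so that
   repropagate_negates can clean them up later.  */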
861 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
862 bitwise not expression, look in OPS for a corresponding operand to
863 cancel it out. If we find one, remove the other from OPS, replace
864 OPS[CURRINDEX] with 0, and return true. Otherwise, return false. */
868 eliminate_not_pairs (enum tree_code opcode
,
869 vec
<operand_entry
*> *ops
,
870 unsigned int currindex
,
877 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
878 || TREE_CODE (curr
->op
) != SSA_NAME
)
881 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
882 if (notop
== NULL_TREE
)
885 /* Any non-not version will have a rank that is one less than
886 the current rank. So once we hit those ranks, if we don't find one, we can stop. */
889 for (i
= currindex
+ 1;
890 ops
->iterate (i
, &oe
)
891 && oe
->rank
>= curr
->rank
- 1;
896 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
898 fprintf (dump_file
, "Equivalence: ");
899 print_generic_expr (dump_file
, notop
);
900 if (opcode
== BIT_AND_EXPR
)
901 fprintf (dump_file
, " & ~");
902 else if (opcode
== BIT_IOR_EXPR
)
903 fprintf (dump_file
, " | ~");
904 print_generic_expr (dump_file
, oe
->op
);
905 if (opcode
== BIT_AND_EXPR
)
906 fprintf (dump_file
, " -> 0\n");
907 else if (opcode
== BIT_IOR_EXPR
)
908 fprintf (dump_file
, " -> -1\n");
911 if (opcode
== BIT_AND_EXPR
)
912 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
913 else if (opcode
== BIT_IOR_EXPR
)
914 oe
->op
= build_all_ones_cst (TREE_TYPE (oe
->op
));
916 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
918 ops
->quick_push (oe
);
926 /* Use constant value that may be present in OPS to try to eliminate
927 operands. Note that this function is only really used when we've
928 eliminated ops for other reasons, or merged constants. Across
929 single statements, fold already does all of this, plus more. There
930 is little point in duplicating logic, so I've only included the
931 identities that I could ever construct testcases to trigger. */
934 eliminate_using_constants (enum tree_code opcode
,
935 vec
<operand_entry
*> *ops
)
937 operand_entry
*oelast
= ops
->last ();
938 tree type
= TREE_TYPE (oelast
->op
);
940 if (oelast
->rank
== 0
941 && (ANY_INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
946 if (integer_zerop (oelast
->op
))
948 if (ops
->length () != 1)
950 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
951 fprintf (dump_file
, "Found & 0, removing all other ops\n");
953 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
956 ops
->quick_push (oelast
);
960 else if (integer_all_onesp (oelast
->op
))
962 if (ops
->length () != 1)
964 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
965 fprintf (dump_file
, "Found & -1, removing\n");
967 reassociate_stats
.ops_eliminated
++;
972 if (integer_all_onesp (oelast
->op
))
974 if (ops
->length () != 1)
976 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
977 fprintf (dump_file
, "Found | -1, removing all other ops\n");
979 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
982 ops
->quick_push (oelast
);
986 else if (integer_zerop (oelast
->op
))
988 if (ops
->length () != 1)
990 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
991 fprintf (dump_file
, "Found | 0, removing\n");
993 reassociate_stats
.ops_eliminated
++;
998 if (integer_zerop (oelast
->op
)
999 || (FLOAT_TYPE_P (type
)
1000 && !HONOR_NANS (type
)
1001 && !HONOR_SIGNED_ZEROS (type
)
1002 && real_zerop (oelast
->op
)))
1004 if (ops
->length () != 1)
1006 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1007 fprintf (dump_file
, "Found * 0, removing all other ops\n");
1009 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
1011 ops
->quick_push (oelast
);
1015 else if (integer_onep (oelast
->op
)
1016 || (FLOAT_TYPE_P (type
)
1017 && !HONOR_SNANS (type
)
1018 && real_onep (oelast
->op
)))
1020 if (ops
->length () != 1)
1022 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1023 fprintf (dump_file
, "Found * 1, removing\n");
1025 reassociate_stats
.ops_eliminated
++;
1033 if (integer_zerop (oelast
->op
)
1034 || (FLOAT_TYPE_P (type
)
1035 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
1036 && fold_real_zero_addition_p (type
, oelast
->op
,
1037 opcode
== MINUS_EXPR
)))
1039 if (ops
->length () != 1)
1041 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1042 fprintf (dump_file
, "Found [|^+] 0, removing\n");
1044 reassociate_stats
.ops_eliminated
++;
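/* Illustrative sketch of the identities handled above, for exposition
   only:

     a & 0          ->  0       (all other ops dropped)
     a & -1         ->  a       (the -1 is dropped)
     a | -1         ->  -1
     a | 0          ->  a
     a * 0          ->  0       (for floats only when NaNs and signed
                                 zeros need not be honored)
     a * 1          ->  a
     a + 0, a ^ 0   ->  a                                              */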
1056 static void linearize_expr_tree (vec
<operand_entry
*> *, gimple
*,
1059 /* Structure for tracking and counting operands. */
1063 enum tree_code oecode
;
1068 /* The heap for the oecount hashtable and the sorted list of operands. */
1069 static vec
<oecount
> cvec
;
1072 /* Oecount hashtable helpers. */
1074 struct oecount_hasher
: int_hash
<int, 0, 1>
1076 static inline hashval_t
hash (int);
1077 static inline bool equal (int, int);
1080 /* Hash function for oecount. */
1083 oecount_hasher::hash (int p
)
1085 const oecount
*c
= &cvec
[p
- 42];
1086 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
1089 /* Comparison function for oecount. */
1092 oecount_hasher::equal (int p1
, int p2
)
1094 const oecount
*c1
= &cvec
[p1
- 42];
1095 const oecount
*c2
= &cvec
[p2
- 42];
1096 return c1
->oecode
== c2
->oecode
&& c1
->op
== c2
->op
;
1099 /* Comparison function for qsort sorting oecount elements by count. */
1102 oecount_cmp (const void *p1
, const void *p2
)
1104 const oecount
*c1
= (const oecount
*)p1
;
1105 const oecount
*c2
= (const oecount
*)p2
;
1106 if (c1
->cnt
!= c2
->cnt
)
1107 return c1
->cnt
> c2
->cnt
? 1 : -1;
1109 /* If counts are identical, use unique IDs to stabilize qsort. */
1110 return c1
->id
> c2
->id
? 1 : -1;
1113 /* Return TRUE iff STMT represents a builtin call that raises OP
1114 to some exponent. */
1117 stmt_is_power_of_op (gimple
*stmt
, tree op
)
1119 if (!is_gimple_call (stmt
))
1122 switch (gimple_call_combined_fn (stmt
))
1126 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1133 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1134 in place and return the result. Assumes that stmt_is_power_of_op
1135 was previously called for STMT and returned TRUE. */
1137 static HOST_WIDE_INT
1138 decrement_power (gimple
*stmt
)
1140 REAL_VALUE_TYPE c
, cint
;
1141 HOST_WIDE_INT power
;
1144 switch (gimple_call_combined_fn (stmt
))
1147 arg1
= gimple_call_arg (stmt
, 1);
1148 c
= TREE_REAL_CST (arg1
);
1149 power
= real_to_integer (&c
) - 1;
1150 real_from_integer (&cint
, VOIDmode
, power
, SIGNED
);
1151 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1155 arg1
= gimple_call_arg (stmt
, 1);
1156 power
= TREE_INT_CST_LOW (arg1
) - 1;
1157 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1165 /* Replace the SSA name defined by STMT with a new SSA name and replace
1166 all uses of the old name with the new one. Also return the new SSA name. */
1169 make_new_ssa_for_def (gimple
*stmt
, enum tree_code opcode
, tree op
)
1173 imm_use_iterator iter
;
1174 tree new_lhs
, new_debug_lhs
= NULL_TREE
;
1175 tree lhs
= gimple_get_lhs (stmt
);
1177 new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
1178 gimple_set_lhs (stmt
, new_lhs
);
1180 /* Also need to update GIMPLE_DEBUGs. */
1181 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
1183 tree repl
= new_lhs
;
1184 if (is_gimple_debug (use_stmt
))
1186 if (new_debug_lhs
== NULL_TREE
)
1188 new_debug_lhs
= make_node (DEBUG_EXPR_DECL
);
1190 = gimple_build_debug_bind (new_debug_lhs
,
1191 build2 (opcode
, TREE_TYPE (lhs
),
1194 DECL_ARTIFICIAL (new_debug_lhs
) = 1;
1195 TREE_TYPE (new_debug_lhs
) = TREE_TYPE (lhs
);
1196 SET_DECL_MODE (new_debug_lhs
, TYPE_MODE (TREE_TYPE (lhs
)));
1197 gimple_set_uid (def_temp
, gimple_uid (stmt
));
1198 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1199 gsi_insert_after (&gsi
, def_temp
, GSI_SAME_STMT
);
1201 repl
= new_debug_lhs
;
1203 FOR_EACH_IMM_USE_ON_STMT (use
, iter
)
1204 SET_USE (use
, repl
);
1205 update_stmt (use_stmt
);
1210 /* Replace all SSAs defined in STMTS_TO_FIX and replace their
1211 uses with new SSAs. Also do this for the stmt that defines DEF
1212 if *DEF is not OP. */
1215 make_new_ssa_for_all_defs (tree
*def
, enum tree_code opcode
, tree op
,
1216 vec
<gimple
*> &stmts_to_fix
)
1222 && TREE_CODE (*def
) == SSA_NAME
1223 && (stmt
= SSA_NAME_DEF_STMT (*def
))
1224 && gimple_code (stmt
) != GIMPLE_NOP
)
1225 *def
= make_new_ssa_for_def (stmt
, opcode
, op
);
1227 FOR_EACH_VEC_ELT (stmts_to_fix
, i
, stmt
)
1228 make_new_ssa_for_def (stmt
, opcode
, op
);
1231 /* Find the single immediate use of STMT's LHS, and replace it
1232 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1233 replace *DEF with OP as well. */
1236 propagate_op_to_single_use (tree op
, gimple
*stmt
, tree
*def
)
1241 gimple_stmt_iterator gsi
;
1243 if (is_gimple_call (stmt
))
1244 lhs
= gimple_call_lhs (stmt
);
1246 lhs
= gimple_assign_lhs (stmt
);
1248 gcc_assert (has_single_use (lhs
));
1249 single_imm_use (lhs
, &use
, &use_stmt
);
1253 if (TREE_CODE (op
) != SSA_NAME
)
1254 update_stmt (use_stmt
);
1255 gsi
= gsi_for_stmt (stmt
);
1256 unlink_stmt_vdef (stmt
);
1257 reassoc_remove_stmt (&gsi
);
1258 release_defs (stmt
);
1261 /* Walks the linear chain with result *DEF searching for an operation
1262 with operand OP and code OPCODE removing that from the chain. *DEF
1263 is updated if there is only one operand but no operation left. */
1266 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1268 tree orig_def
= *def
;
1269 gimple
*stmt
= SSA_NAME_DEF_STMT (*def
);
1270 /* PR72835 - Record the stmt chain that has to be updated such that
1271 we don't use the same LHS when the values computed are different. */
1272 auto_vec
<gimple
*, 64> stmts_to_fix
;
1278 if (opcode
== MULT_EXPR
)
1280 if (stmt_is_power_of_op (stmt
, op
))
1282 if (decrement_power (stmt
) == 1)
1284 if (stmts_to_fix
.length () > 0)
1285 stmts_to_fix
.pop ();
1286 propagate_op_to_single_use (op
, stmt
, def
);
1290 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
)
1292 if (gimple_assign_rhs1 (stmt
) == op
)
1294 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
1295 if (stmts_to_fix
.length () > 0)
1296 stmts_to_fix
.pop ();
1297 propagate_op_to_single_use (cst
, stmt
, def
);
1300 else if (integer_minus_onep (op
)
1301 || real_minus_onep (op
))
1303 gimple_assign_set_rhs_code
1304 (stmt
, TREE_CODE (gimple_assign_rhs1 (stmt
)));
1310 name
= gimple_assign_rhs1 (stmt
);
1312 /* If this is the operation we look for and one of the operands
1313 is ours, simply propagate the other operand into the stmt's single use. */
1315 if (gimple_assign_rhs_code (stmt
) == opcode
1317 || gimple_assign_rhs2 (stmt
) == op
))
1320 name
= gimple_assign_rhs2 (stmt
);
1321 if (stmts_to_fix
.length () > 0)
1322 stmts_to_fix
.pop ();
1323 propagate_op_to_single_use (name
, stmt
, def
);
1327 /* We might have a multiply of two __builtin_pow* calls, and
1328 the operand might be hiding in the rightmost one. Likewise
1329 this can happen for a negate. */
1330 if (opcode
== MULT_EXPR
1331 && gimple_assign_rhs_code (stmt
) == opcode
1332 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1333 && has_single_use (gimple_assign_rhs2 (stmt
)))
1335 gimple
*stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1336 if (stmt_is_power_of_op (stmt2
, op
))
1338 if (decrement_power (stmt2
) == 1)
1339 propagate_op_to_single_use (op
, stmt2
, def
);
1341 stmts_to_fix
.safe_push (stmt2
);
1344 else if (is_gimple_assign (stmt2
)
1345 && gimple_assign_rhs_code (stmt2
) == NEGATE_EXPR
)
1347 if (gimple_assign_rhs1 (stmt2
) == op
)
1349 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
1350 propagate_op_to_single_use (cst
, stmt2
, def
);
1353 else if (integer_minus_onep (op
)
1354 || real_minus_onep (op
))
1356 stmts_to_fix
.safe_push (stmt2
);
1357 gimple_assign_set_rhs_code
1358 (stmt2
, TREE_CODE (gimple_assign_rhs1 (stmt2
)));
1364 /* Continue walking the chain. */
1365 gcc_assert (name
!= op
1366 && TREE_CODE (name
) == SSA_NAME
);
1367 stmt
= SSA_NAME_DEF_STMT (name
);
1368 stmts_to_fix
.safe_push (stmt
);
1372 if (stmts_to_fix
.length () > 0 || *def
== orig_def
)
1373 make_new_ssa_for_all_defs (def
, opcode
, op
, stmts_to_fix
);
1376 /* Returns true if statement S1 dominates statement S2. Like
1377 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1380 reassoc_stmt_dominates_stmt_p (gimple
*s1
, gimple
*s2
)
1382 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1384 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1385 SSA_NAME. Assume it lives at the beginning of function and
1386 thus dominates everything. */
1387 if (!bb1
|| s1
== s2
)
1390 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1396 /* PHIs in the same basic block are assumed to be
1397 executed all in parallel, if only one stmt is a PHI,
1398 it dominates the other stmt in the same basic block. */
1399 if (gimple_code (s1
) == GIMPLE_PHI
)
1402 if (gimple_code (s2
) == GIMPLE_PHI
)
1405 gcc_assert (gimple_uid (s1
) && gimple_uid (s2
));
1407 if (gimple_uid (s1
) < gimple_uid (s2
))
1410 if (gimple_uid (s1
) > gimple_uid (s2
))
1413 gimple_stmt_iterator gsi
= gsi_for_stmt (s1
);
1414 unsigned int uid
= gimple_uid (s1
);
1415 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1417 gimple
*s
= gsi_stmt (gsi
);
1418 if (gimple_uid (s
) != uid
)
1427 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
1430 /* Insert STMT after INSERT_POINT. */
1433 insert_stmt_after (gimple
*stmt
, gimple
*insert_point
)
1435 gimple_stmt_iterator gsi
;
1438 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1439 bb
= gimple_bb (insert_point
);
1440 else if (!stmt_ends_bb_p (insert_point
))
1442 gsi
= gsi_for_stmt (insert_point
);
1443 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1444 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1448 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1449 thus if it must end a basic block, it should be a call that can
1450 throw, or some assignment that can throw. If it throws, the LHS
1451 of it will not be initialized though, so only valid places using
1452 the SSA_NAME should be dominated by the fallthru edge. */
1453 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
1454 gsi
= gsi_after_labels (bb
);
1455 if (gsi_end_p (gsi
))
1457 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1458 gimple_set_uid (stmt
,
1459 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1462 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1463 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1466 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1467 the result. Places the statement after the definition of either
1468 OP1 or OP2. Returns the new statement. */
1471 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
1473 gimple
*op1def
= NULL
, *op2def
= NULL
;
1474 gimple_stmt_iterator gsi
;
1478 /* Create the addition statement. */
1479 op
= make_ssa_name (type
);
1480 sum
= gimple_build_assign (op
, opcode
, op1
, op2
);
1482 /* Find an insertion place and insert. */
1483 if (TREE_CODE (op1
) == SSA_NAME
)
1484 op1def
= SSA_NAME_DEF_STMT (op1
);
1485 if (TREE_CODE (op2
) == SSA_NAME
)
1486 op2def
= SSA_NAME_DEF_STMT (op2
);
1487 if ((!op1def
|| gimple_nop_p (op1def
))
1488 && (!op2def
|| gimple_nop_p (op2def
)))
1490 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1491 if (gsi_end_p (gsi
))
1493 gimple_stmt_iterator gsi2
1494 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1495 gimple_set_uid (sum
,
1496 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1499 gimple_set_uid (sum
, gimple_uid (gsi_stmt (gsi
)));
1500 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1504 gimple
*insert_point
;
1505 if ((!op1def
|| gimple_nop_p (op1def
))
1506 || (op2def
&& !gimple_nop_p (op2def
)
1507 && reassoc_stmt_dominates_stmt_p (op1def
, op2def
)))
1508 insert_point
= op2def
;
1510 insert_point
= op1def
;
1511 insert_stmt_after (sum
, insert_point
);
1518 /* Perform un-distribution of divisions and multiplications.
1519 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1520 to (A + B) / X for real X.
1522 The algorithm is organized as follows.
1524 - First we walk the addition chain *OPS looking for summands that
1525 are defined by a multiplication or a real division. This results
1526 in the candidates bitmap with relevant indices into *OPS.
1528 - Second we build the chains of multiplications or divisions for
1529 these candidates, counting the number of occurrences of (operand, code)
1530 pairs in all of the candidates chains.
1532 - Third we sort the (operand, code) pairs by number of occurrence and
1533 process them starting with the pair with the most uses.
1535 * For each such pair we walk the candidates again to build a
1536 second candidate bitmap noting all multiplication/division chains
1537 that have at least one occurrence of (operand, code).
1539 * We build an alternate addition chain only covering these
1540 candidates with one (operand, code) operation removed from their
1541 multiplication/division chain.
1543 * The first candidate gets replaced by the alternate addition chain
1544 multiplied/divided by the operand.
1546 * All candidate chains get disabled for further processing and
1547 processing of (operand, code) pairs continues.
1549 The alternate addition chains built are re-processed by the main
1550 reassociation algorithm which allows optimizing a * x * y + b * y * x
1551 to (a + b ) * x * y in one invocation of the reassociation pass. */
1554 undistribute_ops_list (enum tree_code opcode
,
1555 vec
<operand_entry
*> *ops
, struct loop
*loop
)
1557 unsigned int length
= ops
->length ();
1560 unsigned nr_candidates
, nr_candidates2
;
1561 sbitmap_iterator sbi0
;
1562 vec
<operand_entry
*> *subops
;
1563 bool changed
= false;
1564 unsigned int next_oecount_id
= 0;
1567 || opcode
!= PLUS_EXPR
)
1570 /* Build a list of candidates to process. */
1571 auto_sbitmap
candidates (length
);
1572 bitmap_clear (candidates
);
1574 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1576 enum tree_code dcode
;
1579 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1581 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1582 if (!is_gimple_assign (oe1def
))
1584 dcode
= gimple_assign_rhs_code (oe1def
);
1585 if ((dcode
!= MULT_EXPR
1586 && dcode
!= RDIV_EXPR
)
1587 || !is_reassociable_op (oe1def
, dcode
, loop
))
1590 bitmap_set_bit (candidates
, i
);
1594 if (nr_candidates
< 2)
1597 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1599 fprintf (dump_file
, "searching for un-distribute opportunities ");
1600 print_generic_expr (dump_file
,
1601 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, 0);
1602 fprintf (dump_file
, " %d\n", nr_candidates
);
1605 /* Build linearized sub-operand lists and the counting table. */
1608 hash_table
<oecount_hasher
> ctable (15);
1610 /* ??? Macro arguments cannot have multi-argument template types in
1611 them. This typedef is needed to work around that limitation. */
1612 typedef vec
<operand_entry
*> vec_operand_entry_t_heap
;
1613 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1614 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1617 enum tree_code oecode
;
1620 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1621 oecode
= gimple_assign_rhs_code (oedef
);
1622 linearize_expr_tree (&subops
[i
], oedef
,
1623 associative_tree_code (oecode
), false);
1625 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1632 c
.id
= next_oecount_id
++;
1635 idx
= cvec
.length () + 41;
1636 slot
= ctable
.find_slot (idx
, INSERT
);
1644 cvec
[*slot
- 42].cnt
++;
1649 /* Sort the counting table. */
1650 cvec
.qsort (oecount_cmp
);
1652 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1655 fprintf (dump_file
, "Candidates:\n");
1656 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1658 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1659 c
->oecode
== MULT_EXPR
1660 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1661 print_generic_expr (dump_file
, c
->op
);
1662 fprintf (dump_file
, "\n");
1666 /* Process the (operand, code) pairs in order of most occurrence. */
1667 auto_sbitmap
candidates2 (length
);
1668 while (!cvec
.is_empty ())
1670 oecount
*c
= &cvec
.last ();
1674 /* Now collect the operands in the outer chain that contain
1675 the common operand in their inner chain. */
1676 bitmap_clear (candidates2
);
1678 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1681 enum tree_code oecode
;
1683 tree op
= (*ops
)[i
]->op
;
1685 /* If we undistributed in this chain already this may be a constant. */
1687 if (TREE_CODE (op
) != SSA_NAME
)
1690 oedef
= SSA_NAME_DEF_STMT (op
);
1691 oecode
= gimple_assign_rhs_code (oedef
);
1692 if (oecode
!= c
->oecode
)
1695 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1697 if (oe1
->op
== c
->op
)
1699 bitmap_set_bit (candidates2
, i
);
1706 if (nr_candidates2
>= 2)
1708 operand_entry
*oe1
, *oe2
;
1710 int first
= bitmap_first_set_bit (candidates2
);
1712 /* Build the new addition chain. */
1713 oe1
= (*ops
)[first
];
1714 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1716 fprintf (dump_file
, "Building (");
1717 print_generic_expr (dump_file
, oe1
->op
);
1719 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1720 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1724 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1726 fprintf (dump_file
, " + ");
1727 print_generic_expr (dump_file
, oe2
->op
);
1729 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1730 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1731 oe1
->op
, oe2
->op
, opcode
);
1732 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1734 oe1
->op
= gimple_get_lhs (sum
);
1737 /* Apply the multiplication/division. */
1738 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1739 oe1
->op
, c
->op
, c
->oecode
);
1740 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1742 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1743 print_generic_expr (dump_file
, c
->op
);
1744 fprintf (dump_file
, "\n");
1747 /* Record it in the addition chain and disable further
1748 undistribution with this op. */
1749 oe1
->op
= gimple_assign_lhs (prod
);
1750 oe1
->rank
= get_rank (oe1
->op
);
1751 subops
[first
].release ();
1759 for (i
= 0; i
< ops
->length (); ++i
)
1760 subops
[i
].release ();
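/* Illustrative sketch of the undistribution performed above, for
   exposition only; the temporary names are made up.  For the addition
   chain {a*x, b*x, c*y}, the (operand, code) pair (x, MULT_EXPR) occurs
   twice, so the two chains containing it are rewritten as

     t_1 = a + b;
     t_2 = t_1 * x;

   and the addition chain becomes {t_2, 0, c*y}, which the caller then
   reassociates again.  a * x * y + b * y * x is handled the same way,
   giving (a + b) * x * y.  */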
1767 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1768 expression, examine the other OPS to see if any of them are comparisons
1769 of the same values, which we may be able to combine or eliminate.
1770 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1773 eliminate_redundant_comparison (enum tree_code opcode
,
1774 vec
<operand_entry
*> *ops
,
1775 unsigned int currindex
,
1776 operand_entry
*curr
)
1779 enum tree_code lcode
, rcode
;
1780 gimple
*def1
, *def2
;
1784 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
1787 /* Check that CURR is a comparison. */
1788 if (TREE_CODE (curr
->op
) != SSA_NAME
)
1790 def1
= SSA_NAME_DEF_STMT (curr
->op
);
1791 if (!is_gimple_assign (def1
))
1793 lcode
= gimple_assign_rhs_code (def1
);
1794 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
1796 op1
= gimple_assign_rhs1 (def1
);
1797 op2
= gimple_assign_rhs2 (def1
);
1799 /* Now look for a similar comparison in the remaining OPS. */
1800 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
1804 if (TREE_CODE (oe
->op
) != SSA_NAME
)
1806 def2
= SSA_NAME_DEF_STMT (oe
->op
);
1807 if (!is_gimple_assign (def2
))
1809 rcode
= gimple_assign_rhs_code (def2
);
1810 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
1813 /* If we got here, we have a match. See if we can combine the
1815 if (opcode
== BIT_IOR_EXPR
)
1816 t
= maybe_fold_or_comparisons (lcode
, op1
, op2
,
1817 rcode
, gimple_assign_rhs1 (def2
),
1818 gimple_assign_rhs2 (def2
));
1820 t
= maybe_fold_and_comparisons (lcode
, op1
, op2
,
1821 rcode
, gimple_assign_rhs1 (def2
),
1822 gimple_assign_rhs2 (def2
));
1826 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1827 always give us a boolean_type_node value back. If the original
1828 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1829 we need to convert. */
1830 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
1831 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
1833 if (TREE_CODE (t
) != INTEGER_CST
1834 && !operand_equal_p (t
, curr
->op
, 0))
1836 enum tree_code subcode
;
1837 tree newop1
, newop2
;
1838 if (!COMPARISON_CLASS_P (t
))
1840 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1841 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1842 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1843 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
1847 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1849 fprintf (dump_file
, "Equivalence: ");
1850 print_generic_expr (dump_file
, curr
->op
);
1851 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
1852 print_generic_expr (dump_file
, oe
->op
);
1853 fprintf (dump_file
, " -> ");
1854 print_generic_expr (dump_file
, t
);
1855 fprintf (dump_file
, "\n");
1858 /* Now we can delete oe, as it has been subsumed by the new combined
1860 ops
->ordered_remove (i
);
1861 reassociate_stats
.ops_eliminated
++;
1863 /* If t is the same as curr->op, we're done. Otherwise we must
1864 replace curr->op with t. Special case is if we got a constant
1865 back, in which case we add it to the end instead of in place of
1866 the current entry. */
1867 if (TREE_CODE (t
) == INTEGER_CST
)
1869 ops
->ordered_remove (currindex
);
1870 add_to_ops_vec (ops
, t
);
1872 else if (!operand_equal_p (t
, curr
->op
, 0))
1875 enum tree_code subcode
;
1878 gcc_assert (COMPARISON_CLASS_P (t
));
1879 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1880 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1881 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1882 gcc_checking_assert (is_gimple_val (newop1
)
1883 && is_gimple_val (newop2
));
1884 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
1885 curr
->op
= gimple_get_lhs (sum
);
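/* Illustrative sketch, for exposition only; the SSA names are made up.
   For

     t_1 = a_2 < b_3;
     t_4 = a_2 == b_3;
     r_5 = t_1 | t_4;

   maybe_fold_or_comparisons returns a_2 <= b_3, so one operand entry is
   removed and the other is replaced by the LHS of a newly built
   statement t_6 = a_2 <= b_3, converted back to the wider type if the
   original BIT_IOR_EXPR was not computed in boolean_type_node.  */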
1894 /* Transform repeated addition of same values into multiply with constant. */
1897 transform_add_to_multiply (vec
<operand_entry
*> *ops
)
1900 tree op
= NULL_TREE
;
1902 int i
, start
= -1, end
= 0, count
= 0;
1903 auto_vec
<std::pair
<int, int> > indxs
;
1904 bool changed
= false;
1906 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops
)[0]->op
))
1907 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops
)[0]->op
))
1908 || !flag_unsafe_math_optimizations
))
1911 /* Look for repeated operands. */
1912 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
1920 else if (operand_equal_p (oe
->op
, op
, 0))
1928 indxs
.safe_push (std::make_pair (start
, end
));
1936 indxs
.safe_push (std::make_pair (start
, end
));
1938 for (j
= indxs
.length () - 1; j
>= 0; --j
)
1940 /* Convert repeated operand addition to multiplication. */
1941 start
= indxs
[j
].first
;
1942 end
= indxs
[j
].second
;
1943 op
= (*ops
)[start
]->op
;
1944 count
= end
- start
+ 1;
1945 for (i
= end
; i
>= start
; --i
)
1946 ops
->unordered_remove (i
);
1947 tree tmp
= make_ssa_name (TREE_TYPE (op
));
1948 tree cst
= build_int_cst (integer_type_node
, count
);
1950 = gimple_build_assign (tmp
, MULT_EXPR
,
1951 op
, fold_convert (TREE_TYPE (op
), cst
));
1952 gimple_set_visited (mul_stmt
, true);
1953 add_to_ops_vec (ops
, tmp
, mul_stmt
);
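/* Illustrative sketch, for exposition only: for an addition chain
   {x, x, x, x, y}, transform_add_to_multiply removes the four copies of
   x, emits

     tmp_1 = x * 4;

   and pushes tmp_1 back onto the operand list, leaving {y, tmp_1} for
   the normal reassociation machinery.  For floating point this is done
   only under -funsafe-math-optimizations.  */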
1961 /* Perform various identities and other optimizations on the list of
1962 operand entries, stored in OPS. The tree code for the binary
1963 operation between all the operands is OPCODE. */
1966 optimize_ops_list (enum tree_code opcode
,
1967 vec
<operand_entry
*> *ops
)
1969 unsigned int length
= ops
->length ();
1972 operand_entry
*oelast
= NULL
;
1973 bool iterate
= false;
1978 oelast
= ops
->last ();
1980 /* If the last two are constants, pop the constants off, merge them
1981 and try the next two. */
1982 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
1984 operand_entry
*oelm1
= (*ops
)[length
- 2];
1986 if (oelm1
->rank
== 0
1987 && is_gimple_min_invariant (oelm1
->op
)
1988 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
1989 TREE_TYPE (oelast
->op
)))
1991 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
1992 oelm1
->op
, oelast
->op
);
1994 if (folded
&& is_gimple_min_invariant (folded
))
1996 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1997 fprintf (dump_file
, "Merging constants\n");
2002 add_to_ops_vec (ops
, folded
);
2003 reassociate_stats
.constants_eliminated
++;
2005 optimize_ops_list (opcode
, ops
);
2011 eliminate_using_constants (opcode
, ops
);
2014 for (i
= 0; ops
->iterate (i
, &oe
);)
2018 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
2020 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
2021 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
2022 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
2034 length
= ops
->length ();
2035 oelast
= ops
->last ();
2038 optimize_ops_list (opcode
, ops
);
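/* Illustrative sketch of the constant merging above, for exposition
   only: for the PLUS_EXPR operand list {a, b, 5, 3}, the two trailing
   constants are popped, folded to 8, the result is pushed back, and
   optimize_ops_list recurses, so the final list is {a, b, 8}.  */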
2041 /* The following functions are subroutines to optimize_range_tests and allow
2042 it to try to change a logical combination of comparisons into a range
test. For example,
2046 X == 2 || X == 5 || X == 3 || X == 4
is converted to
2050 (unsigned) (X - 2) <= 3
2052 For more information see comments above fold_test_range in fold-const.c;
2053 this implementation is for GIMPLE. */
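/* Worked example of why the range check above is correct, for
   exposition only: 2 <= X && X <= 5 is the same as 0 <= X - 2 <= 3.
   Doing the subtraction and comparison in an unsigned type maps any
   X < 2 to a huge value by wraparound, so the single comparison
   (unsigned) (X - 2) <= 3 holds exactly when X is in [2, 5].  */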
2061 bool strict_overflow_p
;
2062 unsigned int idx
, next
;
2065 /* This is similar to make_range in fold-const.c, but on top of
2066 GIMPLE instead of trees. If EXP is non-NULL, it should be
2067 an SSA_NAME and STMT argument is ignored, otherwise STMT
2068 argument should be a GIMPLE_COND. */
2071 init_range_entry (struct range_entry
*r
, tree exp
, gimple
*stmt
)
2075 bool is_bool
, strict_overflow_p
;
2079 r
->strict_overflow_p
= false;
2081 r
->high
= NULL_TREE
;
2082 if (exp
!= NULL_TREE
2083 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
2086 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2087 and see if we can refine the range. Some of the cases below may not
2088 happen, but it doesn't seem worth worrying about this. We "continue"
2089 the outer loop when we've changed something; otherwise we "break"
2090 the switch, which will "break" the while. */
2091 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
2094 strict_overflow_p
= false;
2096 if (exp
== NULL_TREE
)
2098 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
2100 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
2105 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
2110 enum tree_code code
;
2111 tree arg0
, arg1
, exp_type
;
2115 if (exp
!= NULL_TREE
)
2117 if (TREE_CODE (exp
) != SSA_NAME
2118 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
))
2121 stmt
= SSA_NAME_DEF_STMT (exp
);
2122 if (!is_gimple_assign (stmt
))
2125 code
= gimple_assign_rhs_code (stmt
);
2126 arg0
= gimple_assign_rhs1 (stmt
);
2127 arg1
= gimple_assign_rhs2 (stmt
);
2128 exp_type
= TREE_TYPE (exp
);
2132 code
= gimple_cond_code (stmt
);
2133 arg0
= gimple_cond_lhs (stmt
);
2134 arg1
= gimple_cond_rhs (stmt
);
2135 exp_type
= boolean_type_node
;
2138 if (TREE_CODE (arg0
) != SSA_NAME
)
2140 loc
= gimple_location (stmt
);
2144 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
2145 /* Ensure the range is either +[-,0], +[0,0],
2146 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2147 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2148 or similar expression of unconditional true or
2149 false, it should not be negated. */
2150 && ((high
&& integer_zerop (high
))
2151 || (low
&& integer_onep (low
))))
2164 if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
2166 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
2171 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
2186 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
2188 &strict_overflow_p
);
2189 if (nexp
!= NULL_TREE
)
2192 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2205 r
->strict_overflow_p
= strict_overflow_p
;
2209 /* Comparison function for qsort. Sort entries
2210 without SSA_NAME exp first, then with SSA_NAMEs sorted
2211 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2212 by increasing ->low and if ->low is the same, by increasing
2213 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE means maximum. */
2217 range_entry_cmp (const void *a
, const void *b
)
2219 const struct range_entry
*p
= (const struct range_entry
*) a
;
2220 const struct range_entry
*q
= (const struct range_entry
*) b
;
2222 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
2224 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2226 /* Group range_entries for the same SSA_NAME together. */
2227 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
2229 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
2231 /* If ->low is different, NULL low goes first, then by ascending low. */
2233 if (p
->low
!= NULL_TREE
)
2235 if (q
->low
!= NULL_TREE
)
2237 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2239 if (tem
&& integer_onep (tem
))
2241 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2243 if (tem
&& integer_onep (tem
))
2249 else if (q
->low
!= NULL_TREE
)
2251 /* If ->high is different, NULL high goes last, before that by ascending high. */
2253 if (p
->high
!= NULL_TREE
)
2255 if (q
->high
!= NULL_TREE
)
2257 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2259 if (tem
&& integer_onep (tem
))
2261 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2263 if (tem
&& integer_onep (tem
))
2269 else if (q
->high
!= NULL_TREE
)
2271 /* If both ranges are the same, sort below by ascending idx. */
2276 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2279 if (p
->idx
< q
->idx
)
2283 gcc_checking_assert (p
->idx
> q
->idx
);
2288 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2289 insert needed statements BEFORE or after GSI. */
2292 force_into_ssa_name (gimple_stmt_iterator
*gsi
, tree expr
, bool before
)
2294 enum gsi_iterator_update m
= before
? GSI_SAME_STMT
: GSI_CONTINUE_LINKING
;
2295 tree ret
= force_gimple_operand_gsi (gsi
, expr
, true, NULL_TREE
, before
, m
);
2296 if (TREE_CODE (ret
) != SSA_NAME
)
2298 gimple
*g
= gimple_build_assign (make_ssa_name (TREE_TYPE (ret
)), ret
);
2300 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
2302 gsi_insert_after (gsi
, g
, GSI_CONTINUE_LINKING
);
2303 ret
= gimple_assign_lhs (g
);
2308 /* Helper routine of optimize_range_test.
2309 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2310 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2311 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2312 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2313 an array of COUNT pointers to other ranges. Return
2314 true if the range merge has been successful.
2315 If OPCODE is ERROR_MARK, this is called from within
2316 maybe_optimize_range_tests and is performing inter-bb range optimization.
2317 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank. */
2321 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2322 struct range_entry
**otherrangep
,
2323 unsigned int count
, enum tree_code opcode
,
2324 vec
<operand_entry
*> *ops
, tree exp
, gimple_seq seq
,
2325 bool in_p
, tree low
, tree high
, bool strict_overflow_p
)
2327 operand_entry
*oe
= (*ops
)[range
->idx
];
2329 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
2330 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2331 location_t loc
= gimple_location (stmt
);
2332 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2333 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2335 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2336 gimple_stmt_iterator gsi
;
2337 unsigned int i
, uid
;
2339 if (tem
== NULL_TREE
)
2342 /* If op is default def SSA_NAME, there is no place to insert the
2343 new comparison. Give up, unless we can use OP itself as the
2345 if (op
&& SSA_NAME_IS_DEFAULT_DEF (op
))
2347 if (op
== range
->exp
2348 && ((TYPE_PRECISION (optype
) == 1 && TYPE_UNSIGNED (optype
))
2349 || TREE_CODE (optype
) == BOOLEAN_TYPE
)
2351 || (TREE_CODE (tem
) == EQ_EXPR
2352 && TREE_OPERAND (tem
, 0) == op
2353 && integer_onep (TREE_OPERAND (tem
, 1))))
2354 && opcode
!= BIT_IOR_EXPR
2355 && (opcode
!= ERROR_MARK
|| oe
->rank
!= BIT_IOR_EXPR
))
2364 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2365 warning_at (loc
, OPT_Wstrict_overflow
,
2366 "assuming signed overflow does not occur "
2367 "when simplifying range test");
2369 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2371 struct range_entry
*r
;
2372 fprintf (dump_file
, "Optimizing range tests ");
2373 print_generic_expr (dump_file
, range
->exp
);
2374 fprintf (dump_file
, " %c[", range
->in_p
? '+' : '-');
2375 print_generic_expr (dump_file
, range
->low
);
2376 fprintf (dump_file
, ", ");
2377 print_generic_expr (dump_file
, range
->high
);
2378 fprintf (dump_file
, "]");
2379 for (i
= 0; i
< count
; i
++)
2386 && r
->exp
!= range
->exp
2387 && TREE_CODE (r
->exp
) == SSA_NAME
)
2389 fprintf (dump_file
, " and ");
2390 print_generic_expr (dump_file
, r
->exp
);
2393 fprintf (dump_file
, " and");
2394 fprintf (dump_file
, " %c[", r
->in_p
? '+' : '-');
2395 print_generic_expr (dump_file
, r
->low
);
2396 fprintf (dump_file
, ", ");
2397 print_generic_expr (dump_file
, r
->high
);
2398 fprintf (dump_file
, "]");
2400 fprintf (dump_file
, "\n into ");
2401 print_generic_expr (dump_file
, tem
);
2402 fprintf (dump_file
, "\n");
2405 if (opcode
== BIT_IOR_EXPR
2406 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2407 tem
= invert_truthvalue_loc (loc
, tem
);
2409 tem
= fold_convert_loc (loc
, optype
, tem
);
2412 gsi
= gsi_for_stmt (stmt
);
2413 uid
= gimple_uid (stmt
);
2421 gcc_checking_assert (tem
== op
);
2422 /* In rare cases range->exp can be equal to lhs of stmt.
2423 In that case we have to insert after the stmt rather then before
2424 it. If stmt is a PHI, insert it at the start of the basic block. */
2425 else if (op
!= range
->exp
)
2427 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2428 tem
= force_into_ssa_name (&gsi
, tem
, true);
2431 else if (gimple_code (stmt
) != GIMPLE_PHI
)
2433 gsi_insert_seq_after (&gsi
, seq
, GSI_CONTINUE_LINKING
);
2434 tem
= force_into_ssa_name (&gsi
, tem
, false);
2438 gsi
= gsi_after_labels (gimple_bb (stmt
));
2439 if (!gsi_end_p (gsi
))
2440 uid
= gimple_uid (gsi_stmt (gsi
));
2443 gsi
= gsi_start_bb (gimple_bb (stmt
));
2445 while (!gsi_end_p (gsi
))
2447 uid
= gimple_uid (gsi_stmt (gsi
));
2451 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2452 tem
= force_into_ssa_name (&gsi
, tem
, true);
2453 if (gsi_end_p (gsi
))
2454 gsi
= gsi_last_bb (gimple_bb (stmt
));
2458 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2459 if (gimple_uid (gsi_stmt (gsi
)))
2462 gimple_set_uid (gsi_stmt (gsi
), uid
);
2469 range
->strict_overflow_p
= false;
2471 for (i
= 0; i
< count
; i
++)
2474 range
= otherrange
+ i
;
2476 range
= otherrangep
[i
];
2477 oe
= (*ops
)[range
->idx
];
2478 /* Now change all the other range test immediate uses, so that
2479 those tests will be optimized away. */
2480 if (opcode
== ERROR_MARK
)
2483 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
2484 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
2486 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
2487 ? boolean_false_node
: boolean_true_node
);
2490 oe
->op
= error_mark_node
;
2491 range
->exp
= NULL_TREE
;
2492 range
->low
= NULL_TREE
;
2493 range
->high
= NULL_TREE
;
2498 /* Optimize X == CST1 || X == CST2
2499 if popcount (CST1 ^ CST2) == 1 into
2500 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2501 Similarly for ranges. E.g.
2502 X != 2 && X != 3 && X != 10 && X != 11
2503 will be transformed by the previous optimization into
2504 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2505 and this loop can transform that into
2506 !(((X & ~8) - 2U) <= 1U). */
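/* Illustrative sketch, not part of the pass: the transform described above
   at the source level, assuming 32-bit unsigned arithmetic and the
   hypothetical constants 2 and 3 (2 ^ 3 == 1 has exactly one bit set).  */

static int
range_xor_example_orig (unsigned int x)
{
  return x == 2 || x == 3;
}

static int
range_xor_example_opt (unsigned int x)
{
  unsigned int diff = 2U ^ 3U;		/* popcount (diff) == 1 */
  return (x & ~diff) == (2U & ~diff);	/* single comparison */
}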
2509 optimize_range_tests_xor (enum tree_code opcode, tree type,
2510			      tree lowi, tree lowj, tree highi, tree highj,
2511			      vec<operand_entry *> *ops,
2512			      struct range_entry *rangei,
2513			      struct range_entry *rangej)
2515   tree lowxor, highxor, tem, exp;
2516   /* Check lowi ^ lowj == highi ^ highj and
2517      popcount (lowi ^ lowj) == 1.  */
2518   lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2519   if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2521   if (!integer_pow2p (lowxor))
2523   highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2524   if (!tree_int_cst_equal (lowxor, highxor))
2527   tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2528   exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2529   lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2530   highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2531   if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2532			  NULL, rangei->in_p, lowj, highj,
2533			  rangei->strict_overflow_p
2534			  || rangej->strict_overflow_p))
2539 /* Optimize X == CST1 || X == CST2
2540 if popcount (CST2 - CST1) == 1 into
2541 ((X - CST1) & ~(CST2 - CST1)) == 0.
2542 Similarly for ranges. E.g.
2543 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2544 || X == 75 || X == 45
2545 will be transformed by the previous optimization into
2546 (X - 43U) <= 3U || (X - 75U) <= 3U
2547 and this loop can transform that into
2548 ((X - 43U) & ~(75U - 43U)) <= 3U. */
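/* Illustrative sketch, not part of the pass: the transform described above
   at the source level, assuming 32-bit unsigned arithmetic and the
   hypothetical constants 43 and 75 (75 - 43 == 32 has exactly one bit set).  */

static int
range_diff_example_orig (unsigned int x)
{
  return x == 43 || x == 75;
}

static int
range_diff_example_opt (unsigned int x)
{
  unsigned int diff = 75U - 43U;	/* popcount (diff) == 1 */
  return ((x - 43U) & ~diff) == 0;	/* true only for 43 and 75 */
}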
2550 optimize_range_tests_diff (enum tree_code opcode, tree type,
2551			       tree lowi, tree lowj, tree highi, tree highj,
2552			       vec<operand_entry *> *ops,
2553			       struct range_entry *rangei,
2554			       struct range_entry *rangej)
2556   tree tem1, tem2, mask;
2557   /* Check highi - lowi == highj - lowj.  */
2558   tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2559   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2561   tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2562   if (!tree_int_cst_equal (tem1, tem2))
2564   /* Check popcount (lowj - lowi) == 1.  */
2565   tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2566   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2568   if (!integer_pow2p (tem1))
2571   type = unsigned_type_for (type);
2572   tem1 = fold_convert (type, tem1);
2573   tem2 = fold_convert (type, tem2);
2574   lowi = fold_convert (type, lowi);
2575   mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2576   tem1 = fold_build2 (MINUS_EXPR, type,
2577		       fold_convert (type, rangei->exp), lowi);
2578   tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2579   lowj = build_int_cst (type, 0);
2580   if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2581			  NULL, rangei->in_p, lowj, tem2,
2582			  rangei->strict_overflow_p
2583			  || rangej->strict_overflow_p))
2588 /* It does some common checks for function optimize_range_tests_xor and
2589 optimize_range_tests_diff.
2590 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2591 Else it calls optimize_range_tests_diff. */
2594 optimize_range_tests_1 (enum tree_code opcode
, int first
, int length
,
2595 bool optimize_xor
, vec
<operand_entry
*> *ops
,
2596 struct range_entry
*ranges
)
2599 bool any_changes
= false;
2600 for (i
= first
; i
< length
; i
++)
2602 tree lowi
, highi
, lowj
, highj
, type
, tem
;
2604 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2606 type
= TREE_TYPE (ranges
[i
].exp
);
2607 if (!INTEGRAL_TYPE_P (type
))
2609 lowi
= ranges
[i
].low
;
2610 if (lowi
== NULL_TREE
)
2611 lowi
= TYPE_MIN_VALUE (type
);
2612 highi
= ranges
[i
].high
;
2613 if (highi
== NULL_TREE
)
2615 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
2618 if (ranges
[i
].exp
!= ranges
[j
].exp
|| ranges
[j
].in_p
)
2620 lowj
= ranges
[j
].low
;
2621 if (lowj
== NULL_TREE
)
2623 highj
= ranges
[j
].high
;
2624 if (highj
== NULL_TREE
)
2625 highj
= TYPE_MAX_VALUE (type
);
2626 /* Check lowj > highi. */
2627 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2629 if (tem
== NULL_TREE
|| !integer_onep (tem
))
2632 changes
= optimize_range_tests_xor (opcode
, type
, lowi
, lowj
,
2634 ranges
+ i
, ranges
+ j
);
2636 changes
= optimize_range_tests_diff (opcode
, type
, lowi
, lowj
,
2638 ranges
+ i
, ranges
+ j
);
2649 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2650 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2651 EXP on success, NULL otherwise. */
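/* Illustrative sketch, not part of the pass: how a single range maps to a
   bit mask relative to a common base value TOTALLOW, assuming TOTALLOW <=
   LOW <= HIGH and that all values of interest fit within one 64-bit word
   (HIGH - TOTALLOW < 64).  The helper below is hypothetical and only
   mirrors the shape of the mask computed here.  */

static unsigned long long
range_bit_mask_example (unsigned long long totallow, unsigned long long low,
			unsigned long long high)
{
  /* One bit per value in [LOW, HIGH], placed at offsets LOW - TOTALLOW
     through HIGH - TOTALLOW.  */
  unsigned long long width = high - low + 1;
  unsigned long long mask = (width >= 64 ? ~0ULL : (1ULL << width) - 1);
  return mask << (low - totallow);
}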
2654 extract_bit_test_mask (tree exp
, int prec
, tree totallow
, tree low
, tree high
,
2655 wide_int
*mask
, tree
*totallowp
)
2657 tree tem
= int_const_binop (MINUS_EXPR
, high
, low
);
2658 if (tem
== NULL_TREE
2659 || TREE_CODE (tem
) != INTEGER_CST
2660 || TREE_OVERFLOW (tem
)
2661 || tree_int_cst_sgn (tem
) == -1
2662 || compare_tree_int (tem
, prec
) != -1)
2665 unsigned HOST_WIDE_INT max
= tree_to_uhwi (tem
) + 1;
2666 *mask
= wi::shifted_mask (0, max
, false, prec
);
2667 if (TREE_CODE (exp
) == BIT_AND_EXPR
2668 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
2670 widest_int msk
= wi::to_widest (TREE_OPERAND (exp
, 1));
2671 msk
= wi::zext (~msk
, TYPE_PRECISION (TREE_TYPE (exp
)));
2672 if (wi::popcount (msk
) == 1
2673 && wi::ltu_p (msk
, prec
- max
))
2675 *mask
|= wi::shifted_mask (msk
.to_uhwi (), max
, false, prec
);
2676 max
+= msk
.to_uhwi ();
2677 exp
= TREE_OPERAND (exp
, 0);
2678 if (integer_zerop (low
)
2679 && TREE_CODE (exp
) == PLUS_EXPR
2680 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
2682 tree ret
= TREE_OPERAND (exp
, 0);
2685 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp
, 1)),
2686 TYPE_PRECISION (TREE_TYPE (low
))));
2687 tree tbias
= wide_int_to_tree (TREE_TYPE (ret
), bias
);
2693 else if (!tree_int_cst_lt (totallow
, tbias
))
2695 bias
= wi::to_widest (tbias
);
2696 bias
-= wi::to_widest (totallow
);
2697 if (bias
>= 0 && bias
< prec
- max
)
2699 *mask
= wi::lshift (*mask
, bias
);
2707 if (!tree_int_cst_lt (totallow
, low
))
2709 tem
= int_const_binop (MINUS_EXPR
, low
, totallow
);
2710 if (tem
== NULL_TREE
2711 || TREE_CODE (tem
) != INTEGER_CST
2712 || TREE_OVERFLOW (tem
)
2713 || compare_tree_int (tem
, prec
- max
) == 1)
2716 *mask
= wi::lshift (*mask
, wi::to_widest (tem
));
2720 /* Attempt to optimize small range tests using bit test.
2722 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2723 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2724    has been optimized by earlier optimizations into:
2725 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2726 As all the 43 through 82 range is less than 64 numbers,
2727 for 64-bit word targets optimize that into:
2728 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
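/* Illustrative sketch, not part of the pass: the shift-and-mask membership
   test described above, for the hypothetical set { 43, 44, 45, 46, 49 }
   (all values lie within one 64-bit word of the base value 43).  */

static int
bit_test_example_orig (unsigned int x)
{
  return x == 43 || x == 44 || x == 45 || x == 46 || x == 49;
}

static int
bit_test_example_opt (unsigned int x)
{
  /* Bits 0-3 and 6 of the mask stand for 43, 44, 45, 46 and 49; the range
     check also keeps the shift amount well defined.  */
  unsigned long long mask = 0x4FULL;
  return (x - 43U) <= 6U && ((1ULL << (x - 43U)) & mask) != 0;
}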
2731 optimize_range_tests_to_bit_test (enum tree_code opcode
, int first
, int length
,
2732 vec
<operand_entry
*> *ops
,
2733 struct range_entry
*ranges
)
2736 bool any_changes
= false;
2737 int prec
= GET_MODE_BITSIZE (word_mode
);
2738 auto_vec
<struct range_entry
*, 64> candidates
;
2740 for (i
= first
; i
< length
- 2; i
++)
2742 tree lowi
, highi
, lowj
, highj
, type
;
2744 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2746 type
= TREE_TYPE (ranges
[i
].exp
);
2747 if (!INTEGRAL_TYPE_P (type
))
2749 lowi
= ranges
[i
].low
;
2750 if (lowi
== NULL_TREE
)
2751 lowi
= TYPE_MIN_VALUE (type
);
2752 highi
= ranges
[i
].high
;
2753 if (highi
== NULL_TREE
)
2756 tree exp
= extract_bit_test_mask (ranges
[i
].exp
, prec
, lowi
, lowi
,
2757 highi
, &mask
, &lowi
);
2758 if (exp
== NULL_TREE
)
2760 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2761 candidates
.truncate (0);
2762 int end
= MIN (i
+ 64, length
);
2763 for (j
= i
+ 1; j
< end
; j
++)
2766 if (ranges
[j
].exp
== NULL_TREE
|| ranges
[j
].in_p
)
2768 if (ranges
[j
].exp
== exp
)
2770 else if (TREE_CODE (ranges
[j
].exp
) == BIT_AND_EXPR
)
2772 exp2
= TREE_OPERAND (ranges
[j
].exp
, 0);
2775 else if (TREE_CODE (exp2
) == PLUS_EXPR
)
2777 exp2
= TREE_OPERAND (exp2
, 0);
2787 lowj
= ranges
[j
].low
;
2788 if (lowj
== NULL_TREE
)
2790 highj
= ranges
[j
].high
;
2791 if (highj
== NULL_TREE
)
2792 highj
= TYPE_MAX_VALUE (type
);
2794 exp2
= extract_bit_test_mask (ranges
[j
].exp
, prec
, lowi
, lowj
,
2795 highj
, &mask2
, NULL
);
2799 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2800 candidates
.safe_push (&ranges
[j
]);
2803 /* If we need otherwise 3 or more comparisons, use a bit test. */
2804 if (candidates
.length () >= 2)
2806 tree high
= wide_int_to_tree (TREE_TYPE (lowi
),
2807 wi::to_widest (lowi
)
2808 + prec
- 1 - wi::clz (mask
));
2809 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
2811 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
2812 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2813 location_t loc
= gimple_location (stmt
);
2814 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2816 /* See if it isn't cheaper to pretend the minimum value of the
2817 range is 0, if maximum value is small enough.
2818      We can then avoid the subtraction of the minimum value, but the
2819 mask constant could be perhaps more expensive. */
2820 if (compare_tree_int (lowi
, 0) > 0
2821 && compare_tree_int (high
, prec
) < 0)
2824 HOST_WIDE_INT m
= tree_to_uhwi (lowi
);
2825 rtx reg
= gen_raw_REG (word_mode
, 10000);
2826 bool speed_p
= optimize_bb_for_speed_p (gimple_bb (stmt
));
2827 cost_diff
= set_rtx_cost (gen_rtx_PLUS (word_mode
, reg
,
2828 GEN_INT (-m
)), speed_p
);
2829 rtx r
= immed_wide_int_const (mask
, word_mode
);
2830 cost_diff
+= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
2831 word_mode
, speed_p
);
2832 r
= immed_wide_int_const (wi::lshift (mask
, m
), word_mode
);
2833 cost_diff
-= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
2834 word_mode
, speed_p
);
2837 mask
= wi::lshift (mask
, m
);
2838 lowi
= build_zero_cst (TREE_TYPE (lowi
));
2842 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2844 if (tem
== NULL_TREE
|| is_gimple_val (tem
))
2846 tree etype
= unsigned_type_for (TREE_TYPE (exp
));
2847 exp
= fold_build2_loc (loc
, MINUS_EXPR
, etype
,
2848 fold_convert_loc (loc
, etype
, exp
),
2849 fold_convert_loc (loc
, etype
, lowi
));
2850 exp
= fold_convert_loc (loc
, integer_type_node
, exp
);
2851 tree word_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
2852 exp
= fold_build2_loc (loc
, LSHIFT_EXPR
, word_type
,
2853 build_int_cst (word_type
, 1), exp
);
2854 exp
= fold_build2_loc (loc
, BIT_AND_EXPR
, word_type
, exp
,
2855 wide_int_to_tree (word_type
, mask
));
2856 exp
= fold_build2_loc (loc
, EQ_EXPR
, optype
, exp
,
2857 build_zero_cst (word_type
));
2858 if (is_gimple_val (exp
))
2861 /* The shift might have undefined behavior if TEM is true,
2862 but reassociate_bb isn't prepared to have basic blocks
2863 split when it is running. So, temporarily emit a code
2864 with BIT_IOR_EXPR instead of &&, and fix it up in
2867 tem
= force_gimple_operand (tem
, &seq
, true, NULL_TREE
);
2868 gcc_assert (TREE_CODE (tem
) == SSA_NAME
);
2869 gimple_set_visited (SSA_NAME_DEF_STMT (tem
), true);
2871 exp
= force_gimple_operand (exp
, &seq2
, true, NULL_TREE
);
2872 gimple_seq_add_seq_without_update (&seq
, seq2
);
2873 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2874 gimple_set_visited (SSA_NAME_DEF_STMT (exp
), true);
2875 gimple
*g
= gimple_build_assign (make_ssa_name (optype
),
2876 BIT_IOR_EXPR
, tem
, exp
);
2877 gimple_set_location (g
, loc
);
2878 gimple_seq_add_stmt_without_update (&seq
, g
);
2879 exp
= gimple_assign_lhs (g
);
2880 tree val
= build_zero_cst (optype
);
2881 if (update_range_test (&ranges
[i
], NULL
, candidates
.address (),
2882 candidates
.length (), opcode
, ops
, exp
,
2883 seq
, false, val
, val
, strict_overflow_p
))
2886 reassoc_branch_fixups
.safe_push (tem
);
2889 gimple_seq_discard (seq
);
2895 /* Optimize x != 0 || y != 0 || z != 0 into (x | y | z) != 0
2896    and similarly x != -1 || y != -1 || z != -1 into (x & y & z) != -1.  */
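/* Illustrative sketch, not part of the pass: three compares and the
   associated short-circuit branches collapse into one bitwise OR followed
   by a single compare.  */

static int
cmp_bitwise_example_orig (unsigned int x, unsigned int y, unsigned int z)
{
  return x != 0 || y != 0 || z != 0;
}

static int
cmp_bitwise_example_opt (unsigned int x, unsigned int y, unsigned int z)
{
  return (x | y | z) != 0;
}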
2899 optimize_range_tests_cmp_bitwise (enum tree_code opcode
, int first
, int length
,
2900 vec
<operand_entry
*> *ops
,
2901 struct range_entry
*ranges
)
2905 bool any_changes
= false;
2906 auto_vec
<int, 128> buckets
;
2907 auto_vec
<int, 32> chains
;
2908 auto_vec
<struct range_entry
*, 32> candidates
;
2910 for (i
= first
; i
< length
; i
++)
2912 if (ranges
[i
].exp
== NULL_TREE
2913 || TREE_CODE (ranges
[i
].exp
) != SSA_NAME
2915 || TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) <= 1
2916 || TREE_CODE (TREE_TYPE (ranges
[i
].exp
)) == BOOLEAN_TYPE
2917 || ranges
[i
].low
== NULL_TREE
2918 || ranges
[i
].low
!= ranges
[i
].high
)
2921 bool zero_p
= integer_zerop (ranges
[i
].low
);
2922 if (!zero_p
&& !integer_all_onesp (ranges
[i
].low
))
2925 b
= TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) * 2 + !zero_p
;
2926 if (buckets
.length () <= b
)
2927 buckets
.safe_grow_cleared (b
+ 1);
2928 if (chains
.length () <= (unsigned) i
)
2929 chains
.safe_grow (i
+ 1);
2930 chains
[i
] = buckets
[b
];
2934 FOR_EACH_VEC_ELT (buckets
, b
, i
)
2935 if (i
&& chains
[i
- 1])
2938 for (j
= chains
[i
- 1]; j
; j
= chains
[j
- 1])
2940 gimple
*gk
= SSA_NAME_DEF_STMT (ranges
[k
- 1].exp
);
2941 gimple
*gj
= SSA_NAME_DEF_STMT (ranges
[j
- 1].exp
);
2942 if (reassoc_stmt_dominates_stmt_p (gk
, gj
))
2945 tree type1
= TREE_TYPE (ranges
[k
- 1].exp
);
2946 tree type2
= NULL_TREE
;
2947 bool strict_overflow_p
= false;
2948 candidates
.truncate (0);
2949 for (j
= i
; j
; j
= chains
[j
- 1])
2951 tree type
= TREE_TYPE (ranges
[j
- 1].exp
);
2952 strict_overflow_p
|= ranges
[j
- 1].strict_overflow_p
;
2954 || useless_type_conversion_p (type1
, type
))
2956 else if (type2
== NULL_TREE
2957 || useless_type_conversion_p (type2
, type
))
2959 if (type2
== NULL_TREE
)
2961 candidates
.safe_push (&ranges
[j
- 1]);
2964 unsigned l
= candidates
.length ();
2965 for (j
= i
; j
; j
= chains
[j
- 1])
2967 tree type
= TREE_TYPE (ranges
[j
- 1].exp
);
2970 if (useless_type_conversion_p (type1
, type
))
2972 else if (type2
== NULL_TREE
2973 || useless_type_conversion_p (type2
, type
))
2975 candidates
.safe_push (&ranges
[j
- 1]);
2977 gimple_seq seq
= NULL
;
2978 tree op
= NULL_TREE
;
2980 struct range_entry
*r
;
2981 candidates
.safe_push (&ranges
[k
- 1]);
2982 FOR_EACH_VEC_ELT (candidates
, id
, r
)
2992 g
= gimple_build_assign (make_ssa_name (type1
), NOP_EXPR
, op
);
2993 gimple_seq_add_stmt_without_update (&seq
, g
);
2994 op
= gimple_assign_lhs (g
);
2996 tree type
= TREE_TYPE (r
->exp
);
2998 if (id
>= l
&& !useless_type_conversion_p (type1
, type
))
3000 g
= gimple_build_assign (make_ssa_name (type1
), NOP_EXPR
, exp
);
3001 gimple_seq_add_stmt_without_update (&seq
, g
);
3002 exp
= gimple_assign_lhs (g
);
3004 g
= gimple_build_assign (make_ssa_name (id
>= l
? type1
: type2
),
3005 (b
& 1) ? BIT_AND_EXPR
: BIT_IOR_EXPR
,
3007 gimple_seq_add_stmt_without_update (&seq
, g
);
3008 op
= gimple_assign_lhs (g
);
3011 if (update_range_test (&ranges
[k
- 1], NULL
, candidates
.address (),
3012 candidates
.length (), opcode
, ops
, op
,
3013 seq
, true, ranges
[k
- 1].low
,
3014 ranges
[k
- 1].low
, strict_overflow_p
))
3017 gimple_seq_discard (seq
);
3023 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3024 a >= 0 && a < b into (unsigned) a < (unsigned) b
3025 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
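/* Illustrative sketch, not part of the pass: with 32-bit two's complement
   int and the guarantee b >= 0, the two signed comparisons fold into a
   single unsigned comparison, because a negative A casts to an unsigned
   value larger than any nonnegative B.  */

static int
var_bound_example_orig (int a, int b)	/* assumes b >= 0 */
{
  return a >= 0 && a < b;
}

static int
var_bound_example_opt (int a, int b)	/* assumes b >= 0 */
{
  return (unsigned int) a < (unsigned int) b;
}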
3028 optimize_range_tests_var_bound (enum tree_code opcode
, int first
, int length
,
3029 vec
<operand_entry
*> *ops
,
3030 struct range_entry
*ranges
)
3033 bool any_changes
= false;
3034 hash_map
<tree
, int> *map
= NULL
;
3036 for (i
= first
; i
< length
; i
++)
3038 if (ranges
[i
].exp
== NULL_TREE
3039 || TREE_CODE (ranges
[i
].exp
) != SSA_NAME
3043 tree type
= TREE_TYPE (ranges
[i
].exp
);
3044 if (!INTEGRAL_TYPE_P (type
)
3045 || TYPE_UNSIGNED (type
)
3046 || ranges
[i
].low
== NULL_TREE
3047 || !integer_zerop (ranges
[i
].low
)
3048 || ranges
[i
].high
!= NULL_TREE
)
3050 /* EXP >= 0 here. */
3052 map
= new hash_map
<tree
, int>;
3053 map
->put (ranges
[i
].exp
, i
);
3059 for (i
= 0; i
< length
; i
++)
3061 bool in_p
= ranges
[i
].in_p
;
3062 if (ranges
[i
].low
== NULL_TREE
3063 || ranges
[i
].high
== NULL_TREE
)
3065 if (!integer_zerop (ranges
[i
].low
)
3066 || !integer_zerop (ranges
[i
].high
))
3069 && TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) == 1
3070 && TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3071 && integer_onep (ranges
[i
].low
)
3072 && integer_onep (ranges
[i
].high
))
3083 if (TREE_CODE (ranges
[i
].exp
) != SSA_NAME
)
3085 stmt
= SSA_NAME_DEF_STMT (ranges
[i
].exp
);
3086 if (!is_gimple_assign (stmt
))
3088 ccode
= gimple_assign_rhs_code (stmt
);
3089 rhs1
= gimple_assign_rhs1 (stmt
);
3090 rhs2
= gimple_assign_rhs2 (stmt
);
3094 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3095 stmt
= last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
3096 if (gimple_code (stmt
) != GIMPLE_COND
)
3098 ccode
= gimple_cond_code (stmt
);
3099 rhs1
= gimple_cond_lhs (stmt
);
3100 rhs2
= gimple_cond_rhs (stmt
);
3103 if (TREE_CODE (rhs1
) != SSA_NAME
3104 || rhs2
== NULL_TREE
3105 || TREE_CODE (rhs2
) != SSA_NAME
)
3119 ccode
= invert_tree_comparison (ccode
, false);
3124 std::swap (rhs1
, rhs2
);
3125 ccode
= swap_tree_comparison (ccode
);
3134 int *idx
= map
->get (rhs1
);
3138 wide_int nz
= get_nonzero_bits (rhs2
);
3142 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3143 and RHS2 is known to be RHS2 >= 0. */
3144 tree utype
= unsigned_type_for (TREE_TYPE (rhs1
));
3146 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
3147 if ((ranges
[*idx
].strict_overflow_p
3148 || ranges
[i
].strict_overflow_p
)
3149 && issue_strict_overflow_warning (wc
))
3150 warning_at (gimple_location (stmt
), OPT_Wstrict_overflow
,
3151 "assuming signed overflow does not occur "
3152 "when simplifying range test");
3154 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3156 struct range_entry
*r
= &ranges
[*idx
];
3157 fprintf (dump_file
, "Optimizing range test ");
3158 print_generic_expr (dump_file
, r
->exp
);
3159 fprintf (dump_file
, " +[");
3160 print_generic_expr (dump_file
, r
->low
);
3161 fprintf (dump_file
, ", ");
3162 print_generic_expr (dump_file
, r
->high
);
3163 fprintf (dump_file
, "] and comparison ");
3164 print_generic_expr (dump_file
, rhs1
);
3165 fprintf (dump_file
, " %s ", op_symbol_code (ccode
));
3166 print_generic_expr (dump_file
, rhs2
);
3167 fprintf (dump_file
, "\n into (");
3168 print_generic_expr (dump_file
, utype
);
3169 fprintf (dump_file
, ") ");
3170 print_generic_expr (dump_file
, rhs1
);
3171 fprintf (dump_file
, " %s (", op_symbol_code (ccode
));
3172 print_generic_expr (dump_file
, utype
);
3173 fprintf (dump_file
, ") ");
3174 print_generic_expr (dump_file
, rhs2
);
3175 fprintf (dump_file
, "\n");
3178 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3180 if (opcode
== BIT_IOR_EXPR
3181 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
3184 ccode
= invert_tree_comparison (ccode
, false);
3187 unsigned int uid
= gimple_uid (stmt
);
3188 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3189 gimple
*g
= gimple_build_assign (make_ssa_name (utype
), NOP_EXPR
, rhs1
);
3190 gimple_set_uid (g
, uid
);
3191 rhs1
= gimple_assign_lhs (g
);
3192 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3193 g
= gimple_build_assign (make_ssa_name (utype
), NOP_EXPR
, rhs2
);
3194 gimple_set_uid (g
, uid
);
3195 rhs2
= gimple_assign_lhs (g
);
3196 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3197 if (tree_swap_operands_p (rhs1
, rhs2
))
3199 std::swap (rhs1
, rhs2
);
3200 ccode
= swap_tree_comparison (ccode
);
3202 if (gimple_code (stmt
) == GIMPLE_COND
)
3204 gcond
*c
= as_a
<gcond
*> (stmt
);
3205 gimple_cond_set_code (c
, ccode
);
3206 gimple_cond_set_lhs (c
, rhs1
);
3207 gimple_cond_set_rhs (c
, rhs2
);
3212 tree ctype
= oe
->op
? TREE_TYPE (oe
->op
) : boolean_type_node
;
3213 if (!INTEGRAL_TYPE_P (ctype
)
3214 || (TREE_CODE (ctype
) != BOOLEAN_TYPE
3215 && TYPE_PRECISION (ctype
) != 1))
3216 ctype
= boolean_type_node
;
3217 g
= gimple_build_assign (make_ssa_name (ctype
), ccode
, rhs1
, rhs2
);
3218 gimple_set_uid (g
, uid
);
3219 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3220 if (oe
->op
&& ctype
!= TREE_TYPE (oe
->op
))
3222 g
= gimple_build_assign (make_ssa_name (TREE_TYPE (oe
->op
)),
3223 NOP_EXPR
, gimple_assign_lhs (g
));
3224 gimple_set_uid (g
, uid
);
3225 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3227 ranges
[i
].exp
= gimple_assign_lhs (g
);
3228 oe
->op
= ranges
[i
].exp
;
3229 ranges
[i
].low
= build_zero_cst (TREE_TYPE (ranges
[i
].exp
));
3230 ranges
[i
].high
= ranges
[i
].low
;
3232 ranges
[i
].strict_overflow_p
= false;
3233 oe
= (*ops
)[ranges
[*idx
].idx
];
3234 /* Now change all the other range test immediate uses, so that
3235 those tests will be optimized away. */
3236 if (opcode
== ERROR_MARK
)
3239 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
3240 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
3242 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
3243 ? boolean_false_node
: boolean_true_node
);
3246 oe
->op
= error_mark_node
;
3247 ranges
[*idx
].exp
= NULL_TREE
;
3248 ranges
[*idx
].low
= NULL_TREE
;
3249 ranges
[*idx
].high
= NULL_TREE
;
3257 /* Optimize range tests, similarly how fold_range_test optimizes
3258 it on trees. The tree code for the binary
3259 operation between all the operands is OPCODE.
3260 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3261 maybe_optimize_range_tests for inter-bb range optimization.
3262 In that case if oe->op is NULL, oe->id is bb->index whose
3263 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3264 the actual opcode. */
3267 optimize_range_tests (enum tree_code opcode
,
3268 vec
<operand_entry
*> *ops
)
3270 unsigned int length
= ops
->length (), i
, j
, first
;
3272 struct range_entry
*ranges
;
3273 bool any_changes
= false;
3278 ranges
= XNEWVEC (struct range_entry
, length
);
3279 for (i
= 0; i
< length
; i
++)
3283 init_range_entry (ranges
+ i
, oe
->op
,
3286 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
)));
3287 /* For | invert it now, we will invert it again before emitting
3288 the optimized expression. */
3289 if (opcode
== BIT_IOR_EXPR
3290 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
3291 ranges
[i
].in_p
= !ranges
[i
].in_p
;
3294 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
3295 for (i
= 0; i
< length
; i
++)
3296 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
3299 /* Try to merge ranges. */
3300 for (first
= i
; i
< length
; i
++)
3302 tree low
= ranges
[i
].low
;
3303 tree high
= ranges
[i
].high
;
3304 int in_p
= ranges
[i
].in_p
;
3305 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
3306 int update_fail_count
= 0;
3308 for (j
= i
+ 1; j
< length
; j
++)
3310 if (ranges
[i
].exp
!= ranges
[j
].exp
)
3312 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
3313 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
3315 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
3321 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, NULL
, j
- i
- 1,
3322 opcode
, ops
, ranges
[i
].exp
, NULL
, in_p
,
3323 low
, high
, strict_overflow_p
))
3328 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3329 while update_range_test would fail. */
3330 else if (update_fail_count
== 64)
3333 ++update_fail_count
;
3336 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
3339 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
3340 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
3342 if (lshift_cheap_p (optimize_function_for_speed_p (cfun
)))
3343 any_changes
|= optimize_range_tests_to_bit_test (opcode
, first
, length
,
3345 any_changes
|= optimize_range_tests_cmp_bitwise (opcode
, first
, length
,
3347 any_changes
|= optimize_range_tests_var_bound (opcode
, first
, length
, ops
,
3350 if (any_changes
&& opcode
!= ERROR_MARK
)
3353 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
3355 if (oe
->op
== error_mark_node
)
3364 XDELETEVEC (ranges
);
3368 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3369 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3370 otherwise the comparison code. */
3373 ovce_extract_ops (tree var, gassign **rets, bool *reti)
3375   if (TREE_CODE (var) != SSA_NAME)
3378   gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3382   /* ??? If we start creating more COND_EXPR, we could perform
3383      this same optimization with them.  For now, simplify.  */
3384   if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
3387   tree cond = gimple_assign_rhs1 (stmt);
3388   tree_code cmp = TREE_CODE (cond);
3389   if (TREE_CODE_CLASS (cmp) != tcc_comparison)
3392   /* ??? For now, allow only canonical true and false result vectors.
3393      We could expand this to other constants should the need arise,
3394      but at the moment we don't create them.  */
3395   tree t = gimple_assign_rhs2 (stmt);
3396   tree f = gimple_assign_rhs3 (stmt);
3398   if (integer_all_onesp (t))
3400   else if (integer_all_onesp (f))
3402       cmp = invert_tree_comparison (cmp, false);
3407   if (!integer_zerop (f))
3418 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3419 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
3422 optimize_vec_cond_expr (tree_code opcode
, vec
<operand_entry
*> *ops
)
3424 unsigned int length
= ops
->length (), i
, j
;
3425 bool any_changes
= false;
3430 for (i
= 0; i
< length
; ++i
)
3432 tree elt0
= (*ops
)[i
]->op
;
3436 tree_code cmp0
= ovce_extract_ops (elt0
, &stmt0
, &invert
);
3437 if (cmp0
== ERROR_MARK
)
3440 for (j
= i
+ 1; j
< length
; ++j
)
3442 tree
&elt1
= (*ops
)[j
]->op
;
3445 tree_code cmp1
= ovce_extract_ops (elt1
, &stmt1
, NULL
);
3446 if (cmp1
== ERROR_MARK
)
3449 tree cond0
= gimple_assign_rhs1 (stmt0
);
3450 tree x0
= TREE_OPERAND (cond0
, 0);
3451 tree y0
= TREE_OPERAND (cond0
, 1);
3453 tree cond1
= gimple_assign_rhs1 (stmt1
);
3454 tree x1
= TREE_OPERAND (cond1
, 0);
3455 tree y1
= TREE_OPERAND (cond1
, 1);
3458 if (opcode
== BIT_AND_EXPR
)
3459 comb
= maybe_fold_and_comparisons (cmp0
, x0
, y0
, cmp1
, x1
, y1
);
3460 else if (opcode
== BIT_IOR_EXPR
)
3461 comb
= maybe_fold_or_comparisons (cmp0
, x0
, y0
, cmp1
, x1
, y1
);
3468 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3470 fprintf (dump_file
, "Transforming ");
3471 print_generic_expr (dump_file
, cond0
);
3472 fprintf (dump_file
, " %c ", opcode
== BIT_AND_EXPR
? '&' : '|');
3473 print_generic_expr (dump_file
, cond1
);
3474 fprintf (dump_file
, " into ");
3475 print_generic_expr (dump_file
, comb
);
3476 fputc ('\n', dump_file
);
3479 gimple_assign_set_rhs1 (stmt0
, comb
);
3481 std::swap (*gimple_assign_rhs2_ptr (stmt0
),
3482 *gimple_assign_rhs3_ptr (stmt0
));
3483 update_stmt (stmt0
);
3485 elt1
= error_mark_node
;
3494 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
3496 if (oe
->op
== error_mark_node
)
3508 /* Return true if STMT is a cast like:
3514 # _345 = PHI <_123(N), 1(...), 1(...)>
3515 where _234 has bool type, _123 has single use and
3516 bb N has a single successor M. This is commonly used in
3517 the last block of a range test.
3519 Also Return true if STMT is tcc_compare like:
3525 # _345 = PHI <_234(N), 1(...), 1(...)>
3527    where _234 has bool type, single use and
3528 bb N has a single successor M. This is commonly used in
3529 the last block of a range test. */
3532 final_range_test_p (gimple
*stmt
)
3534 basic_block bb
, rhs_bb
, lhs_bb
;
3537 use_operand_p use_p
;
3540 if (!gimple_assign_cast_p (stmt
)
3541 && (!is_gimple_assign (stmt
)
3542 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
3543 != tcc_comparison
)))
3545 bb
= gimple_bb (stmt
);
3546 if (!single_succ_p (bb
))
3548 e
= single_succ_edge (bb
);
3549 if (e
->flags
& EDGE_COMPLEX
)
3552 lhs
= gimple_assign_lhs (stmt
);
3553 rhs
= gimple_assign_rhs1 (stmt
);
3554 if (gimple_assign_cast_p (stmt
)
3555 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3556 || TREE_CODE (rhs
) != SSA_NAME
3557 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
))
3560 if (!gimple_assign_cast_p (stmt
)
3561 && (TREE_CODE (TREE_TYPE (lhs
)) != BOOLEAN_TYPE
))
3564 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3565 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
3568 if (gimple_code (use_stmt
) != GIMPLE_PHI
3569 || gimple_bb (use_stmt
) != e
->dest
)
3572 /* And that the rhs is defined in the same loop. */
3573 if (gimple_assign_cast_p (stmt
))
3575 if (TREE_CODE (rhs
) != SSA_NAME
3576 || !(rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
)))
3577 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
3582 if (TREE_CODE (lhs
) != SSA_NAME
3583 || !(lhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (lhs
)))
3584 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), lhs_bb
))
3591 /* Return true if BB is suitable basic block for inter-bb range test
3592 optimization. If BACKWARD is true, BB should be the only predecessor
3593 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
3594 or compared with to find a common basic block to which all conditions
3595 branch to if true resp. false. If BACKWARD is false, TEST_BB should
3596 be the only predecessor of BB. */
3599 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
3602 edge_iterator ei
, ei2
;
3606 bool other_edge_seen
= false;
3611 /* Check last stmt first. */
3612 stmt
= last_stmt (bb
);
3614 || (gimple_code (stmt
) != GIMPLE_COND
3615 && (backward
|| !final_range_test_p (stmt
)))
3616 || gimple_visited_p (stmt
)
3617 || stmt_could_throw_p (stmt
)
3620 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
3623 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3624 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3625 to *OTHER_BB (if not set yet, try to find it out). */
3626 if (EDGE_COUNT (bb
->succs
) != 2)
3628 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
3630 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
3632 if (e
->dest
== test_bb
)
3641 if (*other_bb
== NULL
)
3643 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
3644 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
3646 else if (e
->dest
== e2
->dest
)
3647 *other_bb
= e
->dest
;
3648 if (*other_bb
== NULL
)
3651 if (e
->dest
== *other_bb
)
3652 other_edge_seen
= true;
3656 if (*other_bb
== NULL
|| !other_edge_seen
)
3659 else if (single_succ (bb
) != *other_bb
)
3662 /* Now check all PHIs of *OTHER_BB. */
3663 e
= find_edge (bb
, *other_bb
);
3664 e2
= find_edge (test_bb
, *other_bb
);
3665 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3667 gphi
*phi
= gsi
.phi ();
3668 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3669 corresponding to BB and TEST_BB predecessor must be the same. */
3670 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
3671 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
3673 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3674 one of the PHIs should have the lhs of the last stmt in
3675 that block as PHI arg and that PHI should have 0 or 1
3676 corresponding to it in all other range test basic blocks
3680 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
3681 == gimple_assign_lhs (stmt
)
3682 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
3683 || integer_onep (gimple_phi_arg_def (phi
,
3689 gimple
*test_last
= last_stmt (test_bb
);
3690 if (gimple_code (test_last
) != GIMPLE_COND
3691 && gimple_phi_arg_def (phi
, e2
->dest_idx
)
3692 == gimple_assign_lhs (test_last
)
3693 && (integer_zerop (gimple_phi_arg_def (phi
, e
->dest_idx
))
3694 || integer_onep (gimple_phi_arg_def (phi
, e
->dest_idx
))))
3704 /* Return true if BB doesn't have side-effects that would disallow
3705 range test optimization, all SSA_NAMEs set in the bb are consumed
3706 in the bb and there are no PHIs. */
3709 no_side_effect_bb (basic_block bb)
3711   gimple_stmt_iterator gsi;
3714   if (!gimple_seq_empty_p (phi_nodes (bb)))
3716   last = last_stmt (bb);
3717   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3719       gimple *stmt = gsi_stmt (gsi);
3721       imm_use_iterator imm_iter;
3722       use_operand_p use_p;
3724       if (is_gimple_debug (stmt))
3726       if (gimple_has_side_effects (stmt))
3730       if (!is_gimple_assign (stmt))
3732       lhs = gimple_assign_lhs (stmt);
3733       if (TREE_CODE (lhs) != SSA_NAME)
3735       if (gimple_assign_rhs_could_trap_p (stmt))
3737       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3739	  gimple *use_stmt = USE_STMT (use_p);
3740	  if (is_gimple_debug (use_stmt))
3742	  if (gimple_bb (use_stmt) != bb)
3749 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3750 return true and fill in *OPS recursively. */
3753 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3756   gimple *stmt = SSA_NAME_DEF_STMT (var);
3760   if (!is_reassociable_op (stmt, code, loop))
3763   rhs[0] = gimple_assign_rhs1 (stmt);
3764   rhs[1] = gimple_assign_rhs2 (stmt);
3765   gimple_set_visited (stmt, true);
3766   for (i = 0; i < 2; i++)
3767     if (TREE_CODE (rhs[i]) == SSA_NAME
3768	 && !get_ops (rhs[i], code, ops, loop)
3769	 && has_single_use (rhs[i]))
3771	 operand_entry *oe = operand_entry_pool.allocate ();
3777	 oe->stmt_to_insert = NULL;
3778	 ops->safe_push (oe);
3783 /* Find the ops that were added by get_ops starting from VAR, see if
3784 they were changed during update_range_test and if yes, create new
3788 update_ops (tree var
, enum tree_code code
, vec
<operand_entry
*> ops
,
3789 unsigned int *pidx
, struct loop
*loop
)
3791 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
3795 if (!is_reassociable_op (stmt
, code
, loop
))
3798 rhs
[0] = gimple_assign_rhs1 (stmt
);
3799 rhs
[1] = gimple_assign_rhs2 (stmt
);
3802 for (i
= 0; i
< 2; i
++)
3803 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
3805 rhs
[2 + i
] = update_ops (rhs
[i
], code
, ops
, pidx
, loop
);
3806 if (rhs
[2 + i
] == NULL_TREE
)
3808 if (has_single_use (rhs
[i
]))
3809 rhs
[2 + i
] = ops
[(*pidx
)++]->op
;
3811 rhs
[2 + i
] = rhs
[i
];
3814 if ((rhs
[2] != rhs
[0] || rhs
[3] != rhs
[1])
3815 && (rhs
[2] != rhs
[1] || rhs
[3] != rhs
[0]))
3817 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3818 var
= make_ssa_name (TREE_TYPE (var
));
3819 gassign
*g
= gimple_build_assign (var
, gimple_assign_rhs_code (stmt
),
3821 gimple_set_uid (g
, gimple_uid (stmt
));
3822 gimple_set_visited (g
, true);
3823 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3828 /* Structure to track the initial value passed to get_ops and
3829 the range in the ops vector for each basic block. */
3831 struct inter_bb_range_test_entry
3834 unsigned int first_idx
, last_idx
;
3837 /* Inter-bb range test optimization.
3839 Returns TRUE if a gimple conditional is optimized to a true/false,
3840 otherwise return FALSE.
3842 This indicates to the caller that it should run a CFG cleanup pass
3843 once reassociation is completed. */
3846 maybe_optimize_range_tests (gimple
*stmt
)
3848 basic_block first_bb
= gimple_bb (stmt
);
3849 basic_block last_bb
= first_bb
;
3850 basic_block other_bb
= NULL
;
3854 auto_vec
<operand_entry
*> ops
;
3855 auto_vec
<inter_bb_range_test_entry
> bbinfo
;
3856 bool any_changes
= false;
3857 bool cfg_cleanup_needed
= false;
3859 /* Consider only basic blocks that end with GIMPLE_COND or
3860 a cast statement satisfying final_range_test_p. All
3861 but the last bb in the first_bb .. last_bb range
3862 should end with GIMPLE_COND. */
3863 if (gimple_code (stmt
) == GIMPLE_COND
)
3865 if (EDGE_COUNT (first_bb
->succs
) != 2)
3866 return cfg_cleanup_needed
;
3868 else if (final_range_test_p (stmt
))
3869 other_bb
= single_succ (first_bb
);
3871 return cfg_cleanup_needed
;
3873 if (stmt_could_throw_p (stmt
))
3874 return cfg_cleanup_needed
;
3876 /* As relative ordering of post-dominator sons isn't fixed,
3877 maybe_optimize_range_tests can be called first on any
3878 bb in the range we want to optimize. So, start searching
3879 backwards, if first_bb can be set to a predecessor. */
3880 while (single_pred_p (first_bb
))
3882 basic_block pred_bb
= single_pred (first_bb
);
3883 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, true))
3885 if (!no_side_effect_bb (first_bb
))
3889 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3890 Before starting forward search in last_bb successors, find
3891 out the other_bb. */
3892 if (first_bb
== last_bb
)
3895 /* As non-GIMPLE_COND last stmt always terminates the range,
3896 if forward search didn't discover anything, just give up. */
3897 if (gimple_code (stmt
) != GIMPLE_COND
)
3898 return cfg_cleanup_needed
;
3899 /* Look at both successors. Either it ends with a GIMPLE_COND
3900 and satisfies suitable_cond_bb, or ends with a cast and
3901 other_bb is that cast's successor. */
3902 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
3903 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
3904 || e
->dest
== first_bb
)
3905 return cfg_cleanup_needed
;
3906 else if (single_pred_p (e
->dest
))
3908 stmt
= last_stmt (e
->dest
);
3910 && gimple_code (stmt
) == GIMPLE_COND
3911 && EDGE_COUNT (e
->dest
->succs
) == 2)
3913 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
, true))
3919 && final_range_test_p (stmt
)
3920 && find_edge (first_bb
, single_succ (e
->dest
)))
3922 other_bb
= single_succ (e
->dest
);
3923 if (other_bb
== first_bb
)
3927 if (other_bb
== NULL
)
3928 return cfg_cleanup_needed
;
3930 /* Now do the forward search, moving last_bb to successor bbs
3931 that aren't other_bb. */
3932 while (EDGE_COUNT (last_bb
->succs
) == 2)
3934 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
3935 if (e
->dest
!= other_bb
)
3939 if (!single_pred_p (e
->dest
))
3941 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, false))
3943 if (!no_side_effect_bb (e
->dest
))
3947 if (first_bb
== last_bb
)
3948 return cfg_cleanup_needed
;
3949 /* Here basic blocks first_bb through last_bb's predecessor
3950 end with GIMPLE_COND, all of them have one of the edges to
3951 other_bb and another to another block in the range,
3952 all blocks except first_bb don't have side-effects and
3953 last_bb ends with either GIMPLE_COND, or cast satisfying
3954 final_range_test_p. */
3955 for (bb
= last_bb
; ; bb
= single_pred (bb
))
3957 enum tree_code code
;
3959 inter_bb_range_test_entry bb_ent
;
3961 bb_ent
.op
= NULL_TREE
;
3962 bb_ent
.first_idx
= ops
.length ();
3963 bb_ent
.last_idx
= bb_ent
.first_idx
;
3964 e
= find_edge (bb
, other_bb
);
3965 stmt
= last_stmt (bb
);
3966 gimple_set_visited (stmt
, true);
3967 if (gimple_code (stmt
) != GIMPLE_COND
)
3969 use_operand_p use_p
;
3974 lhs
= gimple_assign_lhs (stmt
);
3975 rhs
= gimple_assign_rhs1 (stmt
);
3976 gcc_assert (bb
== last_bb
);
3985 # _345 = PHI <_123(N), 1(...), 1(...)>
3987 or 0 instead of 1. If it is 0, the _234
3988 range test is anded together with all the
3989 other range tests, if it is 1, it is ored with
3991 single_imm_use (lhs
, &use_p
, &phi
);
3992 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
3993 e2
= find_edge (first_bb
, other_bb
);
3995 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
3996 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
3997 code
= BIT_AND_EXPR
;
4000 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
4001 code
= BIT_IOR_EXPR
;
4004 /* If _234 SSA_NAME_DEF_STMT is
4006 (or &, corresponding to 1/0 in the phi arguments,
4007 push into ops the individual range test arguments
4008 of the bitwise or resp. and, recursively. */
4009 if (TREE_CODE (rhs
) == SSA_NAME
4010 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4012 && !get_ops (rhs
, code
, &ops
,
4013 loop_containing_stmt (stmt
))
4014 && has_single_use (rhs
))
4016 /* Otherwise, push the _234 range test itself. */
4017 operand_entry
*oe
= operand_entry_pool
.allocate ();
4023 oe
->stmt_to_insert
= NULL
;
4028 else if (is_gimple_assign (stmt
)
4029 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4031 && !get_ops (lhs
, code
, &ops
,
4032 loop_containing_stmt (stmt
))
4033 && has_single_use (lhs
))
4035 operand_entry
*oe
= operand_entry_pool
.allocate ();
4046 bb_ent
.last_idx
= ops
.length ();
4049 bbinfo
.safe_push (bb_ent
);
4052 /* Otherwise stmt is GIMPLE_COND. */
4053 code
= gimple_cond_code (stmt
);
4054 lhs
= gimple_cond_lhs (stmt
);
4055 rhs
= gimple_cond_rhs (stmt
);
4056 if (TREE_CODE (lhs
) == SSA_NAME
4057 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
4058 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
4059 || rhs
!= boolean_false_node
4060 /* Either push into ops the individual bitwise
4061 or resp. and operands, depending on which
4062 edge is other_bb. */
4063 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
4064 ^ (code
== EQ_EXPR
))
4065 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
4066 loop_containing_stmt (stmt
))))
4068 /* Or push the GIMPLE_COND stmt itself. */
4069 operand_entry
*oe
= operand_entry_pool
.allocate ();
4072 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
4073 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
4074 /* oe->op = NULL signs that there is no SSA_NAME
4075 for the range test, and oe->id instead is the
4076 basic block number, at which's end the GIMPLE_COND
4080 oe
->stmt_to_insert
= NULL
;
4085 else if (ops
.length () > bb_ent
.first_idx
)
4088 bb_ent
.last_idx
= ops
.length ();
4090 bbinfo
.safe_push (bb_ent
);
4094 if (ops
.length () > 1)
4095 any_changes
= optimize_range_tests (ERROR_MARK
, &ops
);
4098 unsigned int idx
, max_idx
= 0;
4099 /* update_ops relies on has_single_use predicates returning the
4100 same values as it did during get_ops earlier. Additionally it
4101 never removes statements, only adds new ones and it should walk
4102 from the single imm use and check the predicate already before
4103 making those changes.
4104 On the other side, the handling of GIMPLE_COND directly can turn
4105 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4106 it needs to be done in a separate loop afterwards. */
4107 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
4109 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
4110 && bbinfo
[idx
].op
!= NULL_TREE
)
4115 stmt
= last_stmt (bb
);
4116 new_op
= update_ops (bbinfo
[idx
].op
,
4118 ops
[bbinfo
[idx
].first_idx
]->rank
,
4119 ops
, &bbinfo
[idx
].first_idx
,
4120 loop_containing_stmt (stmt
));
4121 if (new_op
== NULL_TREE
)
4123 gcc_assert (bb
== last_bb
);
4124 new_op
= ops
[bbinfo
[idx
].first_idx
++]->op
;
4126 if (bbinfo
[idx
].op
!= new_op
)
4128 imm_use_iterator iter
;
4129 use_operand_p use_p
;
4130 gimple
*use_stmt
, *cast_or_tcc_cmp_stmt
= NULL
;
4132 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, bbinfo
[idx
].op
)
4133 if (is_gimple_debug (use_stmt
))
4135 else if (gimple_code (use_stmt
) == GIMPLE_COND
4136 || gimple_code (use_stmt
) == GIMPLE_PHI
)
4137 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
4138 SET_USE (use_p
, new_op
);
4139 else if ((is_gimple_assign (use_stmt
)
4141 (gimple_assign_rhs_code (use_stmt
))
4142 == tcc_comparison
)))
4143 cast_or_tcc_cmp_stmt
= use_stmt
;
4144 else if (gimple_assign_cast_p (use_stmt
))
4145 cast_or_tcc_cmp_stmt
= use_stmt
;
4149 if (cast_or_tcc_cmp_stmt
)
4151 gcc_assert (bb
== last_bb
);
4152 tree lhs
= gimple_assign_lhs (cast_or_tcc_cmp_stmt
);
4153 tree new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
4154 enum tree_code rhs_code
4155 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt
)
4156 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt
)
4159 if (is_gimple_min_invariant (new_op
))
4161 new_op
= fold_convert (TREE_TYPE (lhs
), new_op
);
4162 g
= gimple_build_assign (new_lhs
, new_op
);
4165 g
= gimple_build_assign (new_lhs
, rhs_code
, new_op
);
4166 gimple_stmt_iterator gsi
4167 = gsi_for_stmt (cast_or_tcc_cmp_stmt
);
4168 gimple_set_uid (g
, gimple_uid (cast_or_tcc_cmp_stmt
));
4169 gimple_set_visited (g
, true);
4170 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4171 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
4172 if (is_gimple_debug (use_stmt
))
4174 else if (gimple_code (use_stmt
) == GIMPLE_COND
4175 || gimple_code (use_stmt
) == GIMPLE_PHI
)
4176 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
4177 SET_USE (use_p
, new_lhs
);
4186 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
4188 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
4189 && bbinfo
[idx
].op
== NULL_TREE
4190 && ops
[bbinfo
[idx
].first_idx
]->op
!= NULL_TREE
)
4192 gcond
*cond_stmt
= as_a
<gcond
*> (last_stmt (bb
));
4197 /* If we collapse the conditional to a true/false
4198 condition, then bubble that knowledge up to our caller. */
4199 if (integer_zerop (ops
[bbinfo
[idx
].first_idx
]->op
))
4201 gimple_cond_make_false (cond_stmt
);
4202 cfg_cleanup_needed
= true;
4204 else if (integer_onep (ops
[bbinfo
[idx
].first_idx
]->op
))
4206 gimple_cond_make_true (cond_stmt
);
4207 cfg_cleanup_needed
= true;
4211 gimple_cond_set_code (cond_stmt
, NE_EXPR
);
4212 gimple_cond_set_lhs (cond_stmt
,
4213 ops
[bbinfo
[idx
].first_idx
]->op
);
4214 gimple_cond_set_rhs (cond_stmt
, boolean_false_node
);
4216 update_stmt (cond_stmt
);
4222 /* The above changes could result in basic blocks after the first
4223 modified one, up to and including last_bb, to be executed even if
4224 they would not be in the original program. If the value ranges of
4225 assignment lhs' in those bbs were dependent on the conditions
4226 guarding those basic blocks which now can change, the VRs might
4227 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4228 are only used within the same bb, it should be not a big deal if
4229 we just reset all the VRs in those bbs. See PR68671. */
4230 for (bb
= last_bb
, idx
= 0; idx
< max_idx
; bb
= single_pred (bb
), idx
++)
4231 reset_flow_sensitive_info_in_bb (bb
);
4233 return cfg_cleanup_needed
;
4236 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4237    of STMT in its operands.  This is also known as a "destructive
4238 update" operation. */
4241 is_phi_for_stmt (gimple *stmt, tree operand)
4246   use_operand_p arg_p;
4249   if (TREE_CODE (operand) != SSA_NAME)
4252   lhs = gimple_assign_lhs (stmt);
4254   def_stmt = SSA_NAME_DEF_STMT (operand);
4255   def_phi = dyn_cast <gphi *> (def_stmt);
4259   FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
4260     if (lhs == USE_FROM_PTR (arg_p))
4265 /* Remove def stmt of VAR if VAR has zero uses and recurse
4266 on rhs1 operand if so. */
4269 remove_visited_stmt_chain (tree var)
4272   gimple_stmt_iterator gsi;
4276       if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
4278       stmt = SSA_NAME_DEF_STMT (var);
4279       if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
4281	  var = gimple_assign_rhs1 (stmt);
4282	  gsi = gsi_for_stmt (stmt);
4283	  reassoc_remove_stmt (&gsi);
4284	  release_defs (stmt);
4291 /* This function checks three consecutive operands in
4292    passed operands vector OPS starting from OPINDEX and
4293    swaps two operands if it is profitable for binary operation
4294    consuming OPINDEX + 1 and OPINDEX + 2 operands.
4296 We pair ops with the same rank if possible.
4298 The alternative we try is to see if STMT is a destructive
4299 update style statement, which is like:
4302 In that case, we want to use the destructive update form to
4303 expose the possible vectorizer sum reduction opportunity.
4304 In that case, the third operand will be the phi node. This
4305 check is not performed if STMT is null.
4307 We could, of course, try to be better as noted above, and do a
4308 lot of work to try to find these opportunities in >3 operand
4309 cases, but it is unlikely to be worth it. */
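/* Illustrative sketch, not part of the pass: the "destructive update" shape
   the heuristic above tries to preserve.  In a hypothetical reduction such
   as the loop below, the accumulator flowing around the loop is a PHI
   result; keeping that PHI value as a direct operand of the final addition
   keeps the sum-reduction form the vectorizer looks for.  */

static int
sum_reduction_example (const int *a, int n)
{
  int sum = 0;			/* becomes a PHI on the loop header */
  for (int i = 0; i < n; i++)
    sum = sum + a[i];		/* sum (the PHI result) stays an operand */
  return sum;
}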
4312 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
4313			  unsigned int opindex, gimple *stmt)
4315   operand_entry *oe1, *oe2, *oe3;
4318   oe2 = ops[opindex + 1];
4319   oe3 = ops[opindex + 2];
4321   if ((oe1->rank == oe2->rank
4322        && oe2->rank != oe3->rank)
4323       || (stmt && is_phi_for_stmt (stmt, oe3->op)
4324	   && !is_phi_for_stmt (stmt, oe1->op)
4325	   && !is_phi_for_stmt (stmt, oe2->op)))
4326     std::swap (*oe1, *oe3);
4327   else if ((oe1->rank == oe3->rank
4328	     && oe2->rank != oe3->rank)
4329	    || (stmt && is_phi_for_stmt (stmt, oe2->op)
4330		&& !is_phi_for_stmt (stmt, oe1->op)
4331		&& !is_phi_for_stmt (stmt, oe3->op)))
4332     std::swap (*oe1, *oe2);
4335 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4336 two definitions, otherwise return STMT. */
4338 static inline gimple *
4339 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
4341   if (TREE_CODE (rhs1) == SSA_NAME
4342       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
4343     stmt = SSA_NAME_DEF_STMT (rhs1);
4344   if (TREE_CODE (rhs2) == SSA_NAME
4345       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
4346     stmt = SSA_NAME_DEF_STMT (rhs2);
4350 /* If the stmt that defines operand has to be inserted, insert it
4353 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
4355   gcc_assert (is_gimple_assign (stmt_to_insert));
4356   tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
4357   tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
4358   gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
4359   gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
4360   gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
4362   /* If the insert point is not stmt, then insert_point would be
4363      the point where operand rhs1 or rhs2 is defined.  In this case,
4364      stmt_to_insert has to be inserted afterwards.  This would
4365      only happen when the stmt insertion point is flexible.  */
4366   if (stmt == insert_point)
4367     gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
4369     insert_stmt_after (stmt_to_insert, insert_point);
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order.  Return new lhs.
   CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
   the current stmt and during recursive invocations.
   NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
   recursive invocations.  */

static tree
rewrite_expr_tree (gimple *stmt, unsigned int opindex,
                   vec<operand_entry *> ops, bool changed, bool next_changed)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree lhs = gimple_assign_lhs (stmt);
  operand_entry *oe;

  /* The final recursion case for this function is that you have
     exactly two operations left.
     If we had exactly one op in the entire list to start with, we
     would have never called this function, and the tail recursion
     rewrites them one at a time.  */
  if (opindex + 2 == ops.length ())
    {
      operand_entry *oe1, *oe2;

      oe1 = ops[opindex];
      oe2 = ops[opindex + 1];

      if (rhs1 != oe1->op || rhs2 != oe2->op)
        {
          gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
          unsigned int uid = gimple_uid (stmt);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Transforming ");
              print_gimple_stmt (dump_file, stmt, 0);
            }

          /* If the stmt that defines operand has to be inserted, insert it
             before the use.  */
          if (oe1->stmt_to_insert)
            insert_stmt_before_use (stmt, oe1->stmt_to_insert);
          if (oe2->stmt_to_insert)
            insert_stmt_before_use (stmt, oe2->stmt_to_insert);
          /* Even when changed is false, reassociation could have e.g. removed
             some redundant operations, so unless we are just swapping the
             arguments or unless there is no change at all (then we just
             return lhs), force creation of a new SSA_NAME.  */
          if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
            {
              gimple *insert_point
                = find_insert_point (stmt, oe1->op, oe2->op);
              lhs = make_ssa_name (TREE_TYPE (lhs));
              stmt
                = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
                                       oe1->op, oe2->op);
              gimple_set_uid (stmt, uid);
              gimple_set_visited (stmt, true);
              if (insert_point == gsi_stmt (gsi))
                gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
              else
                insert_stmt_after (stmt, insert_point);
            }
          else
            {
              gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
                                   == stmt);
              gimple_assign_set_rhs1 (stmt, oe1->op);
              gimple_assign_set_rhs2 (stmt, oe2->op);
              update_stmt (stmt);
            }

          if (rhs1 != oe1->op && rhs1 != oe2->op)
            remove_visited_stmt_chain (rhs1);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, " into ");
              print_gimple_stmt (dump_file, stmt, 0);
            }
        }
      return lhs;
    }

  /* If we hit here, we should have 3 or more ops left.  */
  gcc_assert (opindex + 2 < ops.length ());

  /* Rewrite the next operator.  */
  oe = ops[opindex];

  /* If the stmt that defines operand has to be inserted, insert it
     before the use.  */
  if (oe->stmt_to_insert)
    insert_stmt_before_use (stmt, oe->stmt_to_insert);

  /* Recurse on the LHS of the binary operator, which is guaranteed to
     be the non-leaf side.  */
  tree new_rhs1
    = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
                         changed || oe->op != rhs2 || next_changed,
                         false);

  if (oe->op != rhs2 || new_rhs1 != rhs1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Transforming ");
          print_gimple_stmt (dump_file, stmt, 0);
        }

      /* If changed is false, this is either opindex == 0
         or all outer rhs2's were equal to corresponding oe->op,
         and powi_result is NULL.
         That means lhs is equivalent before and after reassociation.
         Otherwise ensure the old lhs SSA_NAME is not reused and
         create a new stmt as well, so that any debug stmts will be
         properly adjusted.  */
      if (changed)
        {
          gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
          unsigned int uid = gimple_uid (stmt);
          gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);

          lhs = make_ssa_name (TREE_TYPE (lhs));
          stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
                                      new_rhs1, oe->op);
          gimple_set_uid (stmt, uid);
          gimple_set_visited (stmt, true);
          if (insert_point == gsi_stmt (gsi))
            gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
          else
            insert_stmt_after (stmt, insert_point);
        }
      else
        {
          gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
                               == stmt);
          gimple_assign_set_rhs1 (stmt, new_rhs1);
          gimple_assign_set_rhs2 (stmt, oe->op);
          update_stmt (stmt);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " into ");
          print_gimple_stmt (dump_file, stmt, 0);
        }
    }
  return lhs;
}
/* Find out how many cycles we need to compute statements chain.
   OPS_NUM holds number of statements in a chain.  CPU_WIDTH is a
   maximum number of independent statements we may execute per cycle.  */

static int
get_required_cycles (int ops_num, int cpu_width)
{
  int res;
  int elog;
  unsigned int rest;

  /* While we have more than 2 * cpu_width operands
     we may reduce number of operands by cpu_width
     per cycle.  */
  res = ops_num / (2 * cpu_width);

  /* Remained operands count may be reduced twice per cycle
     until we have only one operand.  */
  rest = (unsigned)(ops_num - res * cpu_width);
  elog = exact_log2 (rest);
  if (elog >= 0)
    res += elog;
  else
    res += floor_log2 (rest) + 1;

  return res;
}
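/* A rough worked example: with ops_num == 12 and cpu_width == 2 the
   division above accounts for 12 / (2 * 2) == 3 cycles and leaves
   rest == 12 - 3 * 2 == 6 operands, which the halving phase finishes
   in floor_log2 (6) + 1 == 3 further cycles, i.e. 6 cycles in total.  */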
/* Returns an optimal number of registers to use for computation of
   given statements.  */

static int
get_reassociation_width (int ops_num, enum tree_code opc,
                         machine_mode mode)
{
  int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
  int width;
  int width_min;
  int cycles_best;

  if (param_width > 0)
    width = param_width;
  else
    width = targetm.sched.reassociation_width (opc, mode);

  /* Get the minimal time required for sequence computation.  */
  cycles_best = get_required_cycles (ops_num, width);

  /* Check if we may use less width and still compute sequence for
     the same time.  It will allow us to reduce registers usage.
     get_required_cycles is monotonically increasing with lower width
     so we can perform a binary search for the minimal width that still
     results in the optimal cycle count.  */
  width_min = 1;
  while (width > width_min)
    {
      int width_mid = (width + width_min) / 2;

      if (get_required_cycles (ops_num, width_mid) == cycles_best)
        width = width_mid;
      else if (width_min < width_mid)
        width_min = width_mid;
      else
        break;
    }

  return width;
}
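/* E.g. if the target reports a reassociation width of 4 for this
   opcode but get_required_cycles gives the same count for width 2,
   the binary search above settles on the narrower width, saving
   registers without lengthening the computation.  */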
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order and trying to allow operations to be executed in
   parallel.  */

static void
rewrite_expr_tree_parallel (gassign *stmt, int width,
                            vec<operand_entry *> ops)
{
  enum tree_code opcode = gimple_assign_rhs_code (stmt);
  int op_num = ops.length ();
  gcc_assert (op_num > 0);
  int stmt_num = op_num - 1;
  gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
  int op_index = op_num - 1;
  int stmt_index = 0;
  int ready_stmts_end = 0;
  int i = 0;
  gimple *stmt1 = NULL, *stmt2 = NULL;
  tree last_rhs1 = gimple_assign_rhs1 (stmt);

  /* We start expression rewriting from the top statements.
     So, in this loop we create a full list of statements
     we will work with.  */
  stmts[stmt_num - 1] = stmt;
  for (i = stmt_num - 2; i >= 0; i--)
    stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));

  for (i = 0; i < stmt_num; i++)
    {
      tree op1, op2;

      /* Determine whether we should use results of
         already handled statements or not.  */
      if (ready_stmts_end == 0
          && (i - stmt_index >= width || op_index < 1))
        ready_stmts_end = i;

      /* Now we choose operands for the next statement.  Non zero
         value in ready_stmts_end means here that we should use
         the result of already generated statements as new operand.  */
      if (ready_stmts_end > 0)
        {
          op1 = gimple_assign_lhs (stmts[stmt_index++]);
          if (ready_stmts_end > stmt_index)
            op2 = gimple_assign_lhs (stmts[stmt_index++]);
          else if (op_index >= 0)
            {
              operand_entry *oe = ops[op_index--];
              stmt2 = oe->stmt_to_insert;
              op2 = oe->op;
            }
          else
            {
              gcc_assert (stmt_index < i);
              op2 = gimple_assign_lhs (stmts[stmt_index++]);
            }

          if (stmt_index >= ready_stmts_end)
            ready_stmts_end = 0;
        }
      else
        {
          if (op_index > 1)
            swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
          operand_entry *oe2 = ops[op_index--];
          operand_entry *oe1 = ops[op_index--];
          op2 = oe2->op;
          stmt2 = oe2->stmt_to_insert;
          op1 = oe1->op;
          stmt1 = oe1->stmt_to_insert;
        }

      /* If we emit the last statement then we should put
         operands into the last statement.  It will also
         break the loop.  */
      if (op_index < 0 && stmt_index == i)
        i = stmt_num - 1;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Transforming ");
          print_gimple_stmt (dump_file, stmts[i], 0);
        }

      /* If the stmt that defines operand has to be inserted, insert it
         before the use.  */
      if (stmt1)
        insert_stmt_before_use (stmts[i], stmt1);
      if (stmt2)
        insert_stmt_before_use (stmts[i], stmt2);
      stmt1 = stmt2 = NULL;

      /* We keep original statement only for the last one.  All
         others are recreated.  */
      if (i == stmt_num - 1)
        {
          gimple_assign_set_rhs1 (stmts[i], op1);
          gimple_assign_set_rhs2 (stmts[i], op2);
          update_stmt (stmts[i]);
        }
      else
        stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " into ");
          print_gimple_stmt (dump_file, stmts[i], 0);
        }
    }

  remove_visited_stmt_chain (last_rhs1);
}
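/* For illustration only: the point of the WIDTH parameter is that with
   e.g. width == 2 and operands a, b, c, d the rewrite can produce two
   independent partial sums such as

       t1 = a + b;  t2 = c + d;  lhs = t1 + t2;

   which a superscalar target can issue in fewer cycles than the serial
   chain t1 = a + b; t2 = t1 + c; lhs = t2 + d.  */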
/* Transform STMT, which is really (A + B) + (C + D) into the left
   linear form, ((A+B)+C)+D.
   Recurse on D if necessary.  */

static void
linearize_expr (gimple *stmt)
{
  gimple_stmt_iterator gsi;
  gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
  gimple *oldbinrhs = binrhs;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  gimple *newbinrhs = NULL;
  struct loop *loop = loop_containing_stmt (stmt);
  tree lhs = gimple_assign_lhs (stmt);

  gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
              && is_reassociable_op (binrhs, rhscode, loop));

  gsi = gsi_for_stmt (stmt);

  gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
  binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
                                gimple_assign_rhs_code (binrhs),
                                gimple_assign_lhs (binlhs),
                                gimple_assign_rhs2 (binrhs));
  gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
  gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
  gimple_set_uid (binrhs, gimple_uid (stmt));

  if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
    newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Linearized: ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  reassociate_stats.linearized++;
  update_stmt (stmt);

  gsi = gsi_for_stmt (oldbinrhs);
  reassoc_remove_stmt (&gsi);
  release_defs (oldbinrhs);

  gimple_set_visited (stmt, true);
  gimple_set_visited (binlhs, true);
  gimple_set_visited (binrhs, true);

  /* Tail recurse on the new rhs if it still needs reassociation.  */
  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
    /* ??? This should probably be linearize_expr (newbinrhs) but I don't
       want to change the algorithm while converting to tuples.  */
    linearize_expr (stmt);
}
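/* Concretely, given

       x = a + b;  y = c + d;  lhs = x + y;

   the code above rewrites lhs = x + y as lhs = t + c and inserts a new
   statement t = x + d before it, so the whole tree becomes the
   left-linear ((a + b) + d) + c; the final operand order is decided
   later by rank sorting.  */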
/* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
   it.  Otherwise, return NULL.  */

static gimple *
get_single_immediate_use (tree lhs)
{
  use_operand_p immuse;
  gimple *immusestmt;

  if (TREE_CODE (lhs) == SSA_NAME
      && single_imm_use (lhs, &immuse, &immusestmt)
      && is_gimple_assign (immusestmt))
    return immusestmt;

  return NULL;
}
/* Recursively negate the value of TONEGATE, and return the SSA_NAME
   representing the negated value.  Insertions of any necessary
   instructions go before GSI.
   This function is recursive in that, if you hand it "a_5" as the
   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */

static tree
negate_value (tree tonegate, gimple_stmt_iterator *gsip)
{
  gimple *negatedefstmt = NULL;
  tree resultofnegate;
  gimple_stmt_iterator gsi;
  unsigned int uid;

  /* If we are trying to negate a name, defined by an add, negate the
     add operands instead.  */
  if (TREE_CODE (tonegate) == SSA_NAME)
    negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
  if (TREE_CODE (tonegate) == SSA_NAME
      && is_gimple_assign (negatedefstmt)
      && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
      && has_single_use (gimple_assign_lhs (negatedefstmt))
      && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
    {
      tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
      tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
      tree lhs = gimple_assign_lhs (negatedefstmt);
      gimple *g;

      gsi = gsi_for_stmt (negatedefstmt);
      rhs1 = negate_value (rhs1, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs2 = negate_value (rhs2, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      lhs = make_ssa_name (TREE_TYPE (lhs));
      gimple_set_visited (negatedefstmt, true);
      g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
      gimple_set_uid (g, gimple_uid (negatedefstmt));
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      return lhs;
    }

  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
  resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
                                             NULL_TREE, true, GSI_SAME_STMT);
  gsi = *gsip;
  uid = gimple_uid (gsi_stmt (gsi));
  for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (gimple_uid (stmt) != 0)
        break;
      gimple_set_uid (stmt, uid);
    }
  return resultofnegate;
}
/* Return true if we should break up the subtract in STMT into an add
   with negate.  This is true when the subtract operands are really
   adds, or the subtract itself is used in an add expression.  In
   either case, breaking up the subtract into an add with negate
   exposes the adds to reassociation.  */

static bool
should_break_up_subtract (gimple *stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple *immusestmt;
  struct loop *loop = loop_containing_stmt (stmt);

  if (TREE_CODE (binlhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
    return true;

  if (TREE_CODE (binrhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
    return true;

  if (TREE_CODE (lhs) == SSA_NAME
      && (immusestmt = get_single_immediate_use (lhs))
      && is_gimple_assign (immusestmt)
      && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
          || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
              && gimple_assign_rhs1 (immusestmt) == lhs)
          || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
    return true;
  return false;
}
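/* E.g. for

       c = a - b;  e = c + d;

   the subtract feeds only an addition, so it qualifies and is later
   rewritten as c = a + (-b), exposing both additions to reassociation.  */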
/* Transform STMT from A - B into A + -B.  */

static void
break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Breaking up subtract ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  rhs2 = negate_value (rhs2, gsip);
  gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
  update_stmt (stmt);
}
/* Determine whether STMT is a builtin call that raises an SSA name
   to an integer power and has only one use.  If so, and this is early
   reassociation and unsafe math optimizations are permitted, place
   the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
   If any of these conditions does not hold, return FALSE.  */

static bool
acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
{
  tree arg1;
  REAL_VALUE_TYPE c, cint;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
      if (flag_errno_math)
        return false;

      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (TREE_CODE (arg1) != REAL_CST)
        return false;

      c = TREE_REAL_CST (arg1);

      if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
        return false;

      *exponent = real_to_integer (&c);
      real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
      if (!real_identical (&c, &cint))
        return false;

      break;

    CASE_CFN_POWI:
      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (!tree_fits_shwi_p (arg1))
        return false;

      *exponent = tree_to_shwi (arg1);
      break;

    default:
      return false;
    }

  /* Expanding negative exponents is generally unproductive, so we don't
     complicate matters with those.  Exponents of zero and one should
     have been handled by expression folding.  */
  if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
    return false;

  return true;
}
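/* E.g. a single-use call pow (x, 4.0) (when unsafe math optimizations
   permit it and errno is not required) yields *BASE == x and
   *EXPONENT == 4, while pow (x, 0.5) is rejected because the exponent
   is not an exact integer.  */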
/* Try to derive and add operand entry for OP to *OPS.  Return false if
   unsuccessful.  */

static bool
try_special_add_to_ops (vec<operand_entry *> *ops,
                        enum tree_code code,
                        tree op, gimple* def_stmt)
{
  tree base = NULL_TREE;
  HOST_WIDE_INT exponent = 0;

  if (TREE_CODE (op) != SSA_NAME
      || ! has_single_use (op))
    return false;

  if (code == MULT_EXPR
      && reassoc_insert_powi_p
      && flag_unsafe_math_optimizations
      && is_gimple_call (def_stmt)
      && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
    {
      add_repeat_to_ops_vec (ops, base, exponent);
      gimple_set_visited (def_stmt, true);
      return true;
    }
  else if (code == MULT_EXPR
           && is_gimple_assign (def_stmt)
           && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
           && !HONOR_SNANS (TREE_TYPE (op))
           && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
               || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      tree cst = build_minus_one_cst (TREE_TYPE (op));
      add_to_ops_vec (ops, rhs1);
      add_to_ops_vec (ops, cst);
      gimple_set_visited (def_stmt, true);
      return true;
    }

  return false;
}
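/* E.g. when a multiply chain contains t defined by the single-use
   statement t = -y, the NEGATE_EXPR case above pushes the two entries
   y and -1 onto OPS instead of t, exposing the negation to the rest of
   the chain as a multiplication by -1.  */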
/* Recursively linearize a binary expression that is the RHS of STMT.
   Place the operands of the expression tree in the vector named OPS.  */

static void
linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
                     bool is_associative, bool set_visited)
{
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple *binlhsdef = NULL, *binrhsdef = NULL;
  bool binlhsisreassoc = false;
  bool binrhsisreassoc = false;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  struct loop *loop = loop_containing_stmt (stmt);

  if (set_visited)
    gimple_set_visited (stmt, true);

  if (TREE_CODE (binlhs) == SSA_NAME)
    {
      binlhsdef = SSA_NAME_DEF_STMT (binlhs);
      binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
                         && !stmt_could_throw_p (binlhsdef));
    }

  if (TREE_CODE (binrhs) == SSA_NAME)
    {
      binrhsdef = SSA_NAME_DEF_STMT (binrhs);
      binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
                         && !stmt_could_throw_p (binrhsdef));
    }

  /* If the LHS is not reassociable, but the RHS is, we need to swap
     them.  If neither is reassociable, there is nothing we can do, so
     just put them in the ops vector.  If the LHS is reassociable,
     linearize it.  If both are reassociable, then linearize the RHS
     and the LHS.  */

  if (!binlhsisreassoc)
    {
      /* If this is not an associative operation like division, give up.  */
      if (!is_associative)
        {
          add_to_ops_vec (ops, binrhs);
          return;
        }

      if (!binrhsisreassoc)
        {
          if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
            add_to_ops_vec (ops, binrhs);

          if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
            add_to_ops_vec (ops, binlhs);

          return;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "swapping operands of ");
          print_gimple_stmt (dump_file, stmt, 0);
        }

      swap_ssa_operands (stmt,
                         gimple_assign_rhs1_ptr (stmt),
                         gimple_assign_rhs2_ptr (stmt));
      update_stmt (stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " is now ");
          print_gimple_stmt (dump_file, stmt, 0);
        }

      /* We want to make it so the lhs is always the reassociative op,
         so swap.  */
      std::swap (binlhs, binrhs);
    }
  else if (binrhsisreassoc)
    {
      linearize_expr (stmt);
      binlhs = gimple_assign_rhs1 (stmt);
      binrhs = gimple_assign_rhs2 (stmt);
    }

  gcc_assert (TREE_CODE (binrhs) != SSA_NAME
              || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
                                      rhscode, loop));
  linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
                       is_associative, set_visited);

  if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
    add_to_ops_vec (ops, binrhs);
}
/* Repropagate the negates back into subtracts, since no other pass
   currently does it.  */

static void
repropagate_negates (void)
{
  unsigned int i = 0;
  tree negate;

  FOR_EACH_VEC_ELT (plus_negates, i, negate)
    {
      gimple *user = get_single_immediate_use (negate);

      if (!user || !is_gimple_assign (user))
        continue;

      /* The negate operand can be either operand of a PLUS_EXPR
         (it can be the LHS if the RHS is a constant for example).

         Force the negate operand to the RHS of the PLUS_EXPR, then
         transform the PLUS_EXPR into a MINUS_EXPR.  */
      if (gimple_assign_rhs_code (user) == PLUS_EXPR)
        {
          /* If the negated operand appears on the LHS of the
             PLUS_EXPR, exchange the operands of the PLUS_EXPR
             to force the negated operand to the RHS of the PLUS_EXPR.  */
          if (gimple_assign_rhs1 (user) == negate)
            {
              swap_ssa_operands (user,
                                 gimple_assign_rhs1_ptr (user),
                                 gimple_assign_rhs2_ptr (user));
            }

          /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
             the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
          if (gimple_assign_rhs2 (user) == negate)
            {
              tree rhs1 = gimple_assign_rhs1 (user);
              tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
              gimple_stmt_iterator gsi = gsi_for_stmt (user);
              gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
              update_stmt (user);
            }
        }
      else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
        {
          if (gimple_assign_rhs1 (user) == negate)
            {
              /* We have
                   x = -a
                   y = x - b
                 which we transform into
                   x = a + b
                   y = -x .
                 This pushes down the negate which we possibly can merge
                 into some other operation, hence insert it into the
                 plus_negates vector.  */
              gimple *feed = SSA_NAME_DEF_STMT (negate);
              tree a = gimple_assign_rhs1 (feed);
              tree b = gimple_assign_rhs2 (user);
              gimple_stmt_iterator gsi = gsi_for_stmt (feed);
              gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
              tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
              gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
              gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
              gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
              user = gsi_stmt (gsi2);
              update_stmt (user);
              reassoc_remove_stmt (&gsi);
              release_defs (feed);
              plus_negates.safe_push (gimple_assign_lhs (user));
            }
          else
            {
              /* Transform "x = -a; y = b - x" into "y = b + a", getting
                 rid of one operation.  */
              gimple *feed = SSA_NAME_DEF_STMT (negate);
              tree a = gimple_assign_rhs1 (feed);
              tree rhs1 = gimple_assign_rhs1 (user);
              gimple_stmt_iterator gsi = gsi_for_stmt (user);
              gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
              update_stmt (gsi_stmt (gsi));
            }
        }
    }
}
/* Returns true if OP is of a type for which we can do reassociation.
   That is for integral or non-saturating fixed-point types, and for
   floating point type when associative-math is enabled.  */

static bool
can_reassociate_p (tree op)
{
  tree type = TREE_TYPE (op);
  if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
    return false;
  if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
      || NON_SAT_FIXED_POINT_TYPE_P (type)
      || (flag_associative_math && FLOAT_TYPE_P (type)))
    return true;
  return false;
}
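/* E.g. an unsigned int operand (wrapping overflow) is accepted, a plain
   signed int with undefined overflow is not, and a double operand is
   accepted only when -fassociative-math is in effect.  */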
/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   IE given a chain like

   k = t - q
   q = b - r
   b = c - d

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement
   and set UIDs within each basic block.  */

static void
break_up_subtract_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  unsigned int uid = 1;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      gimple_set_visited (stmt, false);
      gimple_set_uid (stmt, uid++);

      if (!is_gimple_assign (stmt)
          || !can_reassociate_p (gimple_assign_lhs (stmt)))
        continue;

      /* Look for simple gimple subtract operations.  */
      if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
        {
          if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
              || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
            continue;

          /* Check for a subtract used only in an addition.  If this
             is the case, transform it into add of a negate for better
             reassociation.  IE transform C = A-B into C = A + -B if C
             is only used in an addition.  */
          if (should_break_up_subtract (stmt))
            break_up_subtract (stmt, &gsi);
        }
      else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
               && can_reassociate_p (gimple_assign_rhs1 (stmt)))
        plus_negates.safe_push (gimple_assign_lhs (stmt));
    }
  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    break_up_subtract_bb (son);
}
/* Used for repeated factor analysis.  */
struct repeat_factor
{
  /* An SSA name that occurs in a multiply chain.  */
  tree factor;

  /* Cached rank of the factor.  */
  unsigned rank;

  /* Number of occurrences of the factor in the chain.  */
  HOST_WIDE_INT count;

  /* An SSA name representing the product of this factor and
     all factors appearing later in the repeated factor vector.  */
  tree repr;
};


static vec<repeat_factor> repeat_factor_vec;

/* Used for sorting the repeat factor vector.  Sort primarily by
   ascending occurrence count, secondarily by descending rank.  */

static int
compare_repeat_factors (const void *x1, const void *x2)
{
  const repeat_factor *rf1 = (const repeat_factor *) x1;
  const repeat_factor *rf2 = (const repeat_factor *) x2;

  if (rf1->count != rf2->count)
    return rf1->count - rf2->count;

  return rf2->rank - rf1->rank;
}
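/* E.g. factors with (count, rank) pairs (1, 7), (3, 2) and (3, 5) end
   up ordered (1, 7), (3, 5), (3, 2): smallest occurrence count first,
   and the higher rank first among equal counts.  */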
/* Look for repeated operands in OPS in the multiply tree rooted at
   STMT.  Replace them with an optimal sequence of multiplies and powi
   builtin calls, and remove the used operands from OPS.  Return an
   SSA name representing the value of the replacement sequence.  */

static tree
attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
{
  unsigned i, j, vec_len;
  int ii;
  operand_entry *oe;
  repeat_factor *rf1, *rf2;
  repeat_factor rfnew;
  tree result = NULL_TREE;
  tree target_ssa, iter_result;
  tree type = TREE_TYPE (gimple_get_lhs (stmt));
  tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gimple *mul_stmt, *pow_stmt;

  /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
     target.  */
  if (!powi_fndecl)
    return NULL_TREE;

  /* Allocate the repeated factor vector.  */
  repeat_factor_vec.create (10);

  /* Scan the OPS vector for all SSA names in the product and build
     up a vector of occurrence counts for each factor.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
        {
          FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
            {
              if (rf1->factor == oe->op)
                {
                  rf1->count += oe->count;
                  break;
                }
            }

          if (j >= repeat_factor_vec.length ())
            {
              rfnew.factor = oe->op;
              rfnew.rank = oe->rank;
              rfnew.count = oe->count;
              rfnew.repr = NULL_TREE;
              repeat_factor_vec.safe_push (rfnew);
            }
        }
    }

  /* Sort the repeated factor vector by (a) increasing occurrence count,
     and (b) decreasing rank.  */
  repeat_factor_vec.qsort (compare_repeat_factors);

  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_pow opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions:

        t1 = y * z
        t2 = t1 * x
        t3 = t2 * t2
        t4 = t1 * t3
        result = t4 * z

     If we instead ignored the cached y * z and first multiplied by
     the __builtin_pow opportunity z * z, we would get the inferior:

        t1 = y * z
        t2 = z * z
        t3 = (y * z) * x
        t4 = t3 * t3
        t5 = t4 * t2
        result = t5 * y  */

  vec_len = repeat_factor_vec.length ();

  /* Repeatedly look for opportunities to create a builtin_powi call.  */
  while (true)
    {
      HOST_WIDE_INT power;

      /* First look for the largest cached product of factors from
         preceding iterations.  If found, create a builtin_powi for
         it if the minimum occurrence count for its factors is at
         least 2, or just use this cached product as our next
         multiplicand if the minimum occurrence count is 1.  */
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
        {
          if (rf1->repr && rf1->count > 0)
            break;
        }

      if (j < vec_len)
        {
          power = rf1->count;

          if (power == 1)
            {
              iter_result = rf1->repr;

              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  unsigned elt;
                  repeat_factor *rf;
                  fputs ("Multiplying by cached product ", dump_file);
                  for (elt = j; elt < vec_len; elt++)
                    {
                      rf = &repeat_factor_vec[elt];
                      print_generic_expr (dump_file, rf->factor);
                      if (elt < vec_len - 1)
                        fputs (" * ", dump_file);
                    }
                  fputs ("\n", dump_file);
                }
            }
          else
            {
              iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
              pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                            build_int_cst (integer_type_node,
                                                           power));
              gimple_call_set_lhs (pow_stmt, iter_result);
              gimple_set_location (pow_stmt, gimple_location (stmt));
              gimple_set_uid (pow_stmt, gimple_uid (stmt));
              gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);

              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  unsigned elt;
                  repeat_factor *rf;
                  fputs ("Building __builtin_pow call for cached product (",
                         dump_file);
                  for (elt = j; elt < vec_len; elt++)
                    {
                      rf = &repeat_factor_vec[elt];
                      print_generic_expr (dump_file, rf->factor);
                      if (elt < vec_len - 1)
                        fputs (" * ", dump_file);
                    }
                  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n",
                           power);
                }
            }
        }
      else
        {
          /* Otherwise, find the first factor in the repeated factor
             vector whose occurrence count is at least 2.  If no such
             factor exists, there are no builtin_powi opportunities
             remaining.  */
          FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
            {
              if (rf1->count >= 2)
                break;
            }

          if (j >= vec_len)
            break;

          power = rf1->count;

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              unsigned elt;
              repeat_factor *rf;
              fputs ("Building __builtin_pow call for (", dump_file);
              for (elt = j; elt < vec_len; elt++)
                {
                  rf = &repeat_factor_vec[elt];
                  print_generic_expr (dump_file, rf->factor);
                  if (elt < vec_len - 1)
                    fputs (" * ", dump_file);
                }
              fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n", power);
            }

          reassociate_stats.pows_created++;

          /* Visit each element of the vector in reverse order (so that
             high-occurrence elements are visited first, and within the
             same occurrence count, lower-ranked elements are visited
             first).  Form a linear product of all elements in this order
             whose occurrence count is at least that of element J.
             Record the SSA name representing the product of each element
             with all subsequent elements in the vector.  */
          if (j == vec_len - 1)
            rf1->repr = rf1->factor;
          else
            {
              for (ii = vec_len - 2; ii >= (int)j; ii--)
                {
                  tree op1, op2;

                  rf1 = &repeat_factor_vec[ii];
                  rf2 = &repeat_factor_vec[ii + 1];

                  /* Init the last factor's representative to be itself.  */
                  if (!rf2->repr)
                    rf2->repr = rf2->factor;

                  op1 = rf1->factor;
                  op2 = rf2->repr;

                  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
                  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
                                                  op1, op2);
                  gimple_set_location (mul_stmt, gimple_location (stmt));
                  gimple_set_uid (mul_stmt, gimple_uid (stmt));
                  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
                  rf1->repr = target_ssa;

                  /* Don't reprocess the multiply we just introduced.  */
                  gimple_set_visited (mul_stmt, true);
                }
            }

          /* Form a call to __builtin_powi for the maximum product
             just formed, raised to the power obtained earlier.  */
          rf1 = &repeat_factor_vec[j];
          iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
          pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                        build_int_cst (integer_type_node,
                                                       power));
          gimple_call_set_lhs (pow_stmt, iter_result);
          gimple_set_location (pow_stmt, gimple_location (stmt));
          gimple_set_uid (pow_stmt, gimple_uid (stmt));
          gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
        }

      /* If we previously formed at least one other builtin_powi call,
         form the product of this one and those others.  */
      if (result)
        {
          tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
          mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
                                          result, iter_result);
          gimple_set_location (mul_stmt, gimple_location (stmt));
          gimple_set_uid (mul_stmt, gimple_uid (stmt));
          gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
          gimple_set_visited (mul_stmt, true);
          result = new_result;
        }
      else
        result = iter_result;

      /* Decrement the occurrence count of each element in the product
         by the count found above, and remove this many copies of each
         factor from OPS.  */
      for (i = j; i < vec_len; i++)
        {
          unsigned k = power;
          unsigned n;

          rf1 = &repeat_factor_vec[i];
          rf1->count -= power;

          FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
            {
              if (oe->op == rf1->factor)
                {
                  if (oe->count <= k)
                    {
                      ops->ordered_remove (n);
                      k -= oe->count;

                      if (k == 0)
                        break;
                    }
                  else
                    {
                      oe->count -= k;
                      break;
                    }
                }
            }
        }
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}
/* Attempt to optimize
   CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
   CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */

static void
attempt_builtin_copysign (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  unsigned int i;
  unsigned int length = ops->length ();
  tree cst = ops->last ()->op;

  if (length == 1 || TREE_CODE (cst) != REAL_CST)
    return;

  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME
          && has_single_use (oe->op))
        {
          gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
          if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
            {
              tree arg0, arg1;
              switch (gimple_call_combined_fn (old_call))
                {
                CASE_CFN_COPYSIGN:
                CASE_CFN_COPYSIGN_FN:
                  arg0 = gimple_call_arg (old_call, 0);
                  arg1 = gimple_call_arg (old_call, 1);
                  /* The first argument of copysign must be a constant,
                     otherwise there's nothing to do.  */
                  if (TREE_CODE (arg0) == REAL_CST)
                    {
                      tree type = TREE_TYPE (arg0);
                      tree mul = const_binop (MULT_EXPR, type, cst, arg0);
                      /* If we couldn't fold to a single constant, skip it.
                         That happens e.g. for inexact multiplication when
                         -frounding-math.  */
                      if (mul == NULL_TREE)
                        break;
                      /* Instead of adjusting OLD_CALL, let's build a new
                         call to not leak the LHS and prevent keeping bogus
                         debug statements.  DCE will clean up the old call.  */
                      gcall *new_call;
                      if (gimple_call_internal_p (old_call))
                        new_call = gimple_build_call_internal
                          (IFN_COPYSIGN, 2, mul, arg1);
                      else
                        new_call = gimple_build_call
                          (gimple_call_fndecl (old_call), 2, mul, arg1);
                      tree lhs = make_ssa_name (type);
                      gimple_call_set_lhs (new_call, lhs);
                      gimple_set_location (new_call,
                                           gimple_location (old_call));
                      insert_stmt_after (new_call, old_call);
                      /* We've used the constant, get rid of it.  */
                      ops->pop ();
                      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
                      /* Handle the CST1 < 0 case by negating the result.  */
                      if (cst1_neg)
                        {
                          tree negrhs = make_ssa_name (TREE_TYPE (lhs));
                          gimple *negate_stmt
                            = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
                          insert_stmt_after (negate_stmt, new_call);
                          oe->op = negrhs;
                        }
                      else
                        oe->op = lhs;
                      if (dump_file && (dump_flags & TDF_DETAILS))
                        {
                          fprintf (dump_file, "Optimizing copysign: ");
                          print_generic_expr (dump_file, cst);
                          fprintf (dump_file, " * COPYSIGN (");
                          print_generic_expr (dump_file, arg0);
                          fprintf (dump_file, ", ");
                          print_generic_expr (dump_file, arg1);
                          fprintf (dump_file, ") into %sCOPYSIGN (",
                                   cst1_neg ? "-" : "");
                          print_generic_expr (dump_file, mul);
                          fprintf (dump_file, ", ");
                          print_generic_expr (dump_file, arg1);
                          fprintf (dump_file, ")\n");
                        }
                      return;
                    }
                  break;
                default:
                  break;
                }
            }
        }
    }
}
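/* E.g. for the product -2.0 * copysign (3.0, y) (the copysign result
   having a single use), the code above builds a new call
   copysign (-2.0 * 3.0, y) and then negates its result, which is the
   CST1 < 0 case described before the function.  */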
/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */

static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
{
  tree rhs1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}
/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
                            tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}
/* Reassociate expressions in basic block BB and its post-dominator as
   children.

   Bubble up return status from maybe_optimize_range_tests.  */

static bool
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple *stmt = last_stmt (bb);
  bool cfg_cleanup_needed = false;

  if (stmt && !gimple_visited_p (stmt))
    cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);

  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
          && !stmt_could_throw_p (stmt))
        {
          tree lhs, rhs1, rhs2;
          enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

          /* If this is not a gimple binary expression, there is
             nothing for us to do with it.  */
          if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
            continue;

          /* If this was part of an already processed statement,
             we don't need to touch it again.  */
          if (gimple_visited_p (stmt))
            {
              /* This statement might have become dead because of previous
                 reassociations.  */
              if (has_zero_uses (gimple_get_lhs (stmt)))
                {
                  reassoc_remove_stmt (&gsi);
                  release_defs (stmt);
                  /* We might end up removing the last stmt above which
                     places the iterator to the end of the sequence.
                     Reset it to the last stmt in this case which might
                     be the end of the sequence as well if we removed
                     the last statement of the sequence.  In which case
                     we need to bail out.  */
                  if (gsi_end_p (gsi))
                    {
                      gsi = gsi_last_bb (bb);
                      if (gsi_end_p (gsi))
                        break;
                    }
                }
              continue;
            }

          lhs = gimple_assign_lhs (stmt);
          rhs1 = gimple_assign_rhs1 (stmt);
          rhs2 = gimple_assign_rhs2 (stmt);

          /* For non-bit or min/max operations we can't associate
             all types.  Verify that here.  */
          if (rhs_code != BIT_IOR_EXPR
              && rhs_code != BIT_AND_EXPR
              && rhs_code != BIT_XOR_EXPR
              && rhs_code != MIN_EXPR
              && rhs_code != MAX_EXPR
              && (!can_reassociate_p (lhs)
                  || !can_reassociate_p (rhs1)
                  || !can_reassociate_p (rhs2)))
            continue;

          if (associative_tree_code (rhs_code))
            {
              auto_vec<operand_entry *> ops;
              tree powi_result = NULL_TREE;
              bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));

              /* There may be no immediate uses left by the time we
                 get here because we may have eliminated them all.  */
              if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
                continue;

              gimple_set_visited (stmt, true);
              linearize_expr_tree (&ops, stmt, true, true);
              ops.qsort (sort_by_operand_rank);
              int orig_len = ops.length ();
              optimize_ops_list (rhs_code, &ops);
              if (undistribute_ops_list (rhs_code, &ops,
                                         loop_containing_stmt (stmt)))
                {
                  ops.qsort (sort_by_operand_rank);
                  optimize_ops_list (rhs_code, &ops);
                }

              if (rhs_code == PLUS_EXPR
                  && transform_add_to_multiply (&ops))
                ops.qsort (sort_by_operand_rank);

              if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
                {
                  if (is_vector)
                    optimize_vec_cond_expr (rhs_code, &ops);
                  else
                    optimize_range_tests (rhs_code, &ops);
                }

              if (rhs_code == MULT_EXPR && !is_vector)
                {
                  attempt_builtin_copysign (&ops);

                  if (reassoc_insert_powi_p
                      && flag_unsafe_math_optimizations)
                    powi_result = attempt_builtin_powi (stmt, &ops);
                }

              operand_entry *last;
              bool negate_result = false;
              if (ops.length () > 1
                  && rhs_code == MULT_EXPR)
                {
                  last = ops.last ();
                  if ((integer_minus_onep (last->op)
                       || real_minus_onep (last->op))
                      && !HONOR_SNANS (TREE_TYPE (lhs))
                      && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
                          || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
                    {
                      ops.pop ();
                      negate_result = true;
                    }
                }

              tree new_lhs = lhs;
              /* If the operand vector is now empty, all operands were
                 consumed by the __builtin_powi optimization.  */
              if (ops.length () == 0)
                transform_stmt_to_copy (&gsi, stmt, powi_result);
              else if (ops.length () == 1)
                {
                  tree last_op = ops.last ()->op;

                  /* If the stmt that defines operand has to be inserted, insert it
                     before the use.  */
                  if (ops.last ()->stmt_to_insert)
                    insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
                  if (powi_result)
                    transform_stmt_to_multiply (&gsi, stmt, last_op,
                                                powi_result);
                  else
                    transform_stmt_to_copy (&gsi, stmt, last_op);
                }
              else
                {
                  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
                  int ops_num = ops.length ();
                  int width = get_reassociation_width (ops_num, rhs_code, mode);

                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file,
                             "Width = %d was chosen for reassociation\n",
                             width);

                  /* For binary bit operations, if there are at least 3
                     operands and the last operand in OPS is a constant,
                     move it to the front.  This helps ensure that we generate
                     (X & Y) & C rather than (X & C) & Y.  The former will
                     often match a canonical bit test when we get to RTL.  */
                  if (ops.length () > 2
                      && (rhs_code == BIT_AND_EXPR
                          || rhs_code == BIT_IOR_EXPR
                          || rhs_code == BIT_XOR_EXPR)
                      && TREE_CODE (ops.last ()->op) == INTEGER_CST)
                    std::swap (*ops[0], *ops[ops_num - 1]);

                  if (width > 1
                      && ops.length () > 3)
                    rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
                                                width, ops);
                  else
                    {
                      /* When there are three operands left, we want
                         to make sure the ones that get the double
                         binary op are chosen wisely.  */
                      int len = ops.length ();
                      if (len >= 3)
                        swap_ops_for_binary_stmt (ops, len - 3, stmt);

                      new_lhs = rewrite_expr_tree (stmt, 0, ops,
                                                   powi_result != NULL
                                                   || negate_result,
                                                   len != orig_len);
                    }

                  /* If we combined some repeated factors into a
                     __builtin_powi call, multiply that result by the
                     reassociated operands.  */
                  if (powi_result)
                    {
                      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
                      tree type = TREE_TYPE (lhs);
                      tree target_ssa = make_temp_ssa_name (type, NULL,
                                                            "reassocmul");
                      gimple_set_lhs (lhs_stmt, target_ssa);
                      update_stmt (lhs_stmt);
                      if (lhs != new_lhs)
                        {
                          target_ssa = new_lhs;
                          new_lhs = lhs;
                        }
                      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
                                                      powi_result, target_ssa);
                      gimple_set_location (mul_stmt, gimple_location (stmt));
                      gimple_set_uid (mul_stmt, gimple_uid (stmt));
                      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
                    }
                }

              if (negate_result)
                {
                  stmt = SSA_NAME_DEF_STMT (lhs);
                  tree tmp = make_ssa_name (TREE_TYPE (lhs));
                  gimple_set_lhs (stmt, tmp);
                  if (lhs != new_lhs)
                    tmp = new_lhs;
                  gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
                                                           tmp);
                  gimple_set_uid (neg_stmt, gimple_uid (stmt));
                  gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
                  update_stmt (stmt);
                }
            }
        }
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    cfg_cleanup_needed |= reassociate_bb (son);

  return cfg_cleanup_needed;
}
/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...; // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...; // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI<1(l1), OTHERVAR(l2)>;  */

static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (var);
      gimple *use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      gcc_assert (ok
                  && is_gimple_assign (use_stmt)
                  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
                  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple *g = gimple_build_cond (NE_EXPR, var,
                                     build_zero_cst (TREE_TYPE (var)),
                                     NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = profile_probability::even ();
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      then_bb->count -= etrue->count ();

      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
        othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
        othervar = gimple_assign_rhs1 (use_stmt);
      else
        gcc_unreachable ();
      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}
void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
void debug_ops_vector (vec<operand_entry *> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry *> ops)
{
  operand_entry *oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op);
      fprintf (file, "\n");
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

void
debug_ops_vector (vec<operand_entry *> ops)
{
  dump_ops_vector (stderr, ops);
}
6094 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
6095 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun
));
/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, long>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
        insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}
/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
                            reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
                            reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
                            reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
                            reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
                            reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
                            reassociate_stats.pows_created);

  delete operand_rank;
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}
/* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
   insertion of __builtin_powi calls.

   Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
   optimization of a gimple conditional.  Otherwise returns zero.  */

static unsigned int
execute_reassoc (bool insert_powi_p)
{
  reassoc_insert_powi_p = insert_powi_p;

  init_reassoc ();

  bool cfg_cleanup_needed;
  cfg_cleanup_needed = do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
}
namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  void set_pass_param (unsigned int n, bool param)
    {
      gcc_assert (n == 0);
      insert_powi_p = param;
    }
  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
  virtual unsigned int execute (function *)
    { return execute_reassoc (insert_powi_p); }

 private:
  /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
     point 3a in the pass header comment.  */
  bool insert_powi_p;
}; // class pass_reassoc

} // anon namespace

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}