/* Reassociation for trees.
   Copyright (C) 2005-2015 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-pretty-print.h"
#include "tree-inline.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "alloc-pool.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "insn-codes.h"
#include "optabs-tree.h"
/*  This is a simple global reassociation pass.  It is, in part, based
    on the LLVM pass of the same name (they do some things more or less
    than we do, in different orders, etc).

    It consists of five steps:

    1. Breaking up subtract operations into addition + negate, where
    it would promote the reassociation of adds.

    2. Left linearization of the expression trees, so that (A+B)+(C+D)
    becomes (((A+B)+C)+D), which is easier for us to rewrite later.
    During linearization, we place the operands of the binary
    expressions into a vector of operand_entry_t.

    3. Optimization of the operand lists, eliminating things like a +
    -a, a & a, etc.

    3a. Combine repeated factors with the same occurrence counts
    into a __builtin_powi call that will later be optimized into
    an optimal number of multiplies.

    4. Rewrite the expression trees we linearized and optimized so
    they are in proper rank order.

    5. Repropagate negates, as nothing else will clean it up ATM.

    A bit of theory on #4, since nobody seems to write anything down
    about why it makes sense to do it the way they do it:

    We could do this much nicer theoretically, but don't (for reasons
    explained after how to do it theoretically nice :P).

    In order to promote the most redundancy elimination, you want
    binary expressions whose operands are the same rank (or
    preferably, the same value) exposed to the redundancy eliminator,
    for possible elimination.

    So the way to do this if we really cared, is to build the new op
    tree from the leaves to the roots, merging as you go, and putting the
    new op on the end of the worklist, until you are left with one
    thing on the worklist.

    I.e. if you have to rewrite the following set of operands (listed with
    rank in parentheses), with opcode PLUS_EXPR:

    a (1), b (1), c (1), d (2), e (2)

    We start with our merge worklist empty, and the ops list with all of
    those on it.

    You want to first merge all leaves of the same rank, as much as
    possible.

    So first build a binary op of

    mergetmp = a + b, and put "mergetmp" on the merge worklist.

    Because there is no three operand form of PLUS_EXPR, c is not going to
    be exposed to redundancy elimination as a rank 1 operand.

    So you might as well throw it on the merge worklist (you could also
    consider it to now be a rank two operand, and merge it with d and e,
    but in this case, you then have evicted e from a binary op.  So at
    least in this situation, you can't win.)

    Then build a binary op of d + e
    mergetmp2 = d + e

    and put mergetmp2 on the merge worklist.

    so merge worklist = {mergetmp, c, mergetmp2}

    Continue building binary ops of these operations until you have only
    one operation left on the worklist.

    So we have

    mergetmp3 = mergetmp + c

    worklist = {mergetmp2, mergetmp3}

    mergetmp4 = mergetmp2 + mergetmp3

    worklist = {mergetmp4}

    because we have one operation left, we can now just set the original
    statement equal to the result of that operation.

    This will at least expose a + b and d + e to redundancy elimination
    as binary operations.

    For extra points, you can reuse the old statements to build the
    mergetmps, since you shouldn't run out.

    So why don't we do this?

    Because it's expensive, and will rarely help.  Most trees we are
    reassociating have 3 or fewer ops.  If they have 2 ops, they already
    will be written into a nice single binary op.  If you have 3 ops, a
    single simple check suffices to tell you whether the first two are of the
    same rank.  If so, you know to order it

    mergetmp = op1 + op2
    newstmt = mergetmp + op3

    instead of
    mergetmp = op2 + op3
    newstmt = mergetmp + op1

    If all three are of the same rank, you can't expose them all in a
    single binary operator anyway, so the above is *still* the best you
    can do.

    Thus, this is what we do.  When we have three ops left, we check to see
    what order to put them in, and call it a day.  As a nod to vector sum
    reduction, we check if any of the ops are really a phi node that is a
    destructive update for the associating op, and keep the destructive
    update together for vector sum reduction recognition.  */
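
/* For illustration only (a hypothetical example, not taken from the
   original sources): given  e = a - b - c,  step 1 turns the subtracts
   into  e = a + (-b) + (-c),  step 2 linearizes the tree and collects
   the operand vector {a, -b, -c}, step 3 would cancel a pair such as
   b and -b if both appeared, and step 4 re-emits the chain so that the
   highest-ranked (most recently defined) operand is added last.  */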
/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;

/* Operator, rank pair.  */
typedef struct operand_entry
{
  unsigned int rank;
  int id;
  tree op;
  unsigned int count;
} *operand_entry_t;

static object_allocator<operand_entry> operand_entry_pool
  ("operand entry pool");

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;

/* Operand->rank hashtable.  */
static hash_map<tree, long> *operand_rank;

/* Vector of SSA_NAMEs on which after reassociate_bb is done with
   all basic blocks the CFG should be adjusted - basic blocks
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;

/* Forward declarations.  */
static long get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

static bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple *end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}
/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)

/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
static long
phi_rank (gimple *stmt)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple *use_stmt;

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (arg))
        {
          gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
          if (gimple_bb (def_stmt)->loop_father == father)
            return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
        }
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}
/* If EXP is an SSA_NAME defined by a PHI statement that represents a
   loop-carried dependence of an innermost loop, return TRUE; else
   return FALSE.  */

static bool
loop_carried_phi (tree exp)
{
  gimple *phi_stmt;
  long block_rank;

  if (TREE_CODE (exp) != SSA_NAME
      || SSA_NAME_IS_DEFAULT_DEF (exp))
    return false;

  phi_stmt = SSA_NAME_DEF_STMT (exp);

  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
    return false;

  /* Non-loop-carried phis have block rank.  Loop-carried phis have
     an additional bias added in.  If this phi doesn't have block rank,
     it's biased and should not be propagated.  */
  block_rank = bb_rank[gimple_bb (phi_stmt)->index];

  if (phi_rank (phi_stmt) != block_rank)
    return true;

  return false;
}
/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */

static long
propagate_rank (long rank, tree op)
{
  long op_rank;

  if (loop_carried_phi (op))
    return rank;

  op_rank = get_rank (op);

  return MAX (rank, op_rank);
}
/* Look up the operand rank structure for expression E.  */

static long
find_operand_rank (tree e)
{
  long *slot = operand_rank->get (e);
  return slot ? *slot : -1;
}
/* Insert {E,RANK} into the operand rank hashtable.  */

static void
insert_operand_rank (tree e, long rank)
{
  gcc_assert (rank > 0);
  gcc_assert (!operand_rank->put (e, rank));
}
/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      ssa_op_iter iter;
      gimple *stmt;
      long rank;
      tree op;

      if (SSA_NAME_IS_DEFAULT_DEF (e))
        return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
        return phi_rank (stmt);

      if (!is_gimple_assign (stmt))
        return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
        return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
         exception, remove the bias from loop-carried phis when propagating
         the rank so that dependent operations are not also biased.  */
      /* Simply walk over all SSA uses - this takes advantage of the
         fact that non-SSA operands are is_gimple_min_invariant and
         thus have rank 0.  */
      rank = 0;
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
        rank = propagate_rank (rank, op);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Rank for ");
          print_generic_expr (dump_file, e, 0);
          fprintf (dump_file, " is %ld\n", (rank + 1));
        }

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Constants, globals, etc., are rank 0 */
  return 0;
}
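
/* A small worked example (hypothetical names, for illustration only):
   if a_1 and b_2 already have ranks 1 and 2 from the pre-setup, then
   t_3 = a_1 + b_2 gets rank MAX (1, 2) + 1 = 3, and a later
   s_4 = t_3 + a_1 gets rank MAX (3, 1) + 1 = 4.  */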
/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */
static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    return FLOAT_CONST_TYPE;
  else
    return OTHER_CONST_TYPE;
}
/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry_t oea = *(const operand_entry_t *)pa;
  const operand_entry_t oeb = *(const operand_entry_t *)pb;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oeb->rank == 0 && oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
        return constant_type (oeb->op) - constant_type (oea->op);
      else
        /* To make sorting result stable, we use unique IDs to determine
           order.  */
        return oeb->id - oea->id;
    }

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if ((oeb->rank - oea->rank == 0)
      && TREE_CODE (oea->op) == SSA_NAME
      && TREE_CODE (oeb->op) == SSA_NAME)
    {
      /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
         versions of removed SSA_NAMEs, so if possible, prefer to sort
         based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.  */
      if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
          && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
          && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
        {
          gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
          gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
          basic_block bba = gimple_bb (stmta);
          basic_block bbb = gimple_bb (stmtb);
          if (bbb != bba)
            {
              if (bb_rank[bbb->index] != bb_rank[bba->index])
                return bb_rank[bbb->index] - bb_rank[bba->index];
            }
          else
            {
              bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
              bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
              if (da != db)
                return da ? 1 : -1;
            }
        }

      if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
        return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
      else
        return oeb->id - oea->id;
    }

  if (oeb->rank != oea->rank)
    return oeb->rank - oea->rank;
  else
    return oeb->id - oea->id;
}
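
/* Illustrative consequence of the ordering above (hypothetical operand
   names): for an ops vector containing x_5 (rank 3), y_2 (rank 2), 4 and
   1.0, the ranked SSA names sort before the rank-0 constants, and the
   constants are grouped by constant_type so that like constants end up
   adjacent, which lets optimize_ops_list merge them.  */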
/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
{
  operand_entry_t oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  ops->safe_push (oe);
}
/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
                       HOST_WIDE_INT repeat)
{
  operand_entry_t oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}
/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    return true;

  return false;
}
/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of the negate operation.  Otherwise, return NULL.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple *stmt = SSA_NAME_DEF_STMT (name);

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}
/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */
632 eliminate_duplicate_pair (enum tree_code opcode
,
633 vec
<operand_entry_t
> *ops
,
636 operand_entry_t curr
,
637 operand_entry_t last
)
640 /* If we have two of the same op, and the opcode is & |, min, or max,
641 we can eliminate one of them.
642 If we have two of the same op, and the opcode is ^, we can
643 eliminate both of them. */
645 if (last
&& last
->op
== curr
->op
)
653 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
655 fprintf (dump_file
, "Equivalence: ");
656 print_generic_expr (dump_file
, curr
->op
, 0);
657 fprintf (dump_file
, " [&|minmax] ");
658 print_generic_expr (dump_file
, last
->op
, 0);
659 fprintf (dump_file
, " -> ");
660 print_generic_stmt (dump_file
, last
->op
, 0);
663 ops
->ordered_remove (i
);
664 reassociate_stats
.ops_eliminated
++;
669 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
671 fprintf (dump_file
, "Equivalence: ");
672 print_generic_expr (dump_file
, curr
->op
, 0);
673 fprintf (dump_file
, " ^ ");
674 print_generic_expr (dump_file
, last
->op
, 0);
675 fprintf (dump_file
, " -> nothing\n");
678 reassociate_stats
.ops_eliminated
+= 2;
680 if (ops
->length () == 2)
683 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
688 ops
->ordered_remove (i
-1);
689 ops
->ordered_remove (i
-1);
static vec<tree> plus_negates;

/* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
   expression, look in OPS for a corresponding positive operation to cancel
   it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */
710 eliminate_plus_minus_pair (enum tree_code opcode
,
711 vec
<operand_entry_t
> *ops
,
712 unsigned int currindex
,
713 operand_entry_t curr
)
720 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
723 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
724 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
725 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
728 /* Any non-negated version will have a rank that is one less than
729 the current rank. So once we hit those ranks, if we don't find
732 for (i
= currindex
+ 1;
733 ops
->iterate (i
, &oe
)
734 && oe
->rank
>= curr
->rank
- 1 ;
737 if (oe
->op
== negateop
)
740 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
742 fprintf (dump_file
, "Equivalence: ");
743 print_generic_expr (dump_file
, negateop
, 0);
744 fprintf (dump_file
, " + -");
745 print_generic_expr (dump_file
, oe
->op
, 0);
746 fprintf (dump_file
, " -> 0\n");
749 ops
->ordered_remove (i
);
750 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
751 ops
->ordered_remove (currindex
);
752 reassociate_stats
.ops_eliminated
++;
756 else if (oe
->op
== notop
)
758 tree op_type
= TREE_TYPE (oe
->op
);
760 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
762 fprintf (dump_file
, "Equivalence: ");
763 print_generic_expr (dump_file
, notop
, 0);
764 fprintf (dump_file
, " + ~");
765 print_generic_expr (dump_file
, oe
->op
, 0);
766 fprintf (dump_file
, " -> -1\n");
769 ops
->ordered_remove (i
);
770 add_to_ops_vec (ops
, build_int_cst_type (op_type
, -1));
771 ops
->ordered_remove (currindex
);
772 reassociate_stats
.ops_eliminated
++;
778 /* CURR->OP is a negate expr in a plus expr: save it for later
779 inspection in repropagate_negates(). */
780 if (negateop
!= NULL_TREE
)
781 plus_negates
.safe_push (curr
->op
);
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0, and return true.  Otherwise, return false.  */
793 eliminate_not_pairs (enum tree_code opcode
,
794 vec
<operand_entry_t
> *ops
,
795 unsigned int currindex
,
796 operand_entry_t curr
)
802 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
803 || TREE_CODE (curr
->op
) != SSA_NAME
)
806 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
807 if (notop
== NULL_TREE
)
810 /* Any non-not version will have a rank that is one less than
811 the current rank. So once we hit those ranks, if we don't find
814 for (i
= currindex
+ 1;
815 ops
->iterate (i
, &oe
)
816 && oe
->rank
>= curr
->rank
- 1;
821 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
823 fprintf (dump_file
, "Equivalence: ");
824 print_generic_expr (dump_file
, notop
, 0);
825 if (opcode
== BIT_AND_EXPR
)
826 fprintf (dump_file
, " & ~");
827 else if (opcode
== BIT_IOR_EXPR
)
828 fprintf (dump_file
, " | ~");
829 print_generic_expr (dump_file
, oe
->op
, 0);
830 if (opcode
== BIT_AND_EXPR
)
831 fprintf (dump_file
, " -> 0\n");
832 else if (opcode
== BIT_IOR_EXPR
)
833 fprintf (dump_file
, " -> -1\n");
836 if (opcode
== BIT_AND_EXPR
)
837 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
838 else if (opcode
== BIT_IOR_EXPR
)
839 oe
->op
= build_all_ones_cst (TREE_TYPE (oe
->op
));
841 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
843 ops
->quick_push (oe
);
/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */
859 eliminate_using_constants (enum tree_code opcode
,
860 vec
<operand_entry_t
> *ops
)
862 operand_entry_t oelast
= ops
->last ();
863 tree type
= TREE_TYPE (oelast
->op
);
865 if (oelast
->rank
== 0
866 && (INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
871 if (integer_zerop (oelast
->op
))
873 if (ops
->length () != 1)
875 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
876 fprintf (dump_file
, "Found & 0, removing all other ops\n");
878 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
881 ops
->quick_push (oelast
);
885 else if (integer_all_onesp (oelast
->op
))
887 if (ops
->length () != 1)
889 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
890 fprintf (dump_file
, "Found & -1, removing\n");
892 reassociate_stats
.ops_eliminated
++;
897 if (integer_all_onesp (oelast
->op
))
899 if (ops
->length () != 1)
901 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
902 fprintf (dump_file
, "Found | -1, removing all other ops\n");
904 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
907 ops
->quick_push (oelast
);
911 else if (integer_zerop (oelast
->op
))
913 if (ops
->length () != 1)
915 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
916 fprintf (dump_file
, "Found | 0, removing\n");
918 reassociate_stats
.ops_eliminated
++;
923 if (integer_zerop (oelast
->op
)
924 || (FLOAT_TYPE_P (type
)
925 && !HONOR_NANS (type
)
926 && !HONOR_SIGNED_ZEROS (type
)
927 && real_zerop (oelast
->op
)))
929 if (ops
->length () != 1)
931 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
932 fprintf (dump_file
, "Found * 0, removing all other ops\n");
934 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
936 ops
->quick_push (oelast
);
940 else if (integer_onep (oelast
->op
)
941 || (FLOAT_TYPE_P (type
)
942 && !HONOR_SNANS (type
)
943 && real_onep (oelast
->op
)))
945 if (ops
->length () != 1)
947 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
948 fprintf (dump_file
, "Found * 1, removing\n");
950 reassociate_stats
.ops_eliminated
++;
958 if (integer_zerop (oelast
->op
)
959 || (FLOAT_TYPE_P (type
)
960 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
961 && fold_real_zero_addition_p (type
, oelast
->op
,
962 opcode
== MINUS_EXPR
)))
964 if (ops
->length () != 1)
966 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
967 fprintf (dump_file
, "Found [|^+] 0, removing\n");
969 reassociate_stats
.ops_eliminated
++;
static void linearize_expr_tree (vec<operand_entry_t> *, gimple *,
                                 bool, bool);

/* Structure for tracking and counting operands.  */
typedef struct oecount_s {
  int cnt;
  int id;
  enum tree_code oecode;
  tree op;
} oecount;


/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;
997 /* Oecount hashtable helpers. */
999 struct oecount_hasher
: int_hash
<int, 0, 1>
1001 static inline hashval_t
hash (int);
1002 static inline bool equal (int, int);
1005 /* Hash function for oecount. */
1008 oecount_hasher::hash (int p
)
1010 const oecount
*c
= &cvec
[p
- 42];
1011 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
1014 /* Comparison function for oecount. */
1017 oecount_hasher::equal (int p1
, int p2
)
1019 const oecount
*c1
= &cvec
[p1
- 42];
1020 const oecount
*c2
= &cvec
[p2
- 42];
1021 return (c1
->oecode
== c2
->oecode
1022 && c1
->op
== c2
->op
);
1025 /* Comparison function for qsort sorting oecount elements by count. */
1028 oecount_cmp (const void *p1
, const void *p2
)
1030 const oecount
*c1
= (const oecount
*)p1
;
1031 const oecount
*c2
= (const oecount
*)p2
;
1032 if (c1
->cnt
!= c2
->cnt
)
1033 return c1
->cnt
- c2
->cnt
;
1035 /* If counts are identical, use unique IDs to stabilize qsort. */
1036 return c1
->id
- c2
->id
;
1039 /* Return TRUE iff STMT represents a builtin call that raises OP
1040 to some exponent. */
1043 stmt_is_power_of_op (gimple
*stmt
, tree op
)
1047 if (!is_gimple_call (stmt
))
1050 fndecl
= gimple_call_fndecl (stmt
);
1053 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
1056 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1058 CASE_FLT_FN (BUILT_IN_POW
):
1059 CASE_FLT_FN (BUILT_IN_POWI
):
1060 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1067 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1068 in place and return the result. Assumes that stmt_is_power_of_op
1069 was previously called for STMT and returned TRUE. */
1071 static HOST_WIDE_INT
1072 decrement_power (gimple
*stmt
)
1074 REAL_VALUE_TYPE c
, cint
;
1075 HOST_WIDE_INT power
;
1078 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1080 CASE_FLT_FN (BUILT_IN_POW
):
1081 arg1
= gimple_call_arg (stmt
, 1);
1082 c
= TREE_REAL_CST (arg1
);
1083 power
= real_to_integer (&c
) - 1;
1084 real_from_integer (&cint
, VOIDmode
, power
, SIGNED
);
1085 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1088 CASE_FLT_FN (BUILT_IN_POWI
):
1089 arg1
= gimple_call_arg (stmt
, 1);
1090 power
= TREE_INT_CST_LOW (arg1
) - 1;
1091 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1099 /* Find the single immediate use of STMT's LHS, and replace it
1100 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1101 replace *DEF with OP as well. */
1104 propagate_op_to_single_use (tree op
, gimple
*stmt
, tree
*def
)
1109 gimple_stmt_iterator gsi
;
1111 if (is_gimple_call (stmt
))
1112 lhs
= gimple_call_lhs (stmt
);
1114 lhs
= gimple_assign_lhs (stmt
);
1116 gcc_assert (has_single_use (lhs
));
1117 single_imm_use (lhs
, &use
, &use_stmt
);
1121 if (TREE_CODE (op
) != SSA_NAME
)
1122 update_stmt (use_stmt
);
1123 gsi
= gsi_for_stmt (stmt
);
1124 unlink_stmt_vdef (stmt
);
1125 reassoc_remove_stmt (&gsi
);
1126 release_defs (stmt
);
1129 /* Walks the linear chain with result *DEF searching for an operation
1130 with operand OP and code OPCODE removing that from the chain. *DEF
1131 is updated if there is only one operand but no operation left. */
1134 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1136 gimple
*stmt
= SSA_NAME_DEF_STMT (*def
);
1142 if (opcode
== MULT_EXPR
1143 && stmt_is_power_of_op (stmt
, op
))
1145 if (decrement_power (stmt
) == 1)
1146 propagate_op_to_single_use (op
, stmt
, def
);
1150 name
= gimple_assign_rhs1 (stmt
);
1152 /* If this is the operation we look for and one of the operands
1153 is ours simply propagate the other operand into the stmts
1155 if (gimple_assign_rhs_code (stmt
) == opcode
1157 || gimple_assign_rhs2 (stmt
) == op
))
1160 name
= gimple_assign_rhs2 (stmt
);
1161 propagate_op_to_single_use (name
, stmt
, def
);
1165 /* We might have a multiply of two __builtin_pow* calls, and
1166 the operand might be hiding in the rightmost one. */
1167 if (opcode
== MULT_EXPR
1168 && gimple_assign_rhs_code (stmt
) == opcode
1169 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1170 && has_single_use (gimple_assign_rhs2 (stmt
)))
1172 gimple
*stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1173 if (stmt_is_power_of_op (stmt2
, op
))
1175 if (decrement_power (stmt2
) == 1)
1176 propagate_op_to_single_use (op
, stmt2
, def
);
1181 /* Continue walking the chain. */
1182 gcc_assert (name
!= op
1183 && TREE_CODE (name
) == SSA_NAME
);
1184 stmt
= SSA_NAME_DEF_STMT (name
);
1189 /* Returns true if statement S1 dominates statement S2. Like
1190 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1193 reassoc_stmt_dominates_stmt_p (gimple
*s1
, gimple
*s2
)
1195 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1197 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1198 SSA_NAME. Assume it lives at the beginning of function and
1199 thus dominates everything. */
1200 if (!bb1
|| s1
== s2
)
1203 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1209 /* PHIs in the same basic block are assumed to be
1210 executed all in parallel, if only one stmt is a PHI,
1211 it dominates the other stmt in the same basic block. */
1212 if (gimple_code (s1
) == GIMPLE_PHI
)
1215 if (gimple_code (s2
) == GIMPLE_PHI
)
1218 gcc_assert (gimple_uid (s1
) && gimple_uid (s2
));
1220 if (gimple_uid (s1
) < gimple_uid (s2
))
1223 if (gimple_uid (s1
) > gimple_uid (s2
))
1226 gimple_stmt_iterator gsi
= gsi_for_stmt (s1
);
1227 unsigned int uid
= gimple_uid (s1
);
1228 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1230 gimple
*s
= gsi_stmt (gsi
);
1231 if (gimple_uid (s
) != uid
)
1240 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
1243 /* Insert STMT after INSERT_POINT. */
1246 insert_stmt_after (gimple
*stmt
, gimple
*insert_point
)
1248 gimple_stmt_iterator gsi
;
1251 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1252 bb
= gimple_bb (insert_point
);
1253 else if (!stmt_ends_bb_p (insert_point
))
1255 gsi
= gsi_for_stmt (insert_point
);
1256 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1257 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1261 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1262 thus if it must end a basic block, it should be a call that can
1263 throw, or some assignment that can throw. If it throws, the LHS
1264 of it will not be initialized though, so only valid places using
1265 the SSA_NAME should be dominated by the fallthru edge. */
1266 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
1267 gsi
= gsi_after_labels (bb
);
1268 if (gsi_end_p (gsi
))
1270 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1271 gimple_set_uid (stmt
,
1272 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1275 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1276 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1279 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1280 the result. Places the statement after the definition of either
1281 OP1 or OP2. Returns the new statement. */
1284 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
1286 gimple
*op1def
= NULL
, *op2def
= NULL
;
1287 gimple_stmt_iterator gsi
;
1291 /* Create the addition statement. */
1292 op
= make_ssa_name (type
);
1293 sum
= gimple_build_assign (op
, opcode
, op1
, op2
);
1295 /* Find an insertion place and insert. */
1296 if (TREE_CODE (op1
) == SSA_NAME
)
1297 op1def
= SSA_NAME_DEF_STMT (op1
);
1298 if (TREE_CODE (op2
) == SSA_NAME
)
1299 op2def
= SSA_NAME_DEF_STMT (op2
);
1300 if ((!op1def
|| gimple_nop_p (op1def
))
1301 && (!op2def
|| gimple_nop_p (op2def
)))
1303 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1304 if (gsi_end_p (gsi
))
1306 gimple_stmt_iterator gsi2
1307 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1308 gimple_set_uid (sum
,
1309 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1312 gimple_set_uid (sum
, gimple_uid (gsi_stmt (gsi
)));
1313 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1317 gimple
*insert_point
;
1318 if ((!op1def
|| gimple_nop_p (op1def
))
1319 || (op2def
&& !gimple_nop_p (op2def
)
1320 && reassoc_stmt_dominates_stmt_p (op1def
, op2def
)))
1321 insert_point
= op2def
;
1323 insert_point
= op1def
;
1324 insert_stmt_after (sum
, insert_point
);
/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
        second candidate bitmap noting all multiplication/division chains
        that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
        candidates with one (operand, code) operation removed from their
        multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
        multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
        processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b) * x * y in one invocation of the reassociation pass.  */
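
/* An illustrative (hypothetical) GIMPLE fragment for the transform above:

     t1_1 = a_2 * x_3;   t2_4 = b_5 * x_3;   s_6 = t1_1 + t2_4;

   The (operand, code) pair (x_3, MULT_EXPR) occurs in both summand
   chains, so the addition chain is rewritten into the equivalent of

     s_7 = a_2 + b_5;    s_6 = s_7 * x_3;

   with the original multiplies becoming dead and removed later.  */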
1367 undistribute_ops_list (enum tree_code opcode
,
1368 vec
<operand_entry_t
> *ops
, struct loop
*loop
)
1370 unsigned int length
= ops
->length ();
1371 operand_entry_t oe1
;
1373 sbitmap candidates
, candidates2
;
1374 unsigned nr_candidates
, nr_candidates2
;
1375 sbitmap_iterator sbi0
;
1376 vec
<operand_entry_t
> *subops
;
1377 bool changed
= false;
1378 int next_oecount_id
= 0;
1381 || opcode
!= PLUS_EXPR
)
1384 /* Build a list of candidates to process. */
1385 candidates
= sbitmap_alloc (length
);
1386 bitmap_clear (candidates
);
1388 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1390 enum tree_code dcode
;
1393 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1395 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1396 if (!is_gimple_assign (oe1def
))
1398 dcode
= gimple_assign_rhs_code (oe1def
);
1399 if ((dcode
!= MULT_EXPR
1400 && dcode
!= RDIV_EXPR
)
1401 || !is_reassociable_op (oe1def
, dcode
, loop
))
1404 bitmap_set_bit (candidates
, i
);
1408 if (nr_candidates
< 2)
1410 sbitmap_free (candidates
);
1414 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1416 fprintf (dump_file
, "searching for un-distribute opportunities ");
1417 print_generic_expr (dump_file
,
1418 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, 0);
1419 fprintf (dump_file
, " %d\n", nr_candidates
);
1422 /* Build linearized sub-operand lists and the counting table. */
1425 hash_table
<oecount_hasher
> ctable (15);
1427 /* ??? Macro arguments cannot have multi-argument template types in
1428 them. This typedef is needed to workaround that limitation. */
1429 typedef vec
<operand_entry_t
> vec_operand_entry_t_heap
;
1430 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1431 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1434 enum tree_code oecode
;
1437 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1438 oecode
= gimple_assign_rhs_code (oedef
);
1439 linearize_expr_tree (&subops
[i
], oedef
,
1440 associative_tree_code (oecode
), false);
1442 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1449 c
.id
= next_oecount_id
++;
1452 idx
= cvec
.length () + 41;
1453 slot
= ctable
.find_slot (idx
, INSERT
);
1461 cvec
[*slot
- 42].cnt
++;
1466 /* Sort the counting table. */
1467 cvec
.qsort (oecount_cmp
);
1469 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1472 fprintf (dump_file
, "Candidates:\n");
1473 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1475 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1476 c
->oecode
== MULT_EXPR
1477 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1478 print_generic_expr (dump_file
, c
->op
, 0);
1479 fprintf (dump_file
, "\n");
1483 /* Process the (operand, code) pairs in order of most occurrence. */
1484 candidates2
= sbitmap_alloc (length
);
1485 while (!cvec
.is_empty ())
1487 oecount
*c
= &cvec
.last ();
1491 /* Now collect the operands in the outer chain that contain
1492 the common operand in their inner chain. */
1493 bitmap_clear (candidates2
);
1495 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1498 enum tree_code oecode
;
1500 tree op
= (*ops
)[i
]->op
;
1502 /* If we undistributed in this chain already this may be
1504 if (TREE_CODE (op
) != SSA_NAME
)
1507 oedef
= SSA_NAME_DEF_STMT (op
);
1508 oecode
= gimple_assign_rhs_code (oedef
);
1509 if (oecode
!= c
->oecode
)
1512 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1514 if (oe1
->op
== c
->op
)
1516 bitmap_set_bit (candidates2
, i
);
1523 if (nr_candidates2
>= 2)
1525 operand_entry_t oe1
, oe2
;
1527 int first
= bitmap_first_set_bit (candidates2
);
1529 /* Build the new addition chain. */
1530 oe1
= (*ops
)[first
];
1531 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1533 fprintf (dump_file
, "Building (");
1534 print_generic_expr (dump_file
, oe1
->op
, 0);
1536 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1537 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1541 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1543 fprintf (dump_file
, " + ");
1544 print_generic_expr (dump_file
, oe2
->op
, 0);
1546 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1547 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1548 oe1
->op
, oe2
->op
, opcode
);
1549 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1551 oe1
->op
= gimple_get_lhs (sum
);
1554 /* Apply the multiplication/division. */
1555 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1556 oe1
->op
, c
->op
, c
->oecode
);
1557 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1559 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1560 print_generic_expr (dump_file
, c
->op
, 0);
1561 fprintf (dump_file
, "\n");
1564 /* Record it in the addition chain and disable further
1565 undistribution with this op. */
1566 oe1
->op
= gimple_assign_lhs (prod
);
1567 oe1
->rank
= get_rank (oe1
->op
);
1568 subops
[first
].release ();
1576 for (i
= 0; i
< ops
->length (); ++i
)
1577 subops
[i
].release ();
1580 sbitmap_free (candidates
);
1581 sbitmap_free (candidates2
);
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
   expression, examine the other OPS to see if any of them are comparisons
   of the same values, which we may be able to combine or eliminate.
   For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
1592 eliminate_redundant_comparison (enum tree_code opcode
,
1593 vec
<operand_entry_t
> *ops
,
1594 unsigned int currindex
,
1595 operand_entry_t curr
)
1598 enum tree_code lcode
, rcode
;
1599 gimple
*def1
, *def2
;
1603 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
1606 /* Check that CURR is a comparison. */
1607 if (TREE_CODE (curr
->op
) != SSA_NAME
)
1609 def1
= SSA_NAME_DEF_STMT (curr
->op
);
1610 if (!is_gimple_assign (def1
))
1612 lcode
= gimple_assign_rhs_code (def1
);
1613 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
1615 op1
= gimple_assign_rhs1 (def1
);
1616 op2
= gimple_assign_rhs2 (def1
);
1618 /* Now look for a similar comparison in the remaining OPS. */
1619 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
1623 if (TREE_CODE (oe
->op
) != SSA_NAME
)
1625 def2
= SSA_NAME_DEF_STMT (oe
->op
);
1626 if (!is_gimple_assign (def2
))
1628 rcode
= gimple_assign_rhs_code (def2
);
1629 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
1632 /* If we got here, we have a match. See if we can combine the
1634 if (opcode
== BIT_IOR_EXPR
)
1635 t
= maybe_fold_or_comparisons (lcode
, op1
, op2
,
1636 rcode
, gimple_assign_rhs1 (def2
),
1637 gimple_assign_rhs2 (def2
));
1639 t
= maybe_fold_and_comparisons (lcode
, op1
, op2
,
1640 rcode
, gimple_assign_rhs1 (def2
),
1641 gimple_assign_rhs2 (def2
));
1645 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1646 always give us a boolean_type_node value back. If the original
1647 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1648 we need to convert. */
1649 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
1650 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
1652 if (TREE_CODE (t
) != INTEGER_CST
1653 && !operand_equal_p (t
, curr
->op
, 0))
1655 enum tree_code subcode
;
1656 tree newop1
, newop2
;
1657 if (!COMPARISON_CLASS_P (t
))
1659 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1660 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1661 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1662 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
1666 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1668 fprintf (dump_file
, "Equivalence: ");
1669 print_generic_expr (dump_file
, curr
->op
, 0);
1670 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
1671 print_generic_expr (dump_file
, oe
->op
, 0);
1672 fprintf (dump_file
, " -> ");
1673 print_generic_expr (dump_file
, t
, 0);
1674 fprintf (dump_file
, "\n");
1677 /* Now we can delete oe, as it has been subsumed by the new combined
1679 ops
->ordered_remove (i
);
1680 reassociate_stats
.ops_eliminated
++;
1682 /* If t is the same as curr->op, we're done. Otherwise we must
1683 replace curr->op with t. Special case is if we got a constant
1684 back, in which case we add it to the end instead of in place of
1685 the current entry. */
1686 if (TREE_CODE (t
) == INTEGER_CST
)
1688 ops
->ordered_remove (currindex
);
1689 add_to_ops_vec (ops
, t
);
1691 else if (!operand_equal_p (t
, curr
->op
, 0))
1694 enum tree_code subcode
;
1697 gcc_assert (COMPARISON_CLASS_P (t
));
1698 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1699 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1700 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1701 gcc_checking_assert (is_gimple_val (newop1
)
1702 && is_gimple_val (newop2
));
1703 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
1704 curr
->op
= gimple_get_lhs (sum
);
/* Perform various identities and other optimizations on the list of
   operand entries, stored in OPS.  The tree code for the binary
   operation between all the operands is OPCODE.  */
1717 optimize_ops_list (enum tree_code opcode
,
1718 vec
<operand_entry_t
> *ops
)
1720 unsigned int length
= ops
->length ();
1723 operand_entry_t oelast
= NULL
;
1724 bool iterate
= false;
1729 oelast
= ops
->last ();
1731 /* If the last two are constants, pop the constants off, merge them
1732 and try the next two. */
1733 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
1735 operand_entry_t oelm1
= (*ops
)[length
- 2];
1737 if (oelm1
->rank
== 0
1738 && is_gimple_min_invariant (oelm1
->op
)
1739 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
1740 TREE_TYPE (oelast
->op
)))
1742 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
1743 oelm1
->op
, oelast
->op
);
1745 if (folded
&& is_gimple_min_invariant (folded
))
1747 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1748 fprintf (dump_file
, "Merging constants\n");
1753 add_to_ops_vec (ops
, folded
);
1754 reassociate_stats
.constants_eliminated
++;
1756 optimize_ops_list (opcode
, ops
);
1762 eliminate_using_constants (opcode
, ops
);
1765 for (i
= 0; ops
->iterate (i
, &oe
);)
1769 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
1771 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
1772 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
1773 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
1785 length
= ops
->length ();
1786 oelast
= ops
->last ();
1789 optimize_ops_list (opcode
, ops
);
/* The following functions are subroutines to optimize_range_tests and allow
   it to try to change a logical combination of comparisons into a range
   test.

   For example, a test such as

        X == 2 || X == 5 || X == 3 || X == 4

   is converted to

        (unsigned) (X - 2) <= 3

   For more information see comments above fold_test_range in fold-const.c,
   this implementation is for GIMPLE.  */

struct range_entry
{
  tree exp;
  tree low;
  tree high;
  bool in_p;
  bool strict_overflow_p;
  unsigned int idx, next;
};
/* This is similar to make_range in fold-const.c, but on top of
   GIMPLE instead of trees.  If EXP is non-NULL, it should be
   an SSA_NAME and STMT argument is ignored, otherwise STMT
   argument should be a GIMPLE_COND.  */
1822 init_range_entry (struct range_entry
*r
, tree exp
, gimple
*stmt
)
1826 bool is_bool
, strict_overflow_p
;
1830 r
->strict_overflow_p
= false;
1832 r
->high
= NULL_TREE
;
1833 if (exp
!= NULL_TREE
1834 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
1837 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1838 and see if we can refine the range. Some of the cases below may not
1839 happen, but it doesn't seem worth worrying about this. We "continue"
1840 the outer loop when we've changed something; otherwise we "break"
1841 the switch, which will "break" the while. */
1842 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
1845 strict_overflow_p
= false;
1847 if (exp
== NULL_TREE
)
1849 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
1851 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
1856 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
1861 enum tree_code code
;
1862 tree arg0
, arg1
, exp_type
;
1866 if (exp
!= NULL_TREE
)
1868 if (TREE_CODE (exp
) != SSA_NAME
1869 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
))
1872 stmt
= SSA_NAME_DEF_STMT (exp
);
1873 if (!is_gimple_assign (stmt
))
1876 code
= gimple_assign_rhs_code (stmt
);
1877 arg0
= gimple_assign_rhs1 (stmt
);
1878 arg1
= gimple_assign_rhs2 (stmt
);
1879 exp_type
= TREE_TYPE (exp
);
1883 code
= gimple_cond_code (stmt
);
1884 arg0
= gimple_cond_lhs (stmt
);
1885 arg1
= gimple_cond_rhs (stmt
);
1886 exp_type
= boolean_type_node
;
1889 if (TREE_CODE (arg0
) != SSA_NAME
)
1891 loc
= gimple_location (stmt
);
1895 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
1896 /* Ensure the range is either +[-,0], +[0,0],
1897 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1898 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1899 or similar expression of unconditional true or
1900 false, it should not be negated. */
1901 && ((high
&& integer_zerop (high
))
1902 || (low
&& integer_onep (low
))))
1915 if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
1917 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
1922 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
1937 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
1939 &strict_overflow_p
);
1940 if (nexp
!= NULL_TREE
)
1943 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
1956 r
->strict_overflow_p
= strict_overflow_p
;
/* Comparison function for qsort.  Sort entries
   without SSA_NAME exp first, then with SSA_NAMEs sorted
   by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
   by increasing ->low and if ->low is the same, by increasing
   ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
   maximum.  */
1968 range_entry_cmp (const void *a
, const void *b
)
1970 const struct range_entry
*p
= (const struct range_entry
*) a
;
1971 const struct range_entry
*q
= (const struct range_entry
*) b
;
1973 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
1975 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
1977 /* Group range_entries for the same SSA_NAME together. */
1978 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
1980 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
1982 /* If ->low is different, NULL low goes first, then by
1984 if (p
->low
!= NULL_TREE
)
1986 if (q
->low
!= NULL_TREE
)
1988 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
1990 if (tem
&& integer_onep (tem
))
1992 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
1994 if (tem
&& integer_onep (tem
))
2000 else if (q
->low
!= NULL_TREE
)
2002 /* If ->high is different, NULL high goes last, before that by
2004 if (p
->high
!= NULL_TREE
)
2006 if (q
->high
!= NULL_TREE
)
2008 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2010 if (tem
&& integer_onep (tem
))
2012 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2014 if (tem
&& integer_onep (tem
))
2020 else if (q
->high
!= NULL_TREE
)
2022 /* If both ranges are the same, sort below by ascending idx. */
2027 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2030 if (p
->idx
< q
->idx
)
2034 gcc_checking_assert (p
->idx
> q
->idx
);
/* Helper routine of optimize_range_test.
   [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
   RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
   OPCODE and OPS are arguments of optimize_range_tests.  If OTHERRANGE
   is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
   an array of COUNT pointers to other ranges.  Return
   true if the range merge has been successful.
   If OPCODE is ERROR_MARK, this is called from within
   maybe_optimize_range_tests and is performing inter-bb range optimization.
   In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
   oe->rank.  */
2052 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2053 struct range_entry
**otherrangep
,
2054 unsigned int count
, enum tree_code opcode
,
2055 vec
<operand_entry_t
> *ops
, tree exp
, gimple_seq seq
,
2056 bool in_p
, tree low
, tree high
, bool strict_overflow_p
)
2058 operand_entry_t oe
= (*ops
)[range
->idx
];
2060 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
) :
2061 last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2062 location_t loc
= gimple_location (stmt
);
2063 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2064 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2066 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2067 gimple_stmt_iterator gsi
;
2070 if (tem
== NULL_TREE
)
2073 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2074 warning_at (loc
, OPT_Wstrict_overflow
,
2075 "assuming signed overflow does not occur "
2076 "when simplifying range test");
2078 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2080 struct range_entry
*r
;
2081 fprintf (dump_file
, "Optimizing range tests ");
2082 print_generic_expr (dump_file
, range
->exp
, 0);
2083 fprintf (dump_file
, " %c[", range
->in_p
? '+' : '-');
2084 print_generic_expr (dump_file
, range
->low
, 0);
2085 fprintf (dump_file
, ", ");
2086 print_generic_expr (dump_file
, range
->high
, 0);
2087 fprintf (dump_file
, "]");
2088 for (i
= 0; i
< count
; i
++)
2094 fprintf (dump_file
, " and %c[", r
->in_p
? '+' : '-');
2095 print_generic_expr (dump_file
, r
->low
, 0);
2096 fprintf (dump_file
, ", ");
2097 print_generic_expr (dump_file
, r
->high
, 0);
2098 fprintf (dump_file
, "]");
2100 fprintf (dump_file
, "\n into ");
2101 print_generic_expr (dump_file
, tem
, 0);
2102 fprintf (dump_file
, "\n");
2105 if (opcode
== BIT_IOR_EXPR
2106 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2107 tem
= invert_truthvalue_loc (loc
, tem
);
2109 tem
= fold_convert_loc (loc
, optype
, tem
);
2110 gsi
= gsi_for_stmt (stmt
);
2111 unsigned int uid
= gimple_uid (stmt
);
2112 /* In rare cases range->exp can be equal to lhs of stmt.
2113 In that case we have to insert after the stmt rather then before
2114 it. If stmt is a PHI, insert it at the start of the basic block. */
2115 if (op
!= range
->exp
)
2117 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2118 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
2122 else if (gimple_code (stmt
) != GIMPLE_PHI
)
2124 gsi_insert_seq_after (&gsi
, seq
, GSI_CONTINUE_LINKING
);
2125 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, false,
2126 GSI_CONTINUE_LINKING
);
2130 gsi
= gsi_after_labels (gimple_bb (stmt
));
2131 if (!gsi_end_p (gsi
))
2132 uid
= gimple_uid (gsi_stmt (gsi
));
2135 gsi
= gsi_start_bb (gimple_bb (stmt
));
2137 while (!gsi_end_p (gsi
))
2139 uid
= gimple_uid (gsi_stmt (gsi
));
2143 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2144 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
2146 if (gsi_end_p (gsi
))
2147 gsi
= gsi_last_bb (gimple_bb (stmt
));
2151 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2152 if (gimple_uid (gsi_stmt (gsi
)))
2155 gimple_set_uid (gsi_stmt (gsi
), uid
);
2162 range
->strict_overflow_p
= false;
2164 for (i
= 0; i
< count
; i
++)
2167 range
= otherrange
+ i
;
2169 range
= otherrangep
[i
];
2170 oe
= (*ops
)[range
->idx
];
2171 /* Now change all the other range test immediate uses, so that
2172 those tests will be optimized away. */
2173 if (opcode
== ERROR_MARK
)
2176 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
2177 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
2179 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
2180 ? boolean_false_node
: boolean_true_node
);
2183 oe
->op
= error_mark_node
;
2184 range
->exp
= NULL_TREE
;
/* Optimize X == CST1 || X == CST2
   if popcount (CST1 ^ CST2) == 1 into
   (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
   Similarly for ranges.  E.g.
   X != 2 && X != 3 && X != 10 && X != 11
   will be transformed by the previous optimization into
   !((X - 2U) <= 1U || (X - 10U) <= 1U)
   and this loop can transform that into
   !(((X & ~8) - 2U) <= 1U).  */
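
/* A concrete (hypothetical) instance of the above: for
   X == 43 || X == 47, CST1 ^ CST2 == 4 has a single bit set, so the
   test can become (X & ~4) == 43, which accepts exactly 43 and 47.  */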
2200 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
2201 tree lowi
, tree lowj
, tree highi
, tree highj
,
2202 vec
<operand_entry_t
> *ops
,
2203 struct range_entry
*rangei
,
2204 struct range_entry
*rangej
)
2206 tree lowxor
, highxor
, tem
, exp
;
2207 /* Check lowi ^ lowj == highi ^ highj and
2208 popcount (lowi ^ lowj) == 1. */
2209 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
2210 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
2212 if (!integer_pow2p (lowxor
))
2214 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
2215 if (!tree_int_cst_equal (lowxor
, highxor
))
2218 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
2219 exp
= fold_build2 (BIT_AND_EXPR
, type
, rangei
->exp
, tem
);
2220 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
2221 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
2222 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, exp
,
2223 NULL
, rangei
->in_p
, lowj
, highj
,
2224 rangei
->strict_overflow_p
2225 || rangej
->strict_overflow_p
))
/* Optimize X == CST1 || X == CST2
   if popcount (CST2 - CST1) == 1 into
   ((X - CST1) & ~(CST2 - CST1)) == 0.
   Similarly for ranges.  E.g.
   X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
   || X == 75 || X == 45
   will be transformed by the previous optimization into
   (X - 43U) <= 3U || (X - 75U) <= 3U
   and this loop can transform that into
   ((X - 43U) & ~(75U - 43U)) <= 3U.  */
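
/* A concrete (hypothetical) instance of the above: for
   X == 43 || X == 75, CST2 - CST1 == 32 has a single bit set, so the
   test can become ((X - 43U) & ~32U) == 0, which accepts exactly
   43 and 75.  */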
optimize_range_tests_diff (enum tree_code opcode, tree type,
			   tree lowi, tree lowj, tree highi, tree highj,
			   vec<operand_entry_t> *ops,
			   struct range_entry *rangei,
			   struct range_entry *rangej)
  tree tem1, tem2, mask;
  /* Check highi - lowi == highj - lowj.  */
  tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
  tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
  if (!tree_int_cst_equal (tem1, tem2))
  /* Check popcount (lowj - lowi) == 1.  */
  tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
  if (!integer_pow2p (tem1))
  type = unsigned_type_for (type);
  tem1 = fold_convert (type, tem1);
  tem2 = fold_convert (type, tem2);
  lowi = fold_convert (type, lowi);
  mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
  tem1 = fold_binary (MINUS_EXPR, type,
		      fold_convert (type, rangei->exp), lowi);
  tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
  lowj = build_int_cst (type, 0);
  if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
			 NULL, rangei->in_p, lowj, tem2,
			 rangei->strict_overflow_p
			 || rangej->strict_overflow_p))
/* Perform the checks common to optimize_range_tests_xor and
   optimize_range_tests_diff on each candidate pair of ranges.
   If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
   otherwise call optimize_range_tests_diff.  */
optimize_range_tests_1 (enum tree_code opcode, int first, int length,
			bool optimize_xor, vec<operand_entry_t> *ops,
			struct range_entry *ranges)
  bool any_changes = false;
  for (i = first; i < length; i++)
      tree lowi, highi, lowj, highj, type, tem;
      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
      type = TREE_TYPE (ranges[i].exp);
      if (!INTEGRAL_TYPE_P (type))
      lowi = ranges[i].low;
      if (lowi == NULL_TREE)
	lowi = TYPE_MIN_VALUE (type);
      highi = ranges[i].high;
      if (highi == NULL_TREE)
      for (j = i + 1; j < length && j < i + 64; j++)
	  if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
	  lowj = ranges[j].low;
	  if (lowj == NULL_TREE)
	  highj = ranges[j].high;
	  if (highj == NULL_TREE)
	    highj = TYPE_MAX_VALUE (type);
	  /* Check lowj > highi.  */
	  tem = fold_binary (GT_EXPR, boolean_type_node,
	  if (tem == NULL_TREE || !integer_onep (tem))
	    changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
						ranges + i, ranges + j);
	    changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
						 ranges + i, ranges + j);
/* Helper function of optimize_range_tests_to_bit_test.  Handle a single
   range, EXP, LOW, HIGH, compute bit mask of bits to test and return
   EXP on success, NULL otherwise.  */
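/* Illustrative example of the mask computed here (values invented):
   with PREC == 64, TOTALLOW == 43, LOW == 75 and HIGH == 78 we get
   HIGH - LOW == 3, so the initial mask covers max == 4 bits, 0xf,
   and it is finally shifted left by LOW - TOTALLOW == 32 bits,
   giving 0xf00000000 as the bits to test.  */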
extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
		       wide_int *mask, tree *totallowp)
  tree tem = int_const_binop (MINUS_EXPR, high, low);
  if (tem == NULL_TREE
      || TREE_CODE (tem) != INTEGER_CST
      || TREE_OVERFLOW (tem)
      || tree_int_cst_sgn (tem) == -1
      || compare_tree_int (tem, prec) != -1)
  unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
  *mask = wi::shifted_mask (0, max, false, prec);
  if (TREE_CODE (exp) == BIT_AND_EXPR
      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
      widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
      msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
      if (wi::popcount (msk) == 1
	  && wi::ltu_p (msk, prec - max))
	  *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
	  max += msk.to_uhwi ();
	  exp = TREE_OPERAND (exp, 0);
	  if (integer_zerop (low)
	      && TREE_CODE (exp) == PLUS_EXPR
	      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
	      tree ret = TREE_OPERAND (exp, 0);
		= wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
				     TYPE_PRECISION (TREE_TYPE (low))));
	      tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
	      else if (!tree_int_cst_lt (totallow, tbias))
	      bias = wi::to_widest (tbias);
	      bias -= wi::to_widest (totallow);
	      if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
		  *mask = wi::lshift (*mask, bias);
  if (!tree_int_cst_lt (totallow, low))
  tem = int_const_binop (MINUS_EXPR, low, totallow);
  if (tem == NULL_TREE
      || TREE_CODE (tem) != INTEGER_CST
      || TREE_OVERFLOW (tem)
      || compare_tree_int (tem, prec - max) == 1)
  *mask = wi::lshift (*mask, wi::to_widest (tem));
/* Attempt to optimize small range tests using bit test.
   E.g.
   X != 43 && X != 76 && X != 44 && X != 78 && X != 49
   && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
   has been optimized by the earlier range optimizations into:
   ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
   As the whole 43 through 82 range spans fewer than 64 values,
   for 64-bit word targets optimize that into:
   (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */
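/* To see where the 0x8F0000004FULL constant comes from: the excluded
   values are {43, 44, 45, 46, 49, 75, 76, 77, 78, 82}; after
   subtracting the low bound 43 they become bit positions
   {0, 1, 2, 3, 6, 32, 33, 34, 35, 39}, and setting exactly those bits
   in a 64-bit word yields 0xf | 0x40 | (0xfULL << 32) | (1ULL << 39)
   == 0x8F0000004FULL.  */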
optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
				  vec<operand_entry_t> *ops,
				  struct range_entry *ranges)
  bool any_changes = false;
  int prec = GET_MODE_BITSIZE (word_mode);
  auto_vec<struct range_entry *, 64> candidates;
2431 for (i
= first
; i
< length
- 2; i
++)
2433 tree lowi
, highi
, lowj
, highj
, type
;
2435 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2437 type
= TREE_TYPE (ranges
[i
].exp
);
2438 if (!INTEGRAL_TYPE_P (type
))
2440 lowi
= ranges
[i
].low
;
2441 if (lowi
== NULL_TREE
)
2442 lowi
= TYPE_MIN_VALUE (type
);
2443 highi
= ranges
[i
].high
;
2444 if (highi
== NULL_TREE
)
2447 tree exp
= extract_bit_test_mask (ranges
[i
].exp
, prec
, lowi
, lowi
,
2448 highi
, &mask
, &lowi
);
2449 if (exp
== NULL_TREE
)
2451 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2452 candidates
.truncate (0);
2453 int end
= MIN (i
+ 64, length
);
2454 for (j
= i
+ 1; j
< end
; j
++)
2457 if (ranges
[j
].exp
== NULL_TREE
|| ranges
[j
].in_p
)
2459 if (ranges
[j
].exp
== exp
)
2461 else if (TREE_CODE (ranges
[j
].exp
) == BIT_AND_EXPR
)
2463 exp2
= TREE_OPERAND (ranges
[j
].exp
, 0);
2466 else if (TREE_CODE (exp2
) == PLUS_EXPR
)
2468 exp2
= TREE_OPERAND (exp2
, 0);
2478 lowj
= ranges
[j
].low
;
2479 if (lowj
== NULL_TREE
)
2481 highj
= ranges
[j
].high
;
2482 if (highj
== NULL_TREE
)
2483 highj
= TYPE_MAX_VALUE (type
);
2485 exp2
= extract_bit_test_mask (ranges
[j
].exp
, prec
, lowi
, lowj
,
2486 highj
, &mask2
, NULL
);
2490 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2491 candidates
.safe_push (&ranges
[j
]);
2494 /* If we need otherwise 3 or more comparisons, use a bit test. */
2495 if (candidates
.length () >= 2)
2497 tree high
= wide_int_to_tree (TREE_TYPE (lowi
),
2498 wi::to_widest (lowi
)
2499 + prec
- 1 - wi::clz (mask
));
2500 operand_entry_t oe
= (*ops
)[ranges
[i
].idx
];
2502 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
2503 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2504 location_t loc
= gimple_location (stmt
);
2505 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2507 /* See if it isn't cheaper to pretend the minimum value of the
2508 range is 0, if maximum value is small enough.
2509 We can avoid then subtraction of the minimum value, but the
2510 mask constant could be perhaps more expensive. */
2511 if (compare_tree_int (lowi
, 0) > 0
2512 && compare_tree_int (high
, prec
) < 0)
2515 HOST_WIDE_INT m
= tree_to_uhwi (lowi
);
2516 rtx reg
= gen_raw_REG (word_mode
, 10000);
2517 bool speed_p
= optimize_bb_for_speed_p (gimple_bb (stmt
));
2518 cost_diff
= set_rtx_cost (gen_rtx_PLUS (word_mode
, reg
,
2519 GEN_INT (-m
)), speed_p
);
2520 rtx r
= immed_wide_int_const (mask
, word_mode
);
2521 cost_diff
+= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
2522 word_mode
, speed_p
);
2523 r
= immed_wide_int_const (wi::lshift (mask
, m
), word_mode
);
2524 cost_diff
-= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
2525 word_mode
, speed_p
);
2528 mask
= wi::lshift (mask
, m
);
2529 lowi
= build_zero_cst (TREE_TYPE (lowi
));
2533 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2535 if (tem
== NULL_TREE
|| is_gimple_val (tem
))
2537 tree etype
= unsigned_type_for (TREE_TYPE (exp
));
2538 exp
= fold_build2_loc (loc
, MINUS_EXPR
, etype
,
2539 fold_convert_loc (loc
, etype
, exp
),
2540 fold_convert_loc (loc
, etype
, lowi
));
2541 exp
= fold_convert_loc (loc
, integer_type_node
, exp
);
2542 tree word_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
2543 exp
= fold_build2_loc (loc
, LSHIFT_EXPR
, word_type
,
2544 build_int_cst (word_type
, 1), exp
);
2545 exp
= fold_build2_loc (loc
, BIT_AND_EXPR
, word_type
, exp
,
2546 wide_int_to_tree (word_type
, mask
));
2547 exp
= fold_build2_loc (loc
, EQ_EXPR
, optype
, exp
,
2548 build_zero_cst (word_type
));
2549 if (is_gimple_val (exp
))
2552 /* The shift might have undefined behavior if TEM is true,
2553 but reassociate_bb isn't prepared to have basic blocks
2554 split when it is running. So, temporarily emit a code
2555 with BIT_IOR_EXPR instead of &&, and fix it up in
2558 tem
= force_gimple_operand (tem
, &seq
, true, NULL_TREE
);
2559 gcc_assert (TREE_CODE (tem
) == SSA_NAME
);
2560 gimple_set_visited (SSA_NAME_DEF_STMT (tem
), true);
2562 exp
= force_gimple_operand (exp
, &seq2
, true, NULL_TREE
);
2563 gimple_seq_add_seq_without_update (&seq
, seq2
);
2564 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2565 gimple_set_visited (SSA_NAME_DEF_STMT (exp
), true);
2566 gimple
*g
= gimple_build_assign (make_ssa_name (optype
),
2567 BIT_IOR_EXPR
, tem
, exp
);
2568 gimple_set_location (g
, loc
);
2569 gimple_seq_add_stmt_without_update (&seq
, g
);
2570 exp
= gimple_assign_lhs (g
);
2571 tree val
= build_zero_cst (optype
);
2572 if (update_range_test (&ranges
[i
], NULL
, candidates
.address (),
2573 candidates
.length (), opcode
, ops
, exp
,
2574 seq
, false, val
, val
, strict_overflow_p
))
2577 reassoc_branch_fixups
.safe_push (tem
);
2580 gimple_seq_discard (seq
);
/* Optimize range tests, similarly to how fold_range_test optimizes
   it on trees.  The tree code for the binary
   operation between all the operands is OPCODE.
   If OPCODE is ERROR_MARK, optimize_range_tests is called from within
   maybe_optimize_range_tests for inter-bb range optimization.
   In that case, if oe->op is NULL, oe->id is bb->index whose
   GIMPLE_COND is && or ||ed into the test, and oe->rank says
   the actual opcode.  */
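/* Illustration of that encoding (block numbers invented): if basic
   blocks 5 and 7 each end in a GIMPLE_COND feeding the combined test,
   the ops vector may hold entries { op = NULL, id = 5,
   rank = BIT_AND_EXPR } and { op = NULL, id = 7, rank = BIT_IOR_EXPR },
   where rank stands in for the && resp. || connecting the
   conditions.  */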
optimize_range_tests (enum tree_code opcode,
		      vec<operand_entry_t> *ops)
  unsigned int length = ops->length (), i, j, first;
  struct range_entry *ranges;
  bool any_changes = false;
2607 ranges
= XNEWVEC (struct range_entry
, length
);
2608 for (i
= 0; i
< length
; i
++)
2612 init_range_entry (ranges
+ i
, oe
->op
,
2614 last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
)));
2615 /* For | invert it now, we will invert it again before emitting
2616 the optimized expression. */
2617 if (opcode
== BIT_IOR_EXPR
2618 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2619 ranges
[i
].in_p
= !ranges
[i
].in_p
;
2622 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
2623 for (i
= 0; i
< length
; i
++)
2624 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
2627 /* Try to merge ranges. */
2628 for (first
= i
; i
< length
; i
++)
2630 tree low
= ranges
[i
].low
;
2631 tree high
= ranges
[i
].high
;
2632 int in_p
= ranges
[i
].in_p
;
2633 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2634 int update_fail_count
= 0;
2636 for (j
= i
+ 1; j
< length
; j
++)
2638 if (ranges
[i
].exp
!= ranges
[j
].exp
)
2640 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
2641 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
2643 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2649 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, NULL
, j
- i
- 1,
2650 opcode
, ops
, ranges
[i
].exp
, NULL
, in_p
,
2651 low
, high
, strict_overflow_p
))
2656 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2657 while update_range_test would fail. */
2658 else if (update_fail_count
== 64)
2661 ++update_fail_count
;
2664 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
2667 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
2668 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
2670 if (lshift_cheap_p (optimize_function_for_speed_p (cfun
)))
2671 any_changes
|= optimize_range_tests_to_bit_test (opcode
, first
, length
,
2674 if (any_changes
&& opcode
!= ERROR_MARK
)
2677 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
2679 if (oe
->op
== error_mark_node
)
2688 XDELETEVEC (ranges
);
/* Return true if STMT is a cast like:
   <bb N>:
   ...
   _123 = (int) _234;

   <bb M>:
   # _345 = PHI <_123(N), 1(...), 1(...)>
   where _234 has bool type, _123 has single use and
   bb N has a single successor M.  This is commonly used in
   the last block of a range test.  */
final_range_test_p (gimple *stmt)
  basic_block bb, rhs_bb;
  use_operand_p use_p;
  if (!gimple_assign_cast_p (stmt))
  bb = gimple_bb (stmt);
  if (!single_succ_p (bb))
  e = single_succ_edge (bb);
  if (e->flags & EDGE_COMPLEX)
  lhs = gimple_assign_lhs (stmt);
  rhs = gimple_assign_rhs1 (stmt);
  if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
      || TREE_CODE (rhs) != SSA_NAME
      || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
  /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
  if (!single_imm_use (lhs, &use_p, &use_stmt))
  if (gimple_code (use_stmt) != GIMPLE_PHI
      || gimple_bb (use_stmt) != e->dest)
  /* And that the rhs is defined in the same loop.  */
  rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
      || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
/* Return true if BB is suitable basic block for inter-bb range test
   optimization.  If BACKWARD is true, BB should be the only predecessor
   of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
   or compared with to find a common basic block to which all conditions
   branch to if true resp. false.  If BACKWARD is false, TEST_BB should
   be the only predecessor of BB.  */
2753 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
2756 edge_iterator ei
, ei2
;
2760 bool other_edge_seen
= false;
2765 /* Check last stmt first. */
2766 stmt
= last_stmt (bb
);
2768 || (gimple_code (stmt
) != GIMPLE_COND
2769 && (backward
|| !final_range_test_p (stmt
)))
2770 || gimple_visited_p (stmt
)
2771 || stmt_could_throw_p (stmt
)
2774 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
2777 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2778 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2779 to *OTHER_BB (if not set yet, try to find it out). */
2780 if (EDGE_COUNT (bb
->succs
) != 2)
2782 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2784 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2786 if (e
->dest
== test_bb
)
2795 if (*other_bb
== NULL
)
2797 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
2798 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2800 else if (e
->dest
== e2
->dest
)
2801 *other_bb
= e
->dest
;
2802 if (*other_bb
== NULL
)
2805 if (e
->dest
== *other_bb
)
2806 other_edge_seen
= true;
2810 if (*other_bb
== NULL
|| !other_edge_seen
)
2813 else if (single_succ (bb
) != *other_bb
)
2816 /* Now check all PHIs of *OTHER_BB. */
2817 e
= find_edge (bb
, *other_bb
);
2818 e2
= find_edge (test_bb
, *other_bb
);
2819 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2821 gphi
*phi
= gsi
.phi ();
2822 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2823 corresponding to BB and TEST_BB predecessor must be the same. */
2824 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
2825 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
2827 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2828 one of the PHIs should have the lhs of the last stmt in
2829 that block as PHI arg and that PHI should have 0 or 1
2830 corresponding to it in all other range test basic blocks
2834 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
2835 == gimple_assign_lhs (stmt
)
2836 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
2837 || integer_onep (gimple_phi_arg_def (phi
,
2843 gimple
*test_last
= last_stmt (test_bb
);
2844 if (gimple_code (test_last
) != GIMPLE_COND
2845 && gimple_phi_arg_def (phi
, e2
->dest_idx
)
2846 == gimple_assign_lhs (test_last
)
2847 && (integer_zerop (gimple_phi_arg_def (phi
, e
->dest_idx
))
2848 || integer_onep (gimple_phi_arg_def (phi
, e
->dest_idx
))))
/* Return true if BB doesn't have side-effects that would disallow
   range test optimization, all SSA_NAMEs set in the bb are consumed
   in the bb and there are no PHIs.  */
no_side_effect_bb (basic_block bb)
  gimple_stmt_iterator gsi;
  if (!gimple_seq_empty_p (phi_nodes (bb)))
  last = last_stmt (bb);
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      gimple *stmt = gsi_stmt (gsi);
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      if (is_gimple_debug (stmt))
      if (gimple_has_side_effects (stmt))
      if (!is_gimple_assign (stmt))
      lhs = gimple_assign_lhs (stmt);
      if (TREE_CODE (lhs) != SSA_NAME)
      if (gimple_assign_rhs_could_trap_p (stmt))
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	  gimple *use_stmt = USE_STMT (use_p);
	  if (is_gimple_debug (use_stmt))
	  if (gimple_bb (use_stmt) != bb)
/* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
   return true and fill in *OPS recursively.  */
get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
  gimple *stmt = SSA_NAME_DEF_STMT (var);
  if (!is_reassociable_op (stmt, code, loop))
  rhs[0] = gimple_assign_rhs1 (stmt);
  rhs[1] = gimple_assign_rhs2 (stmt);
  gimple_set_visited (stmt, true);
  for (i = 0; i < 2; i++)
    if (TREE_CODE (rhs[i]) == SSA_NAME
	&& !get_ops (rhs[i], code, ops, loop)
	&& has_single_use (rhs[i]))
	operand_entry_t oe = operand_entry_pool.allocate ();
	ops->safe_push (oe);
/* Find the ops that were added by get_ops starting from VAR, see if
   they were changed during update_range_test and if yes, create new
   stmts.  */
update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
	    unsigned int *pidx, struct loop *loop)
2944 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
2948 if (!is_reassociable_op (stmt
, code
, loop
))
2951 rhs
[0] = gimple_assign_rhs1 (stmt
);
2952 rhs
[1] = gimple_assign_rhs2 (stmt
);
2955 for (i
= 0; i
< 2; i
++)
2956 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
2958 rhs
[2 + i
] = update_ops (rhs
[i
], code
, ops
, pidx
, loop
);
2959 if (rhs
[2 + i
] == NULL_TREE
)
2961 if (has_single_use (rhs
[i
]))
2962 rhs
[2 + i
] = ops
[(*pidx
)++]->op
;
2964 rhs
[2 + i
] = rhs
[i
];
2967 if ((rhs
[2] != rhs
[0] || rhs
[3] != rhs
[1])
2968 && (rhs
[2] != rhs
[1] || rhs
[3] != rhs
[0]))
2970 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
2971 var
= make_ssa_name (TREE_TYPE (var
));
2972 gassign
*g
= gimple_build_assign (var
, gimple_assign_rhs_code (stmt
),
2974 gimple_set_uid (g
, gimple_uid (stmt
));
2975 gimple_set_visited (g
, true);
2976 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
/* Structure to track the initial value passed to get_ops and
   the range in the ops vector for each basic block.  */
struct inter_bb_range_test_entry
  unsigned int first_idx, last_idx;
/* Inter-bb range test optimization.  */
maybe_optimize_range_tests (gimple *stmt)
2995 basic_block first_bb
= gimple_bb (stmt
);
2996 basic_block last_bb
= first_bb
;
2997 basic_block other_bb
= NULL
;
3001 auto_vec
<operand_entry_t
> ops
;
3002 auto_vec
<inter_bb_range_test_entry
> bbinfo
;
3003 bool any_changes
= false;
3005 /* Consider only basic blocks that end with GIMPLE_COND or
3006 a cast statement satisfying final_range_test_p. All
3007 but the last bb in the first_bb .. last_bb range
3008 should end with GIMPLE_COND. */
3009 if (gimple_code (stmt
) == GIMPLE_COND
)
3011 if (EDGE_COUNT (first_bb
->succs
) != 2)
3014 else if (final_range_test_p (stmt
))
3015 other_bb
= single_succ (first_bb
);
3019 if (stmt_could_throw_p (stmt
))
3022 /* As relative ordering of post-dominator sons isn't fixed,
3023 maybe_optimize_range_tests can be called first on any
3024 bb in the range we want to optimize. So, start searching
3025 backwards, if first_bb can be set to a predecessor. */
3026 while (single_pred_p (first_bb
))
3028 basic_block pred_bb
= single_pred (first_bb
);
3029 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, true))
3031 if (!no_side_effect_bb (first_bb
))
3035 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3036 Before starting forward search in last_bb successors, find
3037 out the other_bb. */
3038 if (first_bb
== last_bb
)
3041 /* As non-GIMPLE_COND last stmt always terminates the range,
3042 if forward search didn't discover anything, just give up. */
3043 if (gimple_code (stmt
) != GIMPLE_COND
)
3045 /* Look at both successors. Either it ends with a GIMPLE_COND
3046 and satisfies suitable_cond_bb, or ends with a cast and
3047 other_bb is that cast's successor. */
3048 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
3049 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
3050 || e
->dest
== first_bb
)
3052 else if (single_pred_p (e
->dest
))
3054 stmt
= last_stmt (e
->dest
);
3056 && gimple_code (stmt
) == GIMPLE_COND
3057 && EDGE_COUNT (e
->dest
->succs
) == 2)
3059 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
, true))
3065 && final_range_test_p (stmt
)
3066 && find_edge (first_bb
, single_succ (e
->dest
)))
3068 other_bb
= single_succ (e
->dest
);
3069 if (other_bb
== first_bb
)
3073 if (other_bb
== NULL
)
3076 /* Now do the forward search, moving last_bb to successor bbs
3077 that aren't other_bb. */
3078 while (EDGE_COUNT (last_bb
->succs
) == 2)
3080 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
3081 if (e
->dest
!= other_bb
)
3085 if (!single_pred_p (e
->dest
))
3087 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, false))
3089 if (!no_side_effect_bb (e
->dest
))
3093 if (first_bb
== last_bb
)
3095 /* Here basic blocks first_bb through last_bb's predecessor
3096 end with GIMPLE_COND, all of them have one of the edges to
3097 other_bb and another to another block in the range,
3098 all blocks except first_bb don't have side-effects and
3099 last_bb ends with either GIMPLE_COND, or cast satisfying
3100 final_range_test_p. */
3101 for (bb
= last_bb
; ; bb
= single_pred (bb
))
3103 enum tree_code code
;
3105 inter_bb_range_test_entry bb_ent
;
3107 bb_ent
.op
= NULL_TREE
;
3108 bb_ent
.first_idx
= ops
.length ();
3109 bb_ent
.last_idx
= bb_ent
.first_idx
;
3110 e
= find_edge (bb
, other_bb
);
3111 stmt
= last_stmt (bb
);
3112 gimple_set_visited (stmt
, true);
3113 if (gimple_code (stmt
) != GIMPLE_COND
)
3115 use_operand_p use_p
;
3120 lhs
= gimple_assign_lhs (stmt
);
3121 rhs
= gimple_assign_rhs1 (stmt
);
3122 gcc_assert (bb
== last_bb
);
3129 # _345 = PHI <_123(N), 1(...), 1(...)>
3131 or 0 instead of 1. If it is 0, the _234
3132 range test is anded together with all the
3133 other range tests, if it is 1, it is ored with
3135 single_imm_use (lhs
, &use_p
, &phi
);
3136 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
3137 e2
= find_edge (first_bb
, other_bb
);
3139 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
3140 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
3141 code
= BIT_AND_EXPR
;
3144 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
3145 code
= BIT_IOR_EXPR
;
3148 /* If _234 SSA_NAME_DEF_STMT is
3150 (or &, corresponding to 1/0 in the phi arguments,
3151 push into ops the individual range test arguments
3152 of the bitwise or resp. and, recursively. */
3153 if (!get_ops (rhs
, code
, &ops
,
3154 loop_containing_stmt (stmt
))
3155 && has_single_use (rhs
))
3157 /* Otherwise, push the _234 range test itself. */
3158 operand_entry_t oe
= operand_entry_pool
.allocate ();
3168 bb_ent
.last_idx
= ops
.length ();
3170 bbinfo
.safe_push (bb_ent
);
3173 /* Otherwise stmt is GIMPLE_COND. */
3174 code
= gimple_cond_code (stmt
);
3175 lhs
= gimple_cond_lhs (stmt
);
3176 rhs
= gimple_cond_rhs (stmt
);
3177 if (TREE_CODE (lhs
) == SSA_NAME
3178 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3179 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
3180 || rhs
!= boolean_false_node
3181 /* Either push into ops the individual bitwise
3182 or resp. and operands, depending on which
3183 edge is other_bb. */
3184 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
3185 ^ (code
== EQ_EXPR
))
3186 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
3187 loop_containing_stmt (stmt
))))
3189 /* Or push the GIMPLE_COND stmt itself. */
3190 operand_entry_t oe
= operand_entry_pool
.allocate ();
3193 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
3194 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
3195 /* oe->op = NULL signs that there is no SSA_NAME
3196 for the range test, and oe->id instead is the
3197 basic block number, at which's end the GIMPLE_COND
3205 else if (ops
.length () > bb_ent
.first_idx
)
3208 bb_ent
.last_idx
= ops
.length ();
3210 bbinfo
.safe_push (bb_ent
);
3214 if (ops
.length () > 1)
3215 any_changes
= optimize_range_tests (ERROR_MARK
, &ops
);
3219 /* update_ops relies on has_single_use predicates returning the
3220 same values as it did during get_ops earlier. Additionally it
3221 never removes statements, only adds new ones and it should walk
3222 from the single imm use and check the predicate already before
3223 making those changes.
3224 On the other side, the handling of GIMPLE_COND directly can turn
3225 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3226 it needs to be done in a separate loop afterwards. */
3227 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
3229 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
3230 && bbinfo
[idx
].op
!= NULL_TREE
)
3234 stmt
= last_stmt (bb
);
3235 new_op
= update_ops (bbinfo
[idx
].op
,
3237 ops
[bbinfo
[idx
].first_idx
]->rank
,
3238 ops
, &bbinfo
[idx
].first_idx
,
3239 loop_containing_stmt (stmt
));
3240 if (new_op
== NULL_TREE
)
3242 gcc_assert (bb
== last_bb
);
3243 new_op
= ops
[bbinfo
[idx
].first_idx
++]->op
;
3245 if (bbinfo
[idx
].op
!= new_op
)
3247 imm_use_iterator iter
;
3248 use_operand_p use_p
;
3249 gimple
*use_stmt
, *cast_stmt
= NULL
;
3251 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, bbinfo
[idx
].op
)
3252 if (is_gimple_debug (use_stmt
))
3254 else if (gimple_code (use_stmt
) == GIMPLE_COND
3255 || gimple_code (use_stmt
) == GIMPLE_PHI
)
3256 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
3257 SET_USE (use_p
, new_op
);
3258 else if (gimple_assign_cast_p (use_stmt
))
3259 cast_stmt
= use_stmt
;
3264 gcc_assert (bb
== last_bb
);
3265 tree lhs
= gimple_assign_lhs (cast_stmt
);
3266 tree new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
3267 enum tree_code rhs_code
3268 = gimple_assign_rhs_code (cast_stmt
);
3270 if (is_gimple_min_invariant (new_op
))
3272 new_op
= fold_convert (TREE_TYPE (lhs
), new_op
);
3273 g
= gimple_build_assign (new_lhs
, new_op
);
3276 g
= gimple_build_assign (new_lhs
, rhs_code
, new_op
);
3277 gimple_stmt_iterator gsi
= gsi_for_stmt (cast_stmt
);
3278 gimple_set_uid (g
, gimple_uid (cast_stmt
));
3279 gimple_set_visited (g
, true);
3280 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3281 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
3282 if (is_gimple_debug (use_stmt
))
3284 else if (gimple_code (use_stmt
) == GIMPLE_COND
3285 || gimple_code (use_stmt
) == GIMPLE_PHI
)
3286 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
3287 SET_USE (use_p
, new_lhs
);
3296 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
3298 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
3299 && bbinfo
[idx
].op
== NULL_TREE
3300 && ops
[bbinfo
[idx
].first_idx
]->op
!= NULL_TREE
)
3302 gcond
*cond_stmt
= as_a
<gcond
*> (last_stmt (bb
));
3303 if (integer_zerop (ops
[bbinfo
[idx
].first_idx
]->op
))
3304 gimple_cond_make_false (cond_stmt
);
3305 else if (integer_onep (ops
[bbinfo
[idx
].first_idx
]->op
))
3306 gimple_cond_make_true (cond_stmt
);
3309 gimple_cond_set_code (cond_stmt
, NE_EXPR
);
3310 gimple_cond_set_lhs (cond_stmt
,
3311 ops
[bbinfo
[idx
].first_idx
]->op
);
3312 gimple_cond_set_rhs (cond_stmt
, boolean_false_node
);
3314 update_stmt (cond_stmt
);
/* Return true if OPERAND is defined by a PHI node which uses the LHS
   of STMT in its operands.  This is also known as a "destructive
   update" operation.  */
is_phi_for_stmt (gimple *stmt, tree operand)
  use_operand_p arg_p;
  if (TREE_CODE (operand) != SSA_NAME)
  lhs = gimple_assign_lhs (stmt);
  def_stmt = SSA_NAME_DEF_STMT (operand);
  def_phi = dyn_cast <gphi *> (def_stmt);
  FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
    if (lhs == USE_FROM_PTR (arg_p))
/* Remove def stmt of VAR if VAR has zero uses and recurse
   on rhs1 operand if so.  */
remove_visited_stmt_chain (tree var)
  gimple_stmt_iterator gsi;
  if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
  stmt = SSA_NAME_DEF_STMT (var);
  if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
      var = gimple_assign_rhs1 (stmt);
      gsi = gsi_for_stmt (stmt);
      reassoc_remove_stmt (&gsi);
      release_defs (stmt);
/* This function checks three consecutive operands in
   passed operands vector OPS starting from OPINDEX and
   swaps two operands if it is profitable for binary operation
   consuming OPINDEX + 1 and OPINDEX + 2 operands.

   We pair ops with the same rank if possible.

   The alternative we try is to see if STMT is a destructive
   update style statement, which is like:
   b = phi (a, ...)
   a = c + b;
   In that case, we want to use the destructive update form to
   expose the possible vectorizer sum reduction opportunity.
   In that case, the third operand will be the phi node.  This
   check is not performed if STMT is null.

   We could, of course, try to be better as noted above, and do a
   lot of work to try to find these opportunities in >3 operand
   cases, but it is unlikely to be worth it.  */
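/* As a small illustration of the rank pairing above (ranks invented):
   if ops[opindex .. opindex + 2] have ranks {3, 3, 5}, the first and
   second entries share a rank while the third does not, so the first
   and third entries are swapped, giving {5, 3, 3}; the two rank-3
   operands then sit in the slots consumed by the next binary
   statement and can be paired there.  */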
swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
			  unsigned int opindex, gimple *stmt)
  operand_entry_t oe1, oe2, oe3;
  oe2 = ops[opindex + 1];
  oe3 = ops[opindex + 2];
  if ((oe1->rank == oe2->rank
       && oe2->rank != oe3->rank)
      || (stmt && is_phi_for_stmt (stmt, oe3->op)
	  && !is_phi_for_stmt (stmt, oe1->op)
	  && !is_phi_for_stmt (stmt, oe2->op)))
      struct operand_entry temp = *oe3;
      oe3->rank = oe1->rank;
      oe1->rank = temp.rank;
  else if ((oe1->rank == oe3->rank
	    && oe2->rank != oe3->rank)
	   || (stmt && is_phi_for_stmt (stmt, oe2->op)
	       && !is_phi_for_stmt (stmt, oe1->op)
	       && !is_phi_for_stmt (stmt, oe3->op)))
      struct operand_entry temp = *oe2;
      oe2->rank = oe1->rank;
      oe1->rank = temp.rank;
/* If definition of RHS1 or RHS2 dominates STMT, return the later of those
   two definitions, otherwise return STMT.  */
static inline gimple *
find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
  if (TREE_CODE (rhs1) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
    stmt = SSA_NAME_DEF_STMT (rhs1);
  if (TREE_CODE (rhs2) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
    stmt = SSA_NAME_DEF_STMT (rhs2);
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order.  Return new lhs.  */
3453 rewrite_expr_tree (gimple
*stmt
, unsigned int opindex
,
3454 vec
<operand_entry_t
> ops
, bool changed
)
3456 tree rhs1
= gimple_assign_rhs1 (stmt
);
3457 tree rhs2
= gimple_assign_rhs2 (stmt
);
3458 tree lhs
= gimple_assign_lhs (stmt
);
3461 /* The final recursion case for this function is that you have
3462 exactly two operations left.
3463 If we had exactly one op in the entire list to start with, we
3464 would have never called this function, and the tail recursion
3465 rewrites them one at a time. */
3466 if (opindex
+ 2 == ops
.length ())
3468 operand_entry_t oe1
, oe2
;
3471 oe2
= ops
[opindex
+ 1];
3473 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
3475 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3476 unsigned int uid
= gimple_uid (stmt
);
3478 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3480 fprintf (dump_file
, "Transforming ");
3481 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3484 /* Even when changed is false, reassociation could have e.g. removed
3485 some redundant operations, so unless we are just swapping the
3486 arguments or unless there is no change at all (then we just
3487 return lhs), force creation of a new SSA_NAME. */
3488 if (changed
|| ((rhs1
!= oe2
->op
|| rhs2
!= oe1
->op
) && opindex
))
3490 gimple
*insert_point
3491 = find_insert_point (stmt
, oe1
->op
, oe2
->op
);
3492 lhs
= make_ssa_name (TREE_TYPE (lhs
));
3494 = gimple_build_assign (lhs
, gimple_assign_rhs_code (stmt
),
3496 gimple_set_uid (stmt
, uid
);
3497 gimple_set_visited (stmt
, true);
3498 if (insert_point
== gsi_stmt (gsi
))
3499 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3501 insert_stmt_after (stmt
, insert_point
);
3505 gcc_checking_assert (find_insert_point (stmt
, oe1
->op
, oe2
->op
)
3507 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
3508 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
3512 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
3513 remove_visited_stmt_chain (rhs1
);
3515 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3517 fprintf (dump_file
, " into ");
3518 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3524 /* If we hit here, we should have 3 or more ops left. */
3525 gcc_assert (opindex
+ 2 < ops
.length ());
3527 /* Rewrite the next operator. */
3530 /* Recurse on the LHS of the binary operator, which is guaranteed to
3531 be the non-leaf side. */
3533 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), opindex
+ 1, ops
,
3534 changed
|| oe
->op
!= rhs2
);
3536 if (oe
->op
!= rhs2
|| new_rhs1
!= rhs1
)
3538 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3540 fprintf (dump_file
, "Transforming ");
3541 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3544 /* If changed is false, this is either opindex == 0
3545 or all outer rhs2's were equal to corresponding oe->op,
3546 and powi_result is NULL.
3547 That means lhs is equivalent before and after reassociation.
3548 Otherwise ensure the old lhs SSA_NAME is not reused and
3549 create a new stmt as well, so that any debug stmts will be
3550 properly adjusted. */
3553 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3554 unsigned int uid
= gimple_uid (stmt
);
3555 gimple
*insert_point
= find_insert_point (stmt
, new_rhs1
, oe
->op
);
3557 lhs
= make_ssa_name (TREE_TYPE (lhs
));
3558 stmt
= gimple_build_assign (lhs
, gimple_assign_rhs_code (stmt
),
3560 gimple_set_uid (stmt
, uid
);
3561 gimple_set_visited (stmt
, true);
3562 if (insert_point
== gsi_stmt (gsi
))
3563 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3565 insert_stmt_after (stmt
, insert_point
);
3569 gcc_checking_assert (find_insert_point (stmt
, new_rhs1
, oe
->op
)
3571 gimple_assign_set_rhs1 (stmt
, new_rhs1
);
3572 gimple_assign_set_rhs2 (stmt
, oe
->op
);
3576 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3578 fprintf (dump_file
, " into ");
3579 print_gimple_stmt (dump_file
, stmt
, 0, 0);
/* Find out how many cycles we need to compute the chain of statements.
   OPS_NUM holds the number of statements in a chain.  CPU_WIDTH is the
   maximum number of independent statements we may execute per cycle.  */
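/* Worked example (numbers invented): with OPS_NUM == 12 and
   CPU_WIDTH == 2, the first phase takes 12 / (2 * 2) == 3 cycles and
   leaves 12 - 3 * 2 == 6 operands; 6 is not a power of two, so the
   halving phase needs floor_log2 (6) + 1 == 3 more cycles, for a
   total of 6 cycles.  */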
get_required_cycles (int ops_num, int cpu_width)
  /* While we have more than 2 * cpu_width operands
     we may reduce number of operands by cpu_width
     per cycle.  */
  res = ops_num / (2 * cpu_width);
  /* The remaining operand count may be reduced twice per cycle
     until we have only one operand.  */
  rest = (unsigned)(ops_num - res * cpu_width);
  elog = exact_log2 (rest);
  res += floor_log2 (rest) + 1;
/* Returns an optimal number of registers to use for computation of
   given statements.  */
get_reassociation_width (int ops_num, enum tree_code opc,
  int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
  if (param_width > 0)
    width = param_width;
    width = targetm.sched.reassociation_width (opc, mode);
  /* Get the minimal time required for sequence computation.  */
  cycles_best = get_required_cycles (ops_num, width);
  /* Check if we may use less width and still compute sequence for
     the same time.  It will allow us to reduce register usage.
     get_required_cycles is monotonically increasing with lower width
     so we can perform a binary search for the minimal width that still
     results in the optimal cycle count.  */
  while (width > width_min)
      int width_mid = (width + width_min) / 2;
      if (get_required_cycles (ops_num, width_mid) == cycles_best)
      else if (width_min < width_mid)
	width_min = width_mid;
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order and trying to allow operations to be executed in
   parallel.  */
3663 rewrite_expr_tree_parallel (gassign
*stmt
, int width
,
3664 vec
<operand_entry_t
> ops
)
3666 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
3667 int op_num
= ops
.length ();
3668 int stmt_num
= op_num
- 1;
3669 gimple
**stmts
= XALLOCAVEC (gimple
*, stmt_num
);
3670 int op_index
= op_num
- 1;
3672 int ready_stmts_end
= 0;
3674 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
3676 /* We start expression rewriting from the top statements.
3677 So, in this loop we create a full list of statements
3678 we will work with. */
3679 stmts
[stmt_num
- 1] = stmt
;
3680 for (i
= stmt_num
- 2; i
>= 0; i
--)
3681 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
3683 for (i
= 0; i
< stmt_num
; i
++)
3687 /* Determine whether we should use results of
3688 already handled statements or not. */
3689 if (ready_stmts_end
== 0
3690 && (i
- stmt_index
>= width
|| op_index
< 1))
3691 ready_stmts_end
= i
;
3693 /* Now we choose operands for the next statement. Non zero
3694 value in ready_stmts_end means here that we should use
3695 the result of already generated statements as new operand. */
3696 if (ready_stmts_end
> 0)
3698 op1
= gimple_assign_lhs (stmts
[stmt_index
++]);
3699 if (ready_stmts_end
> stmt_index
)
3700 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3701 else if (op_index
>= 0)
3702 op2
= ops
[op_index
--]->op
;
3705 gcc_assert (stmt_index
< i
);
3706 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3709 if (stmt_index
>= ready_stmts_end
)
3710 ready_stmts_end
= 0;
3715 swap_ops_for_binary_stmt (ops
, op_index
- 2, NULL
);
3716 op2
= ops
[op_index
--]->op
;
3717 op1
= ops
[op_index
--]->op
;
3720 /* If we emit the last statement then we should put
3721 operands into the last statement. It will also
3723 if (op_index
< 0 && stmt_index
== i
)
3726 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3728 fprintf (dump_file
, "Transforming ");
3729 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3732 /* We keep original statement only for the last one. All
3733 others are recreated. */
3734 if (i
== stmt_num
- 1)
3736 gimple_assign_set_rhs1 (stmts
[i
], op1
);
3737 gimple_assign_set_rhs2 (stmts
[i
], op2
);
3738 update_stmt (stmts
[i
]);
3741 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
), op1
, op2
, opcode
);
3743 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3745 fprintf (dump_file
, " into ");
3746 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3750 remove_visited_stmt_chain (last_rhs1
);
/* Transform STMT, which is really (A + B) + (C + D), into the left
   linear form, ((A + B) + C) + D.
   Recurse on D if necessary.  */
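/* For example (illustrative GIMPLE, names invented): starting from
     t1 = a + b;  t2 = c + d;  t3 = t1 + t2;
   the code below rewrites the tree as
     t4 = t1 + d;  t3 = t4 + c;
   so that each addition has the previous partial sum as one operand,
   which is the left-linear shape the rest of the pass expects.  */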
linearize_expr (gimple *stmt)
  gimple_stmt_iterator gsi;
  gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
  gimple *oldbinrhs = binrhs;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  gimple *newbinrhs = NULL;
  struct loop *loop = loop_containing_stmt (stmt);
  tree lhs = gimple_assign_lhs (stmt);
  gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
	      && is_reassociable_op (binrhs, rhscode, loop));
  gsi = gsi_for_stmt (stmt);
  gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
  binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
				gimple_assign_rhs_code (binrhs),
				gimple_assign_lhs (binlhs),
				gimple_assign_rhs2 (binrhs));
  gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
  gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
  gimple_set_uid (binrhs, gimple_uid (stmt));
  if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
    newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
  if (dump_file && (dump_flags & TDF_DETAILS))
      fprintf (dump_file, "Linearized: ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
  reassociate_stats.linearized++;
  gsi = gsi_for_stmt (oldbinrhs);
  reassoc_remove_stmt (&gsi);
  release_defs (oldbinrhs);
  gimple_set_visited (stmt, true);
  gimple_set_visited (binlhs, true);
  gimple_set_visited (binrhs, true);
  /* Tail recurse on the new rhs if it still needs reassociation.  */
  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
    /* ??? This should probably be linearize_expr (newbinrhs) but I don't
       want to change the algorithm while converting to tuples.  */
    linearize_expr (stmt);
/* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
   it.  Otherwise, return NULL.  */
get_single_immediate_use (tree lhs)
  use_operand_p immuse;
  if (TREE_CODE (lhs) == SSA_NAME
      && single_imm_use (lhs, &immuse, &immusestmt)
      && is_gimple_assign (immusestmt))
/* Recursively negate the value of TONEGATE, and return the SSA_NAME
   representing the negated value.  Insertions of any necessary
   instructions go before GSI.
   This function is recursive in that, if you hand it "a_5" as the
   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
3835 negate_value (tree tonegate
, gimple_stmt_iterator
*gsip
)
3837 gimple
*negatedefstmt
= NULL
;
3838 tree resultofnegate
;
3839 gimple_stmt_iterator gsi
;
3842 /* If we are trying to negate a name, defined by an add, negate the
3843 add operands instead. */
3844 if (TREE_CODE (tonegate
) == SSA_NAME
)
3845 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
3846 if (TREE_CODE (tonegate
) == SSA_NAME
3847 && is_gimple_assign (negatedefstmt
)
3848 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
3849 && has_single_use (gimple_assign_lhs (negatedefstmt
))
3850 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
3852 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
3853 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
3854 tree lhs
= gimple_assign_lhs (negatedefstmt
);
3857 gsi
= gsi_for_stmt (negatedefstmt
);
3858 rhs1
= negate_value (rhs1
, &gsi
);
3860 gsi
= gsi_for_stmt (negatedefstmt
);
3861 rhs2
= negate_value (rhs2
, &gsi
);
3863 gsi
= gsi_for_stmt (negatedefstmt
);
3864 lhs
= make_ssa_name (TREE_TYPE (lhs
));
3865 gimple_set_visited (negatedefstmt
, true);
3866 g
= gimple_build_assign (lhs
, PLUS_EXPR
, rhs1
, rhs2
);
3867 gimple_set_uid (g
, gimple_uid (negatedefstmt
));
3868 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3872 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
3873 resultofnegate
= force_gimple_operand_gsi (gsip
, tonegate
, true,
3874 NULL_TREE
, true, GSI_SAME_STMT
);
3876 uid
= gimple_uid (gsi_stmt (gsi
));
3877 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
3879 gimple
*stmt
= gsi_stmt (gsi
);
3880 if (gimple_uid (stmt
) != 0)
3882 gimple_set_uid (stmt
, uid
);
3884 return resultofnegate
;
/* Return true if we should break up the subtract in STMT into an add
   with negate.  This is true when the subtract operands are really
   adds, or the subtract itself is used in an add expression.  In
   either case, breaking up the subtract into an add with negate
   exposes the adds to reassociation.  */
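/* For instance (illustrative names): in
     x = a - b;
     y = x + c;
   the subtract feeds an addition, so it is rewritten as x = a + -b,
   letting the reassociation machinery treat a, -b and c as a single
   operand list.  */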
should_break_up_subtract (gimple *stmt)
  tree lhs = gimple_assign_lhs (stmt);
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  struct loop *loop = loop_containing_stmt (stmt);
  if (TREE_CODE (binlhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
  if (TREE_CODE (binrhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
  if (TREE_CODE (lhs) == SSA_NAME
      && (immusestmt = get_single_immediate_use (lhs))
      && is_gimple_assign (immusestmt)
      && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
	  || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
/* Transform STMT from A - B into A + -B.  */
break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  if (dump_file && (dump_flags & TDF_DETAILS))
      fprintf (dump_file, "Breaking up subtract ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
  rhs2 = negate_value (rhs2, gsip);
  gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
/* Determine whether STMT is a builtin call that raises an SSA name
   to an integer power and has only one use.  If so, and this is early
   reassociation and unsafe math optimizations are permitted, place
   the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
   If any of these conditions does not hold, return FALSE.  */
3945 acceptable_pow_call (gimple
*stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
3948 REAL_VALUE_TYPE c
, cint
;
3950 if (!first_pass_instance
3951 || !flag_unsafe_math_optimizations
3952 || !is_gimple_call (stmt
)
3953 || !has_single_use (gimple_call_lhs (stmt
)))
3956 fndecl
= gimple_call_fndecl (stmt
);
3959 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
3962 switch (DECL_FUNCTION_CODE (fndecl
))
3964 CASE_FLT_FN (BUILT_IN_POW
):
3965 if (flag_errno_math
)
3968 *base
= gimple_call_arg (stmt
, 0);
3969 arg1
= gimple_call_arg (stmt
, 1);
3971 if (TREE_CODE (arg1
) != REAL_CST
)
3974 c
= TREE_REAL_CST (arg1
);
3976 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
3979 *exponent
= real_to_integer (&c
);
3980 real_from_integer (&cint
, VOIDmode
, *exponent
, SIGNED
);
3981 if (!real_identical (&c
, &cint
))
3986 CASE_FLT_FN (BUILT_IN_POWI
):
3987 *base
= gimple_call_arg (stmt
, 0);
3988 arg1
= gimple_call_arg (stmt
, 1);
3990 if (!tree_fits_shwi_p (arg1
))
3993 *exponent
= tree_to_shwi (arg1
);
4000 /* Expanding negative exponents is generally unproductive, so we don't
4001 complicate matters with those. Exponents of zero and one should
4002 have been handled by expression folding. */
4003 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
/* Recursively linearize a binary expression that is the RHS of STMT.
   Place the operands of the expression tree in the vector named OPS.  */
4013 linearize_expr_tree (vec
<operand_entry_t
> *ops
, gimple
*stmt
,
4014 bool is_associative
, bool set_visited
)
4016 tree binlhs
= gimple_assign_rhs1 (stmt
);
4017 tree binrhs
= gimple_assign_rhs2 (stmt
);
4018 gimple
*binlhsdef
= NULL
, *binrhsdef
= NULL
;
4019 bool binlhsisreassoc
= false;
4020 bool binrhsisreassoc
= false;
4021 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
4022 struct loop
*loop
= loop_containing_stmt (stmt
);
4023 tree base
= NULL_TREE
;
4024 HOST_WIDE_INT exponent
= 0;
4027 gimple_set_visited (stmt
, true);
4029 if (TREE_CODE (binlhs
) == SSA_NAME
)
4031 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
4032 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
4033 && !stmt_could_throw_p (binlhsdef
));
4036 if (TREE_CODE (binrhs
) == SSA_NAME
)
4038 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
4039 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
4040 && !stmt_could_throw_p (binrhsdef
));
4043 /* If the LHS is not reassociable, but the RHS is, we need to swap
4044 them. If neither is reassociable, there is nothing we can do, so
4045 just put them in the ops vector. If the LHS is reassociable,
4046 linearize it. If both are reassociable, then linearize the RHS
4049 if (!binlhsisreassoc
)
4051 /* If this is not a associative operation like division, give up. */
4052 if (!is_associative
)
4054 add_to_ops_vec (ops
, binrhs
);
4058 if (!binrhsisreassoc
)
4060 if (rhscode
== MULT_EXPR
4061 && TREE_CODE (binrhs
) == SSA_NAME
4062 && acceptable_pow_call (binrhsdef
, &base
, &exponent
))
4064 add_repeat_to_ops_vec (ops
, base
, exponent
);
4065 gimple_set_visited (binrhsdef
, true);
4068 add_to_ops_vec (ops
, binrhs
);
4070 if (rhscode
== MULT_EXPR
4071 && TREE_CODE (binlhs
) == SSA_NAME
4072 && acceptable_pow_call (binlhsdef
, &base
, &exponent
))
4074 add_repeat_to_ops_vec (ops
, base
, exponent
);
4075 gimple_set_visited (binlhsdef
, true);
4078 add_to_ops_vec (ops
, binlhs
);
4083 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4085 fprintf (dump_file
, "swapping operands of ");
4086 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4089 swap_ssa_operands (stmt
,
4090 gimple_assign_rhs1_ptr (stmt
),
4091 gimple_assign_rhs2_ptr (stmt
));
4094 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4096 fprintf (dump_file
, " is now ");
4097 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4100 /* We want to make it so the lhs is always the reassociative op,
4102 std::swap (binlhs
, binrhs
);
4104 else if (binrhsisreassoc
)
4106 linearize_expr (stmt
);
4107 binlhs
= gimple_assign_rhs1 (stmt
);
4108 binrhs
= gimple_assign_rhs2 (stmt
);
4111 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
4112 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
4114 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
4115 is_associative
, set_visited
);
4117 if (rhscode
== MULT_EXPR
4118 && TREE_CODE (binrhs
) == SSA_NAME
4119 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs
), &base
, &exponent
))
4121 add_repeat_to_ops_vec (ops
, base
, exponent
);
4122 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs
), true);
4125 add_to_ops_vec (ops
, binrhs
);
/* Repropagate the negates back into subtracts, since no other pass
   currently does it.  */
4132 repropagate_negates (void)
4137 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
4139 gimple
*user
= get_single_immediate_use (negate
);
4141 if (!user
|| !is_gimple_assign (user
))
4144 /* The negate operand can be either operand of a PLUS_EXPR
4145 (it can be the LHS if the RHS is a constant for example).
4147 Force the negate operand to the RHS of the PLUS_EXPR, then
4148 transform the PLUS_EXPR into a MINUS_EXPR. */
4149 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
4151 /* If the negated operand appears on the LHS of the
4152 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4153 to force the negated operand to the RHS of the PLUS_EXPR. */
4154 if (gimple_assign_rhs1 (user
) == negate
)
4156 swap_ssa_operands (user
,
4157 gimple_assign_rhs1_ptr (user
),
4158 gimple_assign_rhs2_ptr (user
));
4161 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4162 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4163 if (gimple_assign_rhs2 (user
) == negate
)
4165 tree rhs1
= gimple_assign_rhs1 (user
);
4166 tree rhs2
= get_unary_op (negate
, NEGATE_EXPR
);
4167 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
4168 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
, rhs2
);
4172 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
4174 if (gimple_assign_rhs1 (user
) == negate
)
4179 which we transform into
4182 This pushes down the negate which we possibly can merge
4183 into some other operation, hence insert it into the
4184 plus_negates vector. */
4185 gimple
*feed
= SSA_NAME_DEF_STMT (negate
);
4186 tree a
= gimple_assign_rhs1 (feed
);
4187 tree b
= gimple_assign_rhs2 (user
);
4188 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
);
4189 gimple_stmt_iterator gsi2
= gsi_for_stmt (user
);
4190 tree x
= make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed
)));
4191 gimple
*g
= gimple_build_assign (x
, PLUS_EXPR
, a
, b
);
4192 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
4193 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, x
);
4194 user
= gsi_stmt (gsi2
);
4196 reassoc_remove_stmt (&gsi
);
4197 release_defs (feed
);
4198 plus_negates
.safe_push (gimple_assign_lhs (user
));
4202 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4203 rid of one operation. */
4204 gimple
*feed
= SSA_NAME_DEF_STMT (negate
);
4205 tree a
= gimple_assign_rhs1 (feed
);
4206 tree rhs1
= gimple_assign_rhs1 (user
);
4207 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
4208 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, a
);
4209 update_stmt (gsi_stmt (gsi
));
/* Returns true if OP is of a type for which we can do reassociation.
   That is for integral or non-saturating fixed-point types, and for
   floating point type when associative-math is enabled.  */
can_reassociate_p (tree op)
  tree type = TREE_TYPE (op);
  if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
      || NON_SAT_FIXED_POINT_TYPE_P (type)
      || (flag_associative_math && FLOAT_TYPE_P (type)))
/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   IE given
   b = c - d
   q = b - r
   k = t - q

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement
   and set UIDs within each basic block.  */
break_up_subtract_bb (basic_block bb)
  gimple_stmt_iterator gsi;
  unsigned int uid = 1;
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      gimple *stmt = gsi_stmt (gsi);
      gimple_set_visited (stmt, false);
      gimple_set_uid (stmt, uid++);
      if (!is_gimple_assign (stmt)
	  || !can_reassociate_p (gimple_assign_lhs (stmt)))
      /* Look for simple gimple subtract operations.  */
      if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
	  if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
	      || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
	  /* Check for a subtract used only in an addition.  If this
	     is the case, transform it into add of a negate for better
	     reassociation.  IE transform C = A-B into C = A + -B if C
	     is only used in an addition.  */
	  if (should_break_up_subtract (stmt))
	    break_up_subtract (stmt, &gsi);
      else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
	       && can_reassociate_p (gimple_assign_rhs1 (stmt)))
	plus_negates.safe_push (gimple_assign_lhs (stmt));
  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son = next_dom_son (CDI_DOMINATORS, son))
    break_up_subtract_bb (son);
/* Used for repeated factor analysis.  */
struct repeat_factor_d
  /* An SSA name that occurs in a multiply chain.  */
  /* Cached rank of the factor.  */
  /* Number of occurrences of the factor in the chain.  */
  HOST_WIDE_INT count;
  /* An SSA name representing the product of this factor and
     all factors appearing later in the repeated factor vector.  */
typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
typedef const struct repeat_factor_d *const_repeat_factor_t;

static vec<repeat_factor> repeat_factor_vec;
/* Used for sorting the repeat factor vector.  Sort primarily by
   ascending occurrence count, secondarily by descending rank.  */
compare_repeat_factors (const void *x1, const void *x2)
  const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
  const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
  if (rf1->count != rf2->count)
    return rf1->count - rf2->count;
  return rf2->rank - rf1->rank;
/* Look for repeated operands in OPS in the multiply tree rooted at
   STMT.  Replace them with an optimal sequence of multiplies and powi
   builtin calls, and remove the used operands from OPS.  Return an
   SSA name representing the value of the replacement sequence.  */
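/* As a small example of what this produces (names invented): for the
   chain x * x * y * y * y the factor counts are x:2, y:3.  The lowest
   count among repeated factors is 2, so the product x * y is raised to
   that power and the single leftover y is multiplied back in:
     t1 = x * y;
     t2 = __builtin_powi (t1, 2);
     result = t2 * y;
   computing x*x * y*y*y with two multiplies and one powi call.  */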
attempt_builtin_powi (gimple *stmt, vec<operand_entry_t> *ops)
  unsigned i, j, vec_len;
4338 repeat_factor_t rf1
, rf2
;
4339 repeat_factor rfnew
;
4340 tree result
= NULL_TREE
;
4341 tree target_ssa
, iter_result
;
4342 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
4343 tree powi_fndecl
= mathfn_built_in (type
, BUILT_IN_POWI
);
4344 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4345 gimple
*mul_stmt
, *pow_stmt
;
4347   /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
       target.  */

4352   /* Allocate the repeated factor vector.  */
4353   repeat_factor_vec.create (10);
4355 /* Scan the OPS vector for all SSA names in the product and build
4356 up a vector of occurrence counts for each factor. */
4357   FOR_EACH_VEC_ELT (*ops, i, oe)
4359       if (TREE_CODE (oe->op) == SSA_NAME)
4361           FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4363             if (rf1->factor == oe->op)
4365                 rf1->count += oe->count;
4370           if (j >= repeat_factor_vec.length ())
4372               rfnew.factor = oe->op;
4373               rfnew.rank = oe->rank;
4374               rfnew.count = oe->count;
4375               rfnew.repr = NULL_TREE;
4376               repeat_factor_vec.safe_push (rfnew);
4381 /* Sort the repeated factor vector by (a) increasing occurrence count,
4382 and (b) decreasing rank. */
4383   repeat_factor_vec.qsort (compare_repeat_factors);
4385 /* It is generally best to combine as many base factors as possible
4386 into a product before applying __builtin_powi to the result.
4387 However, the sort order chosen for the repeated factor vector
4388 allows us to cache partial results for the product of the base
4389 factors for subsequent use. When we already have a cached partial
4390 result from a previous iteration, it is best to make use of it
4391 before looking for another __builtin_pow opportunity.
4393 As an example, consider x * x * y * y * y * z * z * z * z.
4394 We want to first compose the product x * y * z, raise it to the
4395 second power, then multiply this by y * z, and finally multiply
4396 by z. This can be done in 5 multiplies provided we cache y * z
4397 for use in both expressions:
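     (Reconstructed sketch of that five-multiply sequence; t1..t4 are
      illustrative temporaries only:

	  t1 = y * z
	  t2 = t1 * x            [x * y * z]
	  t3 = t2 * t2           [(x * y * z)^2]
	  t4 = t3 * t1           [multiply by the cached y * z]
	  result = t4 * z

      Ignoring the cached t1 and squaring z separately would instead
      cost a sixth multiply.)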
4405      If we instead ignored the cached y * z and first multiplied by
4406      the __builtin_pow opportunity z * z, we would get an inferior,
          longer multiply sequence.  */
4415   vec_len = repeat_factor_vec.length ();
4417 /* Repeatedly look for opportunities to create a builtin_powi call. */
4420       HOST_WIDE_INT power;
4422 /* First look for the largest cached product of factors from
4423 preceding iterations. If found, create a builtin_powi for
4424 it if the minimum occurrence count for its factors is at
4425 least 2, or just use this cached product as our next
4426 multiplicand if the minimum occurrence count is 1. */
4427       FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4429         if (rf1->repr && rf1->count > 0)
4439           iter_result = rf1->repr;
4441           if (dump_file && (dump_flags & TDF_DETAILS))
4445               fputs ("Multiplying by cached product ", dump_file);
4446               for (elt = j; elt < vec_len; elt++)
4448                   rf = &repeat_factor_vec[elt];
4449                   print_generic_expr (dump_file, rf->factor, 0);
4450                   if (elt < vec_len - 1)
4451                     fputs (" * ", dump_file);
4453               fputs ("\n", dump_file);
4458               iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4459               pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4460                                             build_int_cst (integer_type_node,
                                                               power));
4462               gimple_call_set_lhs (pow_stmt, iter_result);
4463               gimple_set_location (pow_stmt, gimple_location (stmt));
4464               gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4466               if (dump_file && (dump_flags & TDF_DETAILS))
4470                   fputs ("Building __builtin_pow call for cached product (",
                             dump_file);
4472                   for (elt = j; elt < vec_len; elt++)
4474                       rf = &repeat_factor_vec[elt];
4475                       print_generic_expr (dump_file, rf->factor, 0);
4476                       if (elt < vec_len - 1)
4477                         fputs (" * ", dump_file);
4479                   fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n",
                               power);
4486       /* Otherwise, find the first factor in the repeated factor
4487          vector whose occurrence count is at least 2.  If no such
4488          factor exists, there are no builtin_powi opportunities
           remaining.  */
4490       FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4492         if (rf1->count >= 2)
4501       if (dump_file && (dump_flags & TDF_DETAILS))
4505           fputs ("Building __builtin_pow call for (", dump_file);
4506           for (elt = j; elt < vec_len; elt++)
4508               rf = &repeat_factor_vec[elt];
4509               print_generic_expr (dump_file, rf->factor, 0);
4510               if (elt < vec_len - 1)
4511                 fputs (" * ", dump_file);
4513           fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n", power);
4516       reassociate_stats.pows_created++;
4518 /* Visit each element of the vector in reverse order (so that
4519 high-occurrence elements are visited first, and within the
4520 same occurrence count, lower-ranked elements are visited
4521 first). Form a linear product of all elements in this order
4522          whose occurrence count is at least that of element J.
4523 Record the SSA name representing the product of each element
4524 with all subsequent elements in the vector. */
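      /* (For illustration: with the sorted vector x (2), y (3), z (4),
	 this walk records repr(z) = z, repr(y) = y * z and
	 repr(x) = x * y * z, i.e. each element's repr is the product of
	 itself with everything after it -- the cached partial products
	 referred to above.)  */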
4525       if (j == vec_len - 1)
4526         rf1->repr = rf1->factor;
4529           for (ii = vec_len - 2; ii >= (int)j; ii--)
4533               rf1 = &repeat_factor_vec[ii];
4534               rf2 = &repeat_factor_vec[ii + 1];
4536               /* Init the last factor's representative to be itself.  */
4538                 rf2->repr = rf2->factor;
4543               target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4544               mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
                                                  rf1->factor, rf2->repr);
4546               gimple_set_location (mul_stmt, gimple_location (stmt));
4547               gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4548               rf1->repr = target_ssa;
4550               /* Don't reprocess the multiply we just introduced.  */
4551               gimple_set_visited (mul_stmt, true);
4555 /* Form a call to __builtin_powi for the maximum product
4556 just formed, raised to the power obtained earlier. */
4557       rf1 = &repeat_factor_vec[j];
4558       iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4559       pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4560                                     build_int_cst (integer_type_node,
                                                       power));
4562       gimple_call_set_lhs (pow_stmt, iter_result);
4563       gimple_set_location (pow_stmt, gimple_location (stmt));
4564       gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4567 /* If we previously formed at least one other builtin_powi call,
4568 form the product of this one and those others. */
4571           tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4572           mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4573                                           result, iter_result);
4574           gimple_set_location (mul_stmt, gimple_location (stmt));
4575           gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4576           gimple_set_visited (mul_stmt, true);
4577           result = new_result;
4580         result = iter_result;
4582       /* Decrement the occurrence count of each element in the product
4583          by the count found above, and remove this many copies of each
           factor from OPS.  */
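      /* (Continuing the illustration: after building __builtin_powi of
	 x * y * z to the power 2, the counts drop to x:0, y:1, z:2 and
	 two copies of each of x, y and z are removed from OPS; the
	 remaining y and z occurrences are consumed on later iterations
	 via the cached y * z product and a final multiply by z.)  */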
4585       for (i = j; i < vec_len; i++)
4590           rf1 = &repeat_factor_vec[i];
4591           rf1->count -= power;
4593           FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4595               if (oe->op == rf1->factor)
4599                   ops->ordered_remove (n);
4615   /* At this point all elements in the repeated factor vector have a
4616      remaining occurrence count of 0 or 1, and those with a count of 1
4617      don't have cached representatives.  Re-sort the ops vector and
       clean up.  */
4619   ops->qsort (sort_by_operand_rank);
4620   repeat_factor_vec.release ();
4622 /* Return the final product computed herein. Note that there may
4623 still be some elements with single occurrence count left in OPS;
4624 those will be handled by the normal reassociation logic. */
4628 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4631 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
4635   if (dump_file && (dump_flags & TDF_DETAILS))
4637       fprintf (dump_file, "Transforming ");
4638       print_gimple_stmt (dump_file, stmt, 0, 0);
4641   rhs1 = gimple_assign_rhs1 (stmt);
4642   gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4644   remove_visited_stmt_chain (rhs1);
4646   if (dump_file && (dump_flags & TDF_DETAILS))
4648       fprintf (dump_file, " into ");
4649       print_gimple_stmt (dump_file, stmt, 0, 0);
4653 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4656 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
4657                             tree rhs1, tree rhs2)
4659   if (dump_file && (dump_flags & TDF_DETAILS))
4661       fprintf (dump_file, "Transforming ");
4662       print_gimple_stmt (dump_file, stmt, 0, 0);
4665   gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4666   update_stmt (gsi_stmt (*gsi));
4667   remove_visited_stmt_chain (rhs1);
4669   if (dump_file && (dump_flags & TDF_DETAILS))
4671       fprintf (dump_file, " into ");
4672       print_gimple_stmt (dump_file, stmt, 0, 0);
4676 /* Reassociate expressions in basic block BB and its post-dominator as
   children.  */
4680 reassociate_bb (basic_block bb)
4682   gimple_stmt_iterator gsi;
4684   gimple *stmt = last_stmt (bb);
4686   if (stmt && !gimple_visited_p (stmt))
4687     maybe_optimize_range_tests (stmt);
4689   for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4691       stmt = gsi_stmt (gsi);
4693       if (is_gimple_assign (stmt)
4694           && !stmt_could_throw_p (stmt))
4696           tree lhs, rhs1, rhs2;
4697           enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4699 /* If this is not a gimple binary expression, there is
4700 nothing for us to do with it. */
4701           if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)

4704           /* If this was part of an already processed statement,
4705              we don't need to touch it again.  */
4706           if (gimple_visited_p (stmt))
4708               /* This statement might have become dead because of previous
                     reassociations.  */
4710               if (has_zero_uses (gimple_get_lhs (stmt)))
4712                   reassoc_remove_stmt (&gsi);
4713                   release_defs (stmt);
4714                   /* We might end up removing the last stmt above, which
4715                      leaves the iterator at the end of the sequence.
4716                      Reset it to the last stmt of the block in that case;
4717                      the block might now be empty as well, if we removed
4718                      its last statement, in which case we need to bail
4719                      out.  */
4720                   if (gsi_end_p (gsi))
4722                       gsi = gsi_last_bb (bb);
4723                       if (gsi_end_p (gsi))
4730           lhs = gimple_assign_lhs (stmt);
4731           rhs1 = gimple_assign_rhs1 (stmt);
4732           rhs2 = gimple_assign_rhs2 (stmt);
4734           /* For operations other than bitwise and min/max ones we can't
4735              associate all types.  Verify that here.  */
4736           if (rhs_code != BIT_IOR_EXPR
4737               && rhs_code != BIT_AND_EXPR
4738               && rhs_code != BIT_XOR_EXPR
4739               && rhs_code != MIN_EXPR
4740               && rhs_code != MAX_EXPR
4741               && (!can_reassociate_p (lhs)
4742                   || !can_reassociate_p (rhs1)
4743                   || !can_reassociate_p (rhs2)))
4746           if (associative_tree_code (rhs_code))
4748               auto_vec<operand_entry_t> ops;
4749               tree powi_result = NULL_TREE;
4751 /* There may be no immediate uses left by the time we
4752 get here because we may have eliminated them all. */
4753               if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4756               gimple_set_visited (stmt, true);
4757               linearize_expr_tree (&ops, stmt, true, true);
4758               ops.qsort (sort_by_operand_rank);
4759               optimize_ops_list (rhs_code, &ops);
4760               if (undistribute_ops_list (rhs_code, &ops,
4761                                          loop_containing_stmt (stmt)))
4763                   ops.qsort (sort_by_operand_rank);
4764                   optimize_ops_list (rhs_code, &ops);
4767               if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4768                 optimize_range_tests (rhs_code, &ops);
4770               if (first_pass_instance
4771                   && rhs_code == MULT_EXPR
4772                   && flag_unsafe_math_optimizations)
4773                 powi_result = attempt_builtin_powi (stmt, &ops);
4775 /* If the operand vector is now empty, all operands were
4776 consumed by the __builtin_powi optimization. */
4777               if (ops.length () == 0)
4778                 transform_stmt_to_copy (&gsi, stmt, powi_result);
4779               else if (ops.length () == 1)
4781                   tree last_op = ops.last ()->op;
4784                     transform_stmt_to_multiply (&gsi, stmt, last_op,
                                                    powi_result);
4787                     transform_stmt_to_copy (&gsi, stmt, last_op);
4791                   machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4792                   int ops_num = ops.length ();
4793                   int width = get_reassociation_width (ops_num, rhs_code, mode);
4796                   if (dump_file && (dump_flags & TDF_DETAILS))
4798                     fprintf (dump_file,
                               "Width = %d was chosen for reassociation\n", width);
4801                   if (width > 1 && ops.length () > 3)
4802                     rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
                                                    width, ops);
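                  /* (For illustration: with four operands a, b, c, d and a
                     reassociation width of 2, the parallel form computes
                     (a + b) + (c + d) rather than ((a + b) + c) + d,
                     exposing two independent additions.)  */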
4806 /* When there are three operands left, we want
4807 to make sure the ones that get the double
4808 binary op are chosen wisely. */
4809                       int len = ops.length ();
4811                       swap_ops_for_binary_stmt (ops, len - 3, stmt);
4813                       new_lhs = rewrite_expr_tree (stmt, 0, ops,
4814                                                    powi_result != NULL);
4817 /* If we combined some repeated factors into a
4818 __builtin_powi call, multiply that result by the
4819 reassociated operands. */
4822                   gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4823                   tree type = TREE_TYPE (lhs);
4824                   tree target_ssa = make_temp_ssa_name (type, NULL,
                                                            "reassocmul");
4826                   gimple_set_lhs (lhs_stmt, target_ssa);
4827                   update_stmt (lhs_stmt);
4829                     target_ssa = new_lhs;
4830                   mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
4831                                                   powi_result, target_ssa);
4832                   gimple_set_location (mul_stmt, gimple_location (stmt));
4833                   gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4839   for (son = first_dom_son (CDI_POST_DOMINATORS, bb); son;
4841        son = next_dom_son (CDI_POST_DOMINATORS, son))
4842     reassociate_bb (son);
4845 /* Add jumps around shifts for range tests turned into bit tests.
4846 For each SSA_NAME VAR we have code like:
4847 VAR = ...; // final stmt of range comparison
4848 // bit test here...;
4849 OTHERVAR = ...; // final stmt of the bit test sequence
4850 RES = VAR | OTHERVAR;
4851    Turn the above into:
   <l1>:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
4858    // bit test here...;
   OTHERVAR = ...; // final stmt of the bit test sequence
   <l3>:
4861    # RES = PHI<1(l1), OTHERVAR(l2)>;  */
4869   FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
4871       gimple *def_stmt = SSA_NAME_DEF_STMT (var);
4874       bool ok = single_imm_use (var, &use, &use_stmt);
4876       gcc_assert (ok && is_gimple_assign (use_stmt)
4877                   && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
4878                   && gimple_bb (def_stmt) == gimple_bb (use_stmt));
4880       basic_block cond_bb = gimple_bb (def_stmt);
4881       basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
4882       basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
4884       gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
4885       gimple *g = gimple_build_cond (NE_EXPR, var,
4886                                      build_zero_cst (TREE_TYPE (var)),
4887                                      NULL_TREE, NULL_TREE);
4888       location_t loc = gimple_location (use_stmt);
4889       gimple_set_location (g, loc);
4890       gsi_insert_after (&gsi, g, GSI_NEW_STMT);
4892       edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
4893       etrue->probability = REG_BR_PROB_BASE / 2;
4894       etrue->count = cond_bb->count / 2;
4895       edge efalse = find_edge (cond_bb, then_bb);
4896       efalse->flags = EDGE_FALSE_VALUE;
4897       efalse->probability -= etrue->probability;
4898       efalse->count -= etrue->count;
4899       then_bb->count -= etrue->count;
4901       tree othervar = NULL_TREE;
4902       if (gimple_assign_rhs1 (use_stmt) == var)
4903         othervar = gimple_assign_rhs2 (use_stmt);
4904       else if (gimple_assign_rhs2 (use_stmt) == var)
4905         othervar = gimple_assign_rhs1 (use_stmt);
4908       tree lhs = gimple_assign_lhs (use_stmt);
4909       gphi *phi = create_phi_node (lhs, merge_bb);
4910       add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
4911       add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
4912       gsi = gsi_for_stmt (use_stmt);
4913       gsi_remove (&gsi, true);
4915       set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
4916       set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
4918   reassoc_branch_fixups.release ();
4921 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4922 void debug_ops_vector (vec<operand_entry_t> ops);
4924 /* Dump the operand entry vector OPS to FILE. */
4927 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4932   FOR_EACH_VEC_ELT (ops, i, oe)
4934       fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4935       print_generic_expr (file, oe->op, 0);
4939 /* Dump the operand entry vector OPS to STDERR. */
4942 debug_ops_vector (vec<operand_entry_t> ops)
4944   dump_ops_vector (stderr, ops);
4950   break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4951   reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4954 /* Initialize the reassociation pass. */
4961   int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4963   /* Find the loops, so that we can prevent moving calculations in
     them.  */
4965   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4967   memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4969   next_operand_entry_id = 0;
4971 /* Reverse RPO (Reverse Post Order) will give us something where
4972 deeper loops come later. */
4973   pre_and_rev_post_order_compute (NULL, bbs, false);
4974   bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4975   operand_rank = new hash_map<tree, long>;
4977 /* Give each default definition a distinct rank. This includes
4978 parameters and the static chain. Walk backwards over all
4979 SSA names so that we get proper rank ordering according
4980 to tree_swap_operands_p. */
4981   for (i = num_ssa_names - 1; i > 0; --i)
4983       tree name = ssa_name (i);
4984       if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4985         insert_operand_rank (name, ++rank);
4988 /* Set up rank for each BB */
4989   for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
4990     bb_rank[bbs[i]] = ++rank << 16;
4993   calculate_dominance_info (CDI_POST_DOMINATORS);
4994   plus_negates = vNULL;
4997 /* Cleanup after the reassociation pass, and print stats if
   requested.  */
5003   statistics_counter_event (cfun, "Linearized",
5004                             reassociate_stats.linearized);
5005   statistics_counter_event (cfun, "Constants eliminated",
5006                             reassociate_stats.constants_eliminated);
5007   statistics_counter_event (cfun, "Ops eliminated",
5008                             reassociate_stats.ops_eliminated);
5009   statistics_counter_event (cfun, "Statements rewritten",
5010                             reassociate_stats.rewritten);
5011   statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5012                             reassociate_stats.pows_encountered);
5013   statistics_counter_event (cfun, "Built-in powi calls created",
5014                             reassociate_stats.pows_created);
5016   delete operand_rank;
5017   operand_entry_pool.release ();
5019   plus_negates.release ();
5020   free_dominance_info (CDI_POST_DOMINATORS);
5021   loop_optimizer_finalize ();
5024 /* Gate and execute functions for Reassociation. */
5027 execute_reassoc (void)
5032 repropagate_negates ();
5041 const pass_data pass_data_reassoc =
5043   GIMPLE_PASS, /* type */
5044   "reassoc", /* name */
5045   OPTGROUP_NONE, /* optinfo_flags */
5046   TV_TREE_REASSOC, /* tv_id */
5047   ( PROP_cfg | PROP_ssa ), /* properties_required */
5048   0, /* properties_provided */
5049   0, /* properties_destroyed */
5050   0, /* todo_flags_start */
5051   TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5054 class pass_reassoc : public gimple_opt_pass
5057   pass_reassoc (gcc::context *ctxt)
5058     : gimple_opt_pass (pass_data_reassoc, ctxt)
5061   /* opt_pass methods: */
5062   opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5063   virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5064   virtual unsigned int execute (function *) { return execute_reassoc (); }
5066 }; // class pass_reassoc
5071 make_pass_reassoc (gcc::context *ctxt)
5073   return new pass_reassoc (ctxt);