1 /* Reassociation for trees.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
35 #include "gimple-pretty-print.h"
36 #include "tree-inline.h"
37 #include "internal-fn.h"
38 #include "gimple-fold.h"
40 #include "gimple-iterator.h"
41 #include "gimplify-me.h"
43 #include "tree-ssa-loop-niter.h"
44 #include "tree-ssa-loop.h"
46 #include "insn-config.h"
57 #include "tree-iterator.h"
58 #include "tree-pass.h"
59 #include "alloc-pool.h"
60 #include "langhooks.h"
64 #include "diagnostic-core.h"
67 #include "insn-codes.h"
70 /* This is a simple global reassociation pass. It is, in part, based
71 on the LLVM pass of the same name (They do some things more/less
72 than we do, in different orders, etc).
74 It consists of five steps:
76 1. Breaking up subtract operations into addition + negate, where
77 it would promote the reassociation of adds.
79 2. Left linearization of the expression trees, so that (A+B)+(C+D)
80 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
81 During linearization, we place the operands of the binary
82 expressions into a vector of operand_entry_t
84 3. Optimization of the operand lists, eliminating things like a +
87 3a. Combine repeated factors with the same occurrence counts
88 into a __builtin_powi call that will later be optimized into
89 an optimal number of multiplies.
91 4. Rewrite the expression trees we linearized and optimized so
92 they are in proper rank order.
94 5. Repropagate negates, as nothing else will clean it up ATM.
96 A bit of theory on #4, since nobody seems to write anything down
97 about why it makes sense to do it the way they do it:
99 We could do this much nicer theoretically, but don't (for reasons
100 explained after how to do it theoretically nice :P).
102 In order to promote the most redundancy elimination, you want
103 binary expressions whose operands are the same rank (or
104 preferably, the same value) exposed to the redundancy eliminator,
105 for possible elimination.
107 So the way to do this if we really cared, is to build the new op
108 tree from the leaves to the roots, merging as you go, and putting the
109 new op on the end of the worklist, until you are left with one
110 thing on the worklist.
112 IE if you have to rewrite the following set of operands (listed with
113 rank in parentheses), with opcode PLUS_EXPR:
115 a (1), b (1), c (1), d (2), e (2)
118 We start with our merge worklist empty, and the ops list with all of
121 You want to first merge all leaves of the same rank, as much as
124 So first build a binary op of
126 mergetmp = a + b, and put "mergetmp" on the merge worklist.
128 Because there is no three operand form of PLUS_EXPR, c is not going to
129 be exposed to redundancy elimination as a rank 1 operand.
131 So you might as well throw it on the merge worklist (you could also
132 consider it to now be a rank two operand, and merge it with d and e,
133 but in this case, you then have evicted e from a binary op. So at
134 least in this situation, you can't win.)
136 Then build a binary op of d + e
139 and put mergetmp2 on the merge worklist.
141 so merge worklist = {mergetmp, c, mergetmp2}
143 Continue building binary ops of these operations until you have only
144 one operation left on the worklist.
149 mergetmp3 = mergetmp + c
151 worklist = {mergetmp2, mergetmp3}
153 mergetmp4 = mergetmp2 + mergetmp3
155 worklist = {mergetmp4}
157 because we have one operation left, we can now just set the original
158 statement equal to the result of that operation.
160 This will at least expose a + b and d + e to redundancy elimination
161 as binary operations.
163 For extra points, you can reuse the old statements to build the
164 mergetmps, since you shouldn't run out.
166 So why don't we do this?
168 Because it's expensive, and rarely will help. Most trees we are
169 reassociating have 3 or less ops. If they have 2 ops, they already
170 will be written into a nice single binary op. If you have 3 ops, a
171 single simple check suffices to tell you whether the first two are of the
172 same rank. If so, you know to order it
175 newstmt = mergetmp + op3
179 newstmt = mergetmp + op1
181 If all three are of the same rank, you can't expose them all in a
182 single binary operator anyway, so the above is *still* the best you
185 Thus, this is what we do. When we have three ops left, we check to see
186 what order to put them in, and call it a day. As a nod to vector sum
187 reduction, we check if any of the ops are really a phi node that is a
188 destructive update for the associating op, and keep the destructive
189 update together for vector sum reduction recognition. */
196 int constants_eliminated
;
199 int pows_encountered
;
203 /* Operator, rank pair. */
204 typedef struct operand_entry
212 static object_allocator
<operand_entry
> operand_entry_pool ("operand entry pool",
215 /* This is used to assign a unique ID to each struct operand_entry
216 so that qsort results are identical on different hosts. */
217 static int next_operand_entry_id
;
219 /* Starting rank number for a given basic block, so that we can rank
220 operations using unmovable instructions in that BB based on the bb
222 static long *bb_rank
;
224 /* Operand->rank hashtable. */
225 static hash_map
<tree
, long> *operand_rank
;
227 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
228 all basic blocks the CFG should be adjusted - basic blocks
229 split right after that SSA_NAME's definition statement and before
230 the only use, which must be a bit ior. */
231 static vec
<tree
> reassoc_branch_fixups
;
234 static long get_rank (tree
);
235 static bool reassoc_stmt_dominates_stmt_p (gimple
, gimple
);
237 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
238 possibly added by gsi_remove. */
241 reassoc_remove_stmt (gimple_stmt_iterator
*gsi
)
243 gimple stmt
= gsi_stmt (*gsi
);
245 if (!MAY_HAVE_DEBUG_STMTS
|| gimple_code (stmt
) == GIMPLE_PHI
)
246 return gsi_remove (gsi
, true);
248 gimple_stmt_iterator prev
= *gsi
;
250 unsigned uid
= gimple_uid (stmt
);
251 basic_block bb
= gimple_bb (stmt
);
252 bool ret
= gsi_remove (gsi
, true);
253 if (!gsi_end_p (prev
))
256 prev
= gsi_start_bb (bb
);
257 gimple end_stmt
= gsi_stmt (*gsi
);
258 while ((stmt
= gsi_stmt (prev
)) != end_stmt
)
260 gcc_assert (stmt
&& is_gimple_debug (stmt
) && gimple_uid (stmt
) == 0);
261 gimple_set_uid (stmt
, uid
);
267 /* Bias amount for loop-carried phis. We want this to be larger than
268 the depth of any reassociation tree we can see, but not larger than
269 the rank difference between two blocks. */
270 #define PHI_LOOP_BIAS (1 << 15)
272 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
273 an innermost loop, and the phi has only a single use which is inside
274 the loop, then the rank is the block rank of the loop latch plus an
275 extra bias for the loop-carried dependence. This causes expressions
276 calculated into an accumulator variable to be independent for each
277 iteration of the loop. If STMT is some other phi, the rank is the
278 block rank of its containing block. */
280 phi_rank (gimple stmt
)
282 basic_block bb
= gimple_bb (stmt
);
283 struct loop
*father
= bb
->loop_father
;
289 /* We only care about real loops (those with a latch). */
291 return bb_rank
[bb
->index
];
293 /* Interesting phis must be in headers of innermost loops. */
294 if (bb
!= father
->header
296 return bb_rank
[bb
->index
];
298 /* Ignore virtual SSA_NAMEs. */
299 res
= gimple_phi_result (stmt
);
300 if (virtual_operand_p (res
))
301 return bb_rank
[bb
->index
];
303 /* The phi definition must have a single use, and that use must be
304 within the loop. Otherwise this isn't an accumulator pattern. */
305 if (!single_imm_use (res
, &use
, &use_stmt
)
306 || gimple_bb (use_stmt
)->loop_father
!= father
)
307 return bb_rank
[bb
->index
];
309 /* Look for phi arguments from within the loop. If found, bias this phi. */
310 for (i
= 0; i
< gimple_phi_num_args (stmt
); i
++)
312 tree arg
= gimple_phi_arg_def (stmt
, i
);
313 if (TREE_CODE (arg
) == SSA_NAME
314 && !SSA_NAME_IS_DEFAULT_DEF (arg
))
316 gimple def_stmt
= SSA_NAME_DEF_STMT (arg
);
317 if (gimple_bb (def_stmt
)->loop_father
== father
)
318 return bb_rank
[father
->latch
->index
] + PHI_LOOP_BIAS
;
322 /* Must be an uninteresting phi. */
323 return bb_rank
[bb
->index
];
326 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
327 loop-carried dependence of an innermost loop, return TRUE; else
330 loop_carried_phi (tree exp
)
335 if (TREE_CODE (exp
) != SSA_NAME
336 || SSA_NAME_IS_DEFAULT_DEF (exp
))
339 phi_stmt
= SSA_NAME_DEF_STMT (exp
);
341 if (gimple_code (SSA_NAME_DEF_STMT (exp
)) != GIMPLE_PHI
)
344 /* Non-loop-carried phis have block rank. Loop-carried phis have
345 an additional bias added in. If this phi doesn't have block rank,
346 it's biased and should not be propagated. */
347 block_rank
= bb_rank
[gimple_bb (phi_stmt
)->index
];
349 if (phi_rank (phi_stmt
) != block_rank
)
355 /* Return the maximum of RANK and the rank that should be propagated
356 from expression OP. For most operands, this is just the rank of OP.
357 For loop-carried phis, the value is zero to avoid undoing the bias
358 in favor of the phi. */
360 propagate_rank (long rank
, tree op
)
364 if (loop_carried_phi (op
))
367 op_rank
= get_rank (op
);
369 return MAX (rank
, op_rank
);
372 /* Look up the operand rank structure for expression E. */
375 find_operand_rank (tree e
)
377 long *slot
= operand_rank
->get (e
);
378 return slot
? *slot
: -1;
381 /* Insert {E,RANK} into the operand rank hashtable. */
384 insert_operand_rank (tree e
, long rank
)
386 gcc_assert (rank
> 0);
387 gcc_assert (!operand_rank
->put (e
, rank
));
390 /* Given an expression E, return the rank of the expression. */
395 /* SSA_NAME's have the rank of the expression they are the result
397 For globals and uninitialized values, the rank is 0.
398 For function arguments, use the pre-setup rank.
399 For PHI nodes, stores, asm statements, etc, we use the rank of
401 For simple operations, the rank is the maximum rank of any of
402 its operands, or the bb_rank, whichever is less.
403 I make no claims that this is optimal, however, it gives good
406 /* We make an exception to the normal ranking system to break
407 dependences of accumulator variables in loops. Suppose we
408 have a simple one-block loop containing:
415 As shown, each iteration of the calculation into x is fully
416 dependent upon the iteration before it. We would prefer to
417 see this in the form:
424 If the loop is unrolled, the calculations of b and c from
425 different iterations can be interleaved.
427 To obtain this result during reassociation, we bias the rank
428 of the phi definition x_1 upward, when it is recognized as an
429 accumulator pattern. The artificial rank causes it to be
430 added last, providing the desired independence. */
432 if (TREE_CODE (e
) == SSA_NAME
)
439 if (SSA_NAME_IS_DEFAULT_DEF (e
))
440 return find_operand_rank (e
);
442 stmt
= SSA_NAME_DEF_STMT (e
);
443 if (gimple_code (stmt
) == GIMPLE_PHI
)
444 return phi_rank (stmt
);
446 if (!is_gimple_assign (stmt
))
447 return bb_rank
[gimple_bb (stmt
)->index
];
449 /* If we already have a rank for this expression, use that. */
450 rank
= find_operand_rank (e
);
454 /* Otherwise, find the maximum rank for the operands. As an
455 exception, remove the bias from loop-carried phis when propagating
456 the rank so that dependent operations are not also biased. */
457 /* Simply walk over all SSA uses - this takes advatage of the
458 fact that non-SSA operands are is_gimple_min_invariant and
461 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
462 rank
= propagate_rank (rank
, op
);
464 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
466 fprintf (dump_file
, "Rank for ");
467 print_generic_expr (dump_file
, e
, 0);
468 fprintf (dump_file
, " is %ld\n", (rank
+ 1));
471 /* Note the rank in the hashtable so we don't recompute it. */
472 insert_operand_rank (e
, (rank
+ 1));
476 /* Constants, globals, etc., are rank 0 */
481 /* We want integer ones to end up last no matter what, since they are
482 the ones we can do the most with. */
483 #define INTEGER_CONST_TYPE 1 << 3
484 #define FLOAT_CONST_TYPE 1 << 2
485 #define OTHER_CONST_TYPE 1 << 1
487 /* Classify an invariant tree into integer, float, or other, so that
488 we can sort them to be near other constants of the same type. */
490 constant_type (tree t
)
492 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
493 return INTEGER_CONST_TYPE
;
494 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
495 return FLOAT_CONST_TYPE
;
497 return OTHER_CONST_TYPE
;
500 /* qsort comparison function to sort operand entries PA and PB by rank
501 so that the sorted array is ordered by rank in decreasing order. */
503 sort_by_operand_rank (const void *pa
, const void *pb
)
505 const operand_entry_t oea
= *(const operand_entry_t
*)pa
;
506 const operand_entry_t oeb
= *(const operand_entry_t
*)pb
;
508 /* It's nicer for optimize_expression if constants that are likely
509 to fold when added/multiplied//whatever are put next to each
510 other. Since all constants have rank 0, order them by type. */
511 if (oeb
->rank
== 0 && oea
->rank
== 0)
513 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
514 return constant_type (oeb
->op
) - constant_type (oea
->op
);
516 /* To make sorting result stable, we use unique IDs to determine
518 return oeb
->id
- oea
->id
;
521 /* Lastly, make sure the versions that are the same go next to each
523 if ((oeb
->rank
- oea
->rank
== 0)
524 && TREE_CODE (oea
->op
) == SSA_NAME
525 && TREE_CODE (oeb
->op
) == SSA_NAME
)
527 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
528 versions of removed SSA_NAMEs, so if possible, prefer to sort
529 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
531 if (!SSA_NAME_IS_DEFAULT_DEF (oea
->op
)
532 && !SSA_NAME_IS_DEFAULT_DEF (oeb
->op
)
533 && SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
535 gimple stmta
= SSA_NAME_DEF_STMT (oea
->op
);
536 gimple stmtb
= SSA_NAME_DEF_STMT (oeb
->op
);
537 basic_block bba
= gimple_bb (stmta
);
538 basic_block bbb
= gimple_bb (stmtb
);
541 if (bb_rank
[bbb
->index
] != bb_rank
[bba
->index
])
542 return bb_rank
[bbb
->index
] - bb_rank
[bba
->index
];
546 bool da
= reassoc_stmt_dominates_stmt_p (stmta
, stmtb
);
547 bool db
= reassoc_stmt_dominates_stmt_p (stmtb
, stmta
);
553 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
554 return SSA_NAME_VERSION (oeb
->op
) - SSA_NAME_VERSION (oea
->op
);
556 return oeb
->id
- oea
->id
;
559 if (oeb
->rank
!= oea
->rank
)
560 return oeb
->rank
- oea
->rank
;
562 return oeb
->id
- oea
->id
;
565 /* Add an operand entry to *OPS for the tree operand OP. */
568 add_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
)
570 operand_entry_t oe
= operand_entry_pool
.allocate ();
573 oe
->rank
= get_rank (op
);
574 oe
->id
= next_operand_entry_id
++;
579 /* Add an operand entry to *OPS for the tree operand OP with repeat
583 add_repeat_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
,
584 HOST_WIDE_INT repeat
)
586 operand_entry_t oe
= operand_entry_pool
.allocate ();
589 oe
->rank
= get_rank (op
);
590 oe
->id
= next_operand_entry_id
++;
594 reassociate_stats
.pows_encountered
++;
597 /* Return true if STMT is reassociable operation containing a binary
598 operation with tree code CODE, and is inside LOOP. */
601 is_reassociable_op (gimple stmt
, enum tree_code code
, struct loop
*loop
)
603 basic_block bb
= gimple_bb (stmt
);
605 if (gimple_bb (stmt
) == NULL
)
608 if (!flow_bb_inside_loop_p (loop
, bb
))
611 if (is_gimple_assign (stmt
)
612 && gimple_assign_rhs_code (stmt
) == code
613 && has_single_use (gimple_assign_lhs (stmt
)))
620 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
621 operand of the negate operation. Otherwise, return NULL. */
624 get_unary_op (tree name
, enum tree_code opcode
)
626 gimple stmt
= SSA_NAME_DEF_STMT (name
);
628 if (!is_gimple_assign (stmt
))
631 if (gimple_assign_rhs_code (stmt
) == opcode
)
632 return gimple_assign_rhs1 (stmt
);
636 /* If CURR and LAST are a pair of ops that OPCODE allows us to
637 eliminate through equivalences, do so, remove them from OPS, and
638 return true. Otherwise, return false. */
641 eliminate_duplicate_pair (enum tree_code opcode
,
642 vec
<operand_entry_t
> *ops
,
645 operand_entry_t curr
,
646 operand_entry_t last
)
649 /* If we have two of the same op, and the opcode is & |, min, or max,
650 we can eliminate one of them.
651 If we have two of the same op, and the opcode is ^, we can
652 eliminate both of them. */
654 if (last
&& last
->op
== curr
->op
)
662 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
664 fprintf (dump_file
, "Equivalence: ");
665 print_generic_expr (dump_file
, curr
->op
, 0);
666 fprintf (dump_file
, " [&|minmax] ");
667 print_generic_expr (dump_file
, last
->op
, 0);
668 fprintf (dump_file
, " -> ");
669 print_generic_stmt (dump_file
, last
->op
, 0);
672 ops
->ordered_remove (i
);
673 reassociate_stats
.ops_eliminated
++;
678 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
680 fprintf (dump_file
, "Equivalence: ");
681 print_generic_expr (dump_file
, curr
->op
, 0);
682 fprintf (dump_file
, " ^ ");
683 print_generic_expr (dump_file
, last
->op
, 0);
684 fprintf (dump_file
, " -> nothing\n");
687 reassociate_stats
.ops_eliminated
+= 2;
689 if (ops
->length () == 2)
692 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
697 ops
->ordered_remove (i
-1);
698 ops
->ordered_remove (i
-1);
710 static vec
<tree
> plus_negates
;
712 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
713 expression, look in OPS for a corresponding positive operation to cancel
714 it out. If we find one, remove the other from OPS, replace
715 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
719 eliminate_plus_minus_pair (enum tree_code opcode
,
720 vec
<operand_entry_t
> *ops
,
721 unsigned int currindex
,
722 operand_entry_t curr
)
729 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
732 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
733 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
734 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
737 /* Any non-negated version will have a rank that is one less than
738 the current rank. So once we hit those ranks, if we don't find
741 for (i
= currindex
+ 1;
742 ops
->iterate (i
, &oe
)
743 && oe
->rank
>= curr
->rank
- 1 ;
746 if (oe
->op
== negateop
)
749 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
751 fprintf (dump_file
, "Equivalence: ");
752 print_generic_expr (dump_file
, negateop
, 0);
753 fprintf (dump_file
, " + -");
754 print_generic_expr (dump_file
, oe
->op
, 0);
755 fprintf (dump_file
, " -> 0\n");
758 ops
->ordered_remove (i
);
759 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
760 ops
->ordered_remove (currindex
);
761 reassociate_stats
.ops_eliminated
++;
765 else if (oe
->op
== notop
)
767 tree op_type
= TREE_TYPE (oe
->op
);
769 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
771 fprintf (dump_file
, "Equivalence: ");
772 print_generic_expr (dump_file
, notop
, 0);
773 fprintf (dump_file
, " + ~");
774 print_generic_expr (dump_file
, oe
->op
, 0);
775 fprintf (dump_file
, " -> -1\n");
778 ops
->ordered_remove (i
);
779 add_to_ops_vec (ops
, build_int_cst_type (op_type
, -1));
780 ops
->ordered_remove (currindex
);
781 reassociate_stats
.ops_eliminated
++;
787 /* CURR->OP is a negate expr in a plus expr: save it for later
788 inspection in repropagate_negates(). */
789 if (negateop
!= NULL_TREE
)
790 plus_negates
.safe_push (curr
->op
);
795 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
796 bitwise not expression, look in OPS for a corresponding operand to
797 cancel it out. If we find one, remove the other from OPS, replace
798 OPS[CURRINDEX] with 0, and return true. Otherwise, return
802 eliminate_not_pairs (enum tree_code opcode
,
803 vec
<operand_entry_t
> *ops
,
804 unsigned int currindex
,
805 operand_entry_t curr
)
811 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
812 || TREE_CODE (curr
->op
) != SSA_NAME
)
815 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
816 if (notop
== NULL_TREE
)
819 /* Any non-not version will have a rank that is one less than
820 the current rank. So once we hit those ranks, if we don't find
823 for (i
= currindex
+ 1;
824 ops
->iterate (i
, &oe
)
825 && oe
->rank
>= curr
->rank
- 1;
830 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
832 fprintf (dump_file
, "Equivalence: ");
833 print_generic_expr (dump_file
, notop
, 0);
834 if (opcode
== BIT_AND_EXPR
)
835 fprintf (dump_file
, " & ~");
836 else if (opcode
== BIT_IOR_EXPR
)
837 fprintf (dump_file
, " | ~");
838 print_generic_expr (dump_file
, oe
->op
, 0);
839 if (opcode
== BIT_AND_EXPR
)
840 fprintf (dump_file
, " -> 0\n");
841 else if (opcode
== BIT_IOR_EXPR
)
842 fprintf (dump_file
, " -> -1\n");
845 if (opcode
== BIT_AND_EXPR
)
846 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
847 else if (opcode
== BIT_IOR_EXPR
)
848 oe
->op
= build_all_ones_cst (TREE_TYPE (oe
->op
));
850 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
852 ops
->quick_push (oe
);
860 /* Use constant value that may be present in OPS to try to eliminate
861 operands. Note that this function is only really used when we've
862 eliminated ops for other reasons, or merged constants. Across
863 single statements, fold already does all of this, plus more. There
864 is little point in duplicating logic, so I've only included the
865 identities that I could ever construct testcases to trigger. */
868 eliminate_using_constants (enum tree_code opcode
,
869 vec
<operand_entry_t
> *ops
)
871 operand_entry_t oelast
= ops
->last ();
872 tree type
= TREE_TYPE (oelast
->op
);
874 if (oelast
->rank
== 0
875 && (INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
880 if (integer_zerop (oelast
->op
))
882 if (ops
->length () != 1)
884 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
885 fprintf (dump_file
, "Found & 0, removing all other ops\n");
887 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
890 ops
->quick_push (oelast
);
894 else if (integer_all_onesp (oelast
->op
))
896 if (ops
->length () != 1)
898 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
899 fprintf (dump_file
, "Found & -1, removing\n");
901 reassociate_stats
.ops_eliminated
++;
906 if (integer_all_onesp (oelast
->op
))
908 if (ops
->length () != 1)
910 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
911 fprintf (dump_file
, "Found | -1, removing all other ops\n");
913 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
916 ops
->quick_push (oelast
);
920 else if (integer_zerop (oelast
->op
))
922 if (ops
->length () != 1)
924 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
925 fprintf (dump_file
, "Found | 0, removing\n");
927 reassociate_stats
.ops_eliminated
++;
932 if (integer_zerop (oelast
->op
)
933 || (FLOAT_TYPE_P (type
)
934 && !HONOR_NANS (type
)
935 && !HONOR_SIGNED_ZEROS (type
)
936 && real_zerop (oelast
->op
)))
938 if (ops
->length () != 1)
940 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
941 fprintf (dump_file
, "Found * 0, removing all other ops\n");
943 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
945 ops
->quick_push (oelast
);
949 else if (integer_onep (oelast
->op
)
950 || (FLOAT_TYPE_P (type
)
951 && !HONOR_SNANS (type
)
952 && real_onep (oelast
->op
)))
954 if (ops
->length () != 1)
956 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
957 fprintf (dump_file
, "Found * 1, removing\n");
959 reassociate_stats
.ops_eliminated
++;
967 if (integer_zerop (oelast
->op
)
968 || (FLOAT_TYPE_P (type
)
969 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
970 && fold_real_zero_addition_p (type
, oelast
->op
,
971 opcode
== MINUS_EXPR
)))
973 if (ops
->length () != 1)
975 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
976 fprintf (dump_file
, "Found [|^+] 0, removing\n");
978 reassociate_stats
.ops_eliminated
++;
990 static void linearize_expr_tree (vec
<operand_entry_t
> *, gimple
,
993 /* Structure for tracking and counting operands. */
997 enum tree_code oecode
;
1002 /* The heap for the oecount hashtable and the sorted list of operands. */
1003 static vec
<oecount
> cvec
;
1006 /* Oecount hashtable helpers. */
1008 struct oecount_hasher
: int_hash
<int, 0, 1>
1010 static inline hashval_t
hash (int);
1011 static inline bool equal (int, int);
1014 /* Hash function for oecount. */
1017 oecount_hasher::hash (int p
)
1019 const oecount
*c
= &cvec
[p
- 42];
1020 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
1023 /* Comparison function for oecount. */
1026 oecount_hasher::equal (int p1
, int p2
)
1028 const oecount
*c1
= &cvec
[p1
- 42];
1029 const oecount
*c2
= &cvec
[p2
- 42];
1030 return (c1
->oecode
== c2
->oecode
1031 && c1
->op
== c2
->op
);
1034 /* Comparison function for qsort sorting oecount elements by count. */
1037 oecount_cmp (const void *p1
, const void *p2
)
1039 const oecount
*c1
= (const oecount
*)p1
;
1040 const oecount
*c2
= (const oecount
*)p2
;
1041 if (c1
->cnt
!= c2
->cnt
)
1042 return c1
->cnt
- c2
->cnt
;
1044 /* If counts are identical, use unique IDs to stabilize qsort. */
1045 return c1
->id
- c2
->id
;
1048 /* Return TRUE iff STMT represents a builtin call that raises OP
1049 to some exponent. */
1052 stmt_is_power_of_op (gimple stmt
, tree op
)
1056 if (!is_gimple_call (stmt
))
1059 fndecl
= gimple_call_fndecl (stmt
);
1062 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
1065 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1067 CASE_FLT_FN (BUILT_IN_POW
):
1068 CASE_FLT_FN (BUILT_IN_POWI
):
1069 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1076 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1077 in place and return the result. Assumes that stmt_is_power_of_op
1078 was previously called for STMT and returned TRUE. */
1080 static HOST_WIDE_INT
1081 decrement_power (gimple stmt
)
1083 REAL_VALUE_TYPE c
, cint
;
1084 HOST_WIDE_INT power
;
1087 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1089 CASE_FLT_FN (BUILT_IN_POW
):
1090 arg1
= gimple_call_arg (stmt
, 1);
1091 c
= TREE_REAL_CST (arg1
);
1092 power
= real_to_integer (&c
) - 1;
1093 real_from_integer (&cint
, VOIDmode
, power
, SIGNED
);
1094 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1097 CASE_FLT_FN (BUILT_IN_POWI
):
1098 arg1
= gimple_call_arg (stmt
, 1);
1099 power
= TREE_INT_CST_LOW (arg1
) - 1;
1100 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1108 /* Find the single immediate use of STMT's LHS, and replace it
1109 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1110 replace *DEF with OP as well. */
1113 propagate_op_to_single_use (tree op
, gimple stmt
, tree
*def
)
1118 gimple_stmt_iterator gsi
;
1120 if (is_gimple_call (stmt
))
1121 lhs
= gimple_call_lhs (stmt
);
1123 lhs
= gimple_assign_lhs (stmt
);
1125 gcc_assert (has_single_use (lhs
));
1126 single_imm_use (lhs
, &use
, &use_stmt
);
1130 if (TREE_CODE (op
) != SSA_NAME
)
1131 update_stmt (use_stmt
);
1132 gsi
= gsi_for_stmt (stmt
);
1133 unlink_stmt_vdef (stmt
);
1134 reassoc_remove_stmt (&gsi
);
1135 release_defs (stmt
);
1138 /* Walks the linear chain with result *DEF searching for an operation
1139 with operand OP and code OPCODE removing that from the chain. *DEF
1140 is updated if there is only one operand but no operation left. */
1143 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1145 gimple stmt
= SSA_NAME_DEF_STMT (*def
);
1151 if (opcode
== MULT_EXPR
1152 && stmt_is_power_of_op (stmt
, op
))
1154 if (decrement_power (stmt
) == 1)
1155 propagate_op_to_single_use (op
, stmt
, def
);
1159 name
= gimple_assign_rhs1 (stmt
);
1161 /* If this is the operation we look for and one of the operands
1162 is ours simply propagate the other operand into the stmts
1164 if (gimple_assign_rhs_code (stmt
) == opcode
1166 || gimple_assign_rhs2 (stmt
) == op
))
1169 name
= gimple_assign_rhs2 (stmt
);
1170 propagate_op_to_single_use (name
, stmt
, def
);
1174 /* We might have a multiply of two __builtin_pow* calls, and
1175 the operand might be hiding in the rightmost one. */
1176 if (opcode
== MULT_EXPR
1177 && gimple_assign_rhs_code (stmt
) == opcode
1178 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1179 && has_single_use (gimple_assign_rhs2 (stmt
)))
1181 gimple stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1182 if (stmt_is_power_of_op (stmt2
, op
))
1184 if (decrement_power (stmt2
) == 1)
1185 propagate_op_to_single_use (op
, stmt2
, def
);
1190 /* Continue walking the chain. */
1191 gcc_assert (name
!= op
1192 && TREE_CODE (name
) == SSA_NAME
);
1193 stmt
= SSA_NAME_DEF_STMT (name
);
1198 /* Returns true if statement S1 dominates statement S2. Like
1199 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1202 reassoc_stmt_dominates_stmt_p (gimple s1
, gimple s2
)
1204 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1206 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1207 SSA_NAME. Assume it lives at the beginning of function and
1208 thus dominates everything. */
1209 if (!bb1
|| s1
== s2
)
1212 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1218 /* PHIs in the same basic block are assumed to be
1219 executed all in parallel, if only one stmt is a PHI,
1220 it dominates the other stmt in the same basic block. */
1221 if (gimple_code (s1
) == GIMPLE_PHI
)
1224 if (gimple_code (s2
) == GIMPLE_PHI
)
1227 gcc_assert (gimple_uid (s1
) && gimple_uid (s2
));
1229 if (gimple_uid (s1
) < gimple_uid (s2
))
1232 if (gimple_uid (s1
) > gimple_uid (s2
))
1235 gimple_stmt_iterator gsi
= gsi_for_stmt (s1
);
1236 unsigned int uid
= gimple_uid (s1
);
1237 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1239 gimple s
= gsi_stmt (gsi
);
1240 if (gimple_uid (s
) != uid
)
1249 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
1252 /* Insert STMT after INSERT_POINT. */
1255 insert_stmt_after (gimple stmt
, gimple insert_point
)
1257 gimple_stmt_iterator gsi
;
1260 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1261 bb
= gimple_bb (insert_point
);
1262 else if (!stmt_ends_bb_p (insert_point
))
1264 gsi
= gsi_for_stmt (insert_point
);
1265 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1266 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1270 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1271 thus if it must end a basic block, it should be a call that can
1272 throw, or some assignment that can throw. If it throws, the LHS
1273 of it will not be initialized though, so only valid places using
1274 the SSA_NAME should be dominated by the fallthru edge. */
1275 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
1276 gsi
= gsi_after_labels (bb
);
1277 if (gsi_end_p (gsi
))
1279 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1280 gimple_set_uid (stmt
,
1281 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1284 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1285 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1288 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1289 the result. Places the statement after the definition of either
1290 OP1 or OP2. Returns the new statement. */
1293 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
1295 gimple op1def
= NULL
, op2def
= NULL
;
1296 gimple_stmt_iterator gsi
;
1300 /* Create the addition statement. */
1301 op
= make_ssa_name (type
);
1302 sum
= gimple_build_assign (op
, opcode
, op1
, op2
);
1304 /* Find an insertion place and insert. */
1305 if (TREE_CODE (op1
) == SSA_NAME
)
1306 op1def
= SSA_NAME_DEF_STMT (op1
);
1307 if (TREE_CODE (op2
) == SSA_NAME
)
1308 op2def
= SSA_NAME_DEF_STMT (op2
);
1309 if ((!op1def
|| gimple_nop_p (op1def
))
1310 && (!op2def
|| gimple_nop_p (op2def
)))
1312 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1313 if (gsi_end_p (gsi
))
1315 gimple_stmt_iterator gsi2
1316 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1317 gimple_set_uid (sum
,
1318 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1321 gimple_set_uid (sum
, gimple_uid (gsi_stmt (gsi
)));
1322 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1326 gimple insert_point
;
1327 if ((!op1def
|| gimple_nop_p (op1def
))
1328 || (op2def
&& !gimple_nop_p (op2def
)
1329 && reassoc_stmt_dominates_stmt_p (op1def
, op2def
)))
1330 insert_point
= op2def
;
1332 insert_point
= op1def
;
1333 insert_stmt_after (sum
, insert_point
);
1340 /* Perform un-distribution of divisions and multiplications.
1341 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1342 to (A + B) / X for real X.
1344 The algorithm is organized as follows.
1346 - First we walk the addition chain *OPS looking for summands that
1347 are defined by a multiplication or a real division. This results
1348 in the candidates bitmap with relevant indices into *OPS.
1350 - Second we build the chains of multiplications or divisions for
1351 these candidates, counting the number of occurrences of (operand, code)
1352 pairs in all of the candidates chains.
1354 - Third we sort the (operand, code) pairs by number of occurrence and
1355 process them starting with the pair with the most uses.
1357 * For each such pair we walk the candidates again to build a
1358 second candidate bitmap noting all multiplication/division chains
1359 that have at least one occurrence of (operand, code).
1361 * We build an alternate addition chain only covering these
1362 candidates with one (operand, code) operation removed from their
1363 multiplication/division chain.
1365 * The first candidate gets replaced by the alternate addition chain
1366 multiplied/divided by the operand.
1368 * All candidate chains get disabled for further processing and
1369 processing of (operand, code) pairs continues.
1371 The alternate addition chains built are re-processed by the main
1372 reassociation algorithm which allows optimizing a * x * y + b * y * x
1373 to (a + b ) * x * y in one invocation of the reassociation pass. */
1376 undistribute_ops_list (enum tree_code opcode
,
1377 vec
<operand_entry_t
> *ops
, struct loop
*loop
)
1379 unsigned int length
= ops
->length ();
1380 operand_entry_t oe1
;
1382 sbitmap candidates
, candidates2
;
1383 unsigned nr_candidates
, nr_candidates2
;
1384 sbitmap_iterator sbi0
;
1385 vec
<operand_entry_t
> *subops
;
1386 bool changed
= false;
1387 int next_oecount_id
= 0;
1390 || opcode
!= PLUS_EXPR
)
1393 /* Build a list of candidates to process. */
1394 candidates
= sbitmap_alloc (length
);
1395 bitmap_clear (candidates
);
1397 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1399 enum tree_code dcode
;
1402 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1404 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1405 if (!is_gimple_assign (oe1def
))
1407 dcode
= gimple_assign_rhs_code (oe1def
);
1408 if ((dcode
!= MULT_EXPR
1409 && dcode
!= RDIV_EXPR
)
1410 || !is_reassociable_op (oe1def
, dcode
, loop
))
1413 bitmap_set_bit (candidates
, i
);
1417 if (nr_candidates
< 2)
1419 sbitmap_free (candidates
);
1423 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1425 fprintf (dump_file
, "searching for un-distribute opportunities ");
1426 print_generic_expr (dump_file
,
1427 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, 0);
1428 fprintf (dump_file
, " %d\n", nr_candidates
);
1431 /* Build linearized sub-operand lists and the counting table. */
1434 hash_table
<oecount_hasher
> ctable (15);
1436 /* ??? Macro arguments cannot have multi-argument template types in
1437 them. This typedef is needed to workaround that limitation. */
1438 typedef vec
<operand_entry_t
> vec_operand_entry_t_heap
;
1439 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1440 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1443 enum tree_code oecode
;
1446 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1447 oecode
= gimple_assign_rhs_code (oedef
);
1448 linearize_expr_tree (&subops
[i
], oedef
,
1449 associative_tree_code (oecode
), false);
1451 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1458 c
.id
= next_oecount_id
++;
1461 idx
= cvec
.length () + 41;
1462 slot
= ctable
.find_slot (idx
, INSERT
);
1470 cvec
[*slot
- 42].cnt
++;
1475 /* Sort the counting table. */
1476 cvec
.qsort (oecount_cmp
);
1478 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1481 fprintf (dump_file
, "Candidates:\n");
1482 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1484 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1485 c
->oecode
== MULT_EXPR
1486 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1487 print_generic_expr (dump_file
, c
->op
, 0);
1488 fprintf (dump_file
, "\n");
1492 /* Process the (operand, code) pairs in order of most occurrence. */
1493 candidates2
= sbitmap_alloc (length
);
1494 while (!cvec
.is_empty ())
1496 oecount
*c
= &cvec
.last ();
1500 /* Now collect the operands in the outer chain that contain
1501 the common operand in their inner chain. */
1502 bitmap_clear (candidates2
);
1504 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1507 enum tree_code oecode
;
1509 tree op
= (*ops
)[i
]->op
;
1511 /* If we undistributed in this chain already this may be
1513 if (TREE_CODE (op
) != SSA_NAME
)
1516 oedef
= SSA_NAME_DEF_STMT (op
);
1517 oecode
= gimple_assign_rhs_code (oedef
);
1518 if (oecode
!= c
->oecode
)
1521 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1523 if (oe1
->op
== c
->op
)
1525 bitmap_set_bit (candidates2
, i
);
1532 if (nr_candidates2
>= 2)
1534 operand_entry_t oe1
, oe2
;
1536 int first
= bitmap_first_set_bit (candidates2
);
1538 /* Build the new addition chain. */
1539 oe1
= (*ops
)[first
];
1540 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1542 fprintf (dump_file
, "Building (");
1543 print_generic_expr (dump_file
, oe1
->op
, 0);
1545 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1546 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1550 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1552 fprintf (dump_file
, " + ");
1553 print_generic_expr (dump_file
, oe2
->op
, 0);
1555 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1556 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1557 oe1
->op
, oe2
->op
, opcode
);
1558 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1560 oe1
->op
= gimple_get_lhs (sum
);
1563 /* Apply the multiplication/division. */
1564 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1565 oe1
->op
, c
->op
, c
->oecode
);
1566 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1568 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1569 print_generic_expr (dump_file
, c
->op
, 0);
1570 fprintf (dump_file
, "\n");
1573 /* Record it in the addition chain and disable further
1574 undistribution with this op. */
1575 oe1
->op
= gimple_assign_lhs (prod
);
1576 oe1
->rank
= get_rank (oe1
->op
);
1577 subops
[first
].release ();
1585 for (i
= 0; i
< ops
->length (); ++i
)
1586 subops
[i
].release ();
1589 sbitmap_free (candidates
);
1590 sbitmap_free (candidates2
);
1595 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1596 expression, examine the other OPS to see if any of them are comparisons
1597 of the same values, which we may be able to combine or eliminate.
1598 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1601 eliminate_redundant_comparison (enum tree_code opcode
,
1602 vec
<operand_entry_t
> *ops
,
1603 unsigned int currindex
,
1604 operand_entry_t curr
)
1607 enum tree_code lcode
, rcode
;
1612 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
1615 /* Check that CURR is a comparison. */
1616 if (TREE_CODE (curr
->op
) != SSA_NAME
)
1618 def1
= SSA_NAME_DEF_STMT (curr
->op
);
1619 if (!is_gimple_assign (def1
))
1621 lcode
= gimple_assign_rhs_code (def1
);
1622 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
1624 op1
= gimple_assign_rhs1 (def1
);
1625 op2
= gimple_assign_rhs2 (def1
);
1627 /* Now look for a similar comparison in the remaining OPS. */
1628 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
1632 if (TREE_CODE (oe
->op
) != SSA_NAME
)
1634 def2
= SSA_NAME_DEF_STMT (oe
->op
);
1635 if (!is_gimple_assign (def2
))
1637 rcode
= gimple_assign_rhs_code (def2
);
1638 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
1641 /* If we got here, we have a match. See if we can combine the
1643 if (opcode
== BIT_IOR_EXPR
)
1644 t
= maybe_fold_or_comparisons (lcode
, op1
, op2
,
1645 rcode
, gimple_assign_rhs1 (def2
),
1646 gimple_assign_rhs2 (def2
));
1648 t
= maybe_fold_and_comparisons (lcode
, op1
, op2
,
1649 rcode
, gimple_assign_rhs1 (def2
),
1650 gimple_assign_rhs2 (def2
));
1654 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1655 always give us a boolean_type_node value back. If the original
1656 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1657 we need to convert. */
1658 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
1659 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
1661 if (TREE_CODE (t
) != INTEGER_CST
1662 && !operand_equal_p (t
, curr
->op
, 0))
1664 enum tree_code subcode
;
1665 tree newop1
, newop2
;
1666 if (!COMPARISON_CLASS_P (t
))
1668 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1669 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1670 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1671 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
1675 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1677 fprintf (dump_file
, "Equivalence: ");
1678 print_generic_expr (dump_file
, curr
->op
, 0);
1679 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
1680 print_generic_expr (dump_file
, oe
->op
, 0);
1681 fprintf (dump_file
, " -> ");
1682 print_generic_expr (dump_file
, t
, 0);
1683 fprintf (dump_file
, "\n");
1686 /* Now we can delete oe, as it has been subsumed by the new combined
1688 ops
->ordered_remove (i
);
1689 reassociate_stats
.ops_eliminated
++;
1691 /* If t is the same as curr->op, we're done. Otherwise we must
1692 replace curr->op with t. Special case is if we got a constant
1693 back, in which case we add it to the end instead of in place of
1694 the current entry. */
1695 if (TREE_CODE (t
) == INTEGER_CST
)
1697 ops
->ordered_remove (currindex
);
1698 add_to_ops_vec (ops
, t
);
1700 else if (!operand_equal_p (t
, curr
->op
, 0))
1703 enum tree_code subcode
;
1706 gcc_assert (COMPARISON_CLASS_P (t
));
1707 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1708 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1709 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1710 gcc_checking_assert (is_gimple_val (newop1
)
1711 && is_gimple_val (newop2
));
1712 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
1713 curr
->op
= gimple_get_lhs (sum
);
1721 /* Perform various identities and other optimizations on the list of
1722 operand entries, stored in OPS. The tree code for the binary
1723 operation between all the operands is OPCODE. */
1726 optimize_ops_list (enum tree_code opcode
,
1727 vec
<operand_entry_t
> *ops
)
1729 unsigned int length
= ops
->length ();
1732 operand_entry_t oelast
= NULL
;
1733 bool iterate
= false;
1738 oelast
= ops
->last ();
1740 /* If the last two are constants, pop the constants off, merge them
1741 and try the next two. */
1742 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
1744 operand_entry_t oelm1
= (*ops
)[length
- 2];
1746 if (oelm1
->rank
== 0
1747 && is_gimple_min_invariant (oelm1
->op
)
1748 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
1749 TREE_TYPE (oelast
->op
)))
1751 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
1752 oelm1
->op
, oelast
->op
);
1754 if (folded
&& is_gimple_min_invariant (folded
))
1756 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1757 fprintf (dump_file
, "Merging constants\n");
1762 add_to_ops_vec (ops
, folded
);
1763 reassociate_stats
.constants_eliminated
++;
1765 optimize_ops_list (opcode
, ops
);
1771 eliminate_using_constants (opcode
, ops
);
1774 for (i
= 0; ops
->iterate (i
, &oe
);)
1778 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
1780 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
1781 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
1782 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
1794 length
= ops
->length ();
1795 oelast
= ops
->last ();
1798 optimize_ops_list (opcode
, ops
);
1801 /* The following functions are subroutines to optimize_range_tests and allow
1802 it to try to change a logical combination of comparisons into a range
1806 X == 2 || X == 5 || X == 3 || X == 4
1810 (unsigned) (X - 2) <= 3
1812 For more information see comments above fold_test_range in fold-const.c,
1813 this implementation is for GIMPLE. */
1821 bool strict_overflow_p
;
1822 unsigned int idx
, next
;
1825 /* This is similar to make_range in fold-const.c, but on top of
1826 GIMPLE instead of trees. If EXP is non-NULL, it should be
1827 an SSA_NAME and STMT argument is ignored, otherwise STMT
1828 argument should be a GIMPLE_COND. */
1831 init_range_entry (struct range_entry
*r
, tree exp
, gimple stmt
)
1835 bool is_bool
, strict_overflow_p
;
1839 r
->strict_overflow_p
= false;
1841 r
->high
= NULL_TREE
;
1842 if (exp
!= NULL_TREE
1843 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
1846 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1847 and see if we can refine the range. Some of the cases below may not
1848 happen, but it doesn't seem worth worrying about this. We "continue"
1849 the outer loop when we've changed something; otherwise we "break"
1850 the switch, which will "break" the while. */
1851 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
1854 strict_overflow_p
= false;
1856 if (exp
== NULL_TREE
)
1858 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
1860 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
1865 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
1870 enum tree_code code
;
1871 tree arg0
, arg1
, exp_type
;
1875 if (exp
!= NULL_TREE
)
1877 if (TREE_CODE (exp
) != SSA_NAME
1878 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
))
1881 stmt
= SSA_NAME_DEF_STMT (exp
);
1882 if (!is_gimple_assign (stmt
))
1885 code
= gimple_assign_rhs_code (stmt
);
1886 arg0
= gimple_assign_rhs1 (stmt
);
1887 arg1
= gimple_assign_rhs2 (stmt
);
1888 exp_type
= TREE_TYPE (exp
);
1892 code
= gimple_cond_code (stmt
);
1893 arg0
= gimple_cond_lhs (stmt
);
1894 arg1
= gimple_cond_rhs (stmt
);
1895 exp_type
= boolean_type_node
;
1898 if (TREE_CODE (arg0
) != SSA_NAME
)
1900 loc
= gimple_location (stmt
);
1904 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
1905 /* Ensure the range is either +[-,0], +[0,0],
1906 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1907 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1908 or similar expression of unconditional true or
1909 false, it should not be negated. */
1910 && ((high
&& integer_zerop (high
))
1911 || (low
&& integer_onep (low
))))
1924 if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
1926 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
1931 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
1946 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
1948 &strict_overflow_p
);
1949 if (nexp
!= NULL_TREE
)
1952 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
1965 r
->strict_overflow_p
= strict_overflow_p
;
1969 /* Comparison function for qsort. Sort entries
1970 without SSA_NAME exp first, then with SSA_NAMEs sorted
1971 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1972 by increasing ->low and if ->low is the same, by increasing
1973 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1977 range_entry_cmp (const void *a
, const void *b
)
1979 const struct range_entry
*p
= (const struct range_entry
*) a
;
1980 const struct range_entry
*q
= (const struct range_entry
*) b
;
1982 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
1984 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
1986 /* Group range_entries for the same SSA_NAME together. */
1987 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
1989 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
1991 /* If ->low is different, NULL low goes first, then by
1993 if (p
->low
!= NULL_TREE
)
1995 if (q
->low
!= NULL_TREE
)
1997 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
1999 if (tem
&& integer_onep (tem
))
2001 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2003 if (tem
&& integer_onep (tem
))
2009 else if (q
->low
!= NULL_TREE
)
2011 /* If ->high is different, NULL high goes last, before that by
2013 if (p
->high
!= NULL_TREE
)
2015 if (q
->high
!= NULL_TREE
)
2017 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2019 if (tem
&& integer_onep (tem
))
2021 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2023 if (tem
&& integer_onep (tem
))
2029 else if (q
->high
!= NULL_TREE
)
2031 /* If both ranges are the same, sort below by ascending idx. */
2036 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2039 if (p
->idx
< q
->idx
)
2043 gcc_checking_assert (p
->idx
> q
->idx
);
2048 /* Helper routine of optimize_range_test.
2049 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2050 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2051 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2052 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2053 an array of COUNT pointers to other ranges. Return
2054 true if the range merge has been successful.
2055 If OPCODE is ERROR_MARK, this is called from within
2056 maybe_optimize_range_tests and is performing inter-bb range optimization.
2057 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2061 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2062 struct range_entry
**otherrangep
,
2063 unsigned int count
, enum tree_code opcode
,
2064 vec
<operand_entry_t
> *ops
, tree exp
, gimple_seq seq
,
2065 bool in_p
, tree low
, tree high
, bool strict_overflow_p
)
2067 operand_entry_t oe
= (*ops
)[range
->idx
];
2069 gimple stmt
= op
? SSA_NAME_DEF_STMT (op
) :
2070 last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2071 location_t loc
= gimple_location (stmt
);
2072 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2073 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2075 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2076 gimple_stmt_iterator gsi
;
2079 if (tem
== NULL_TREE
)
2082 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2083 warning_at (loc
, OPT_Wstrict_overflow
,
2084 "assuming signed overflow does not occur "
2085 "when simplifying range test");
2087 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2089 struct range_entry
*r
;
2090 fprintf (dump_file
, "Optimizing range tests ");
2091 print_generic_expr (dump_file
, range
->exp
, 0);
2092 fprintf (dump_file
, " %c[", range
->in_p
? '+' : '-');
2093 print_generic_expr (dump_file
, range
->low
, 0);
2094 fprintf (dump_file
, ", ");
2095 print_generic_expr (dump_file
, range
->high
, 0);
2096 fprintf (dump_file
, "]");
2097 for (i
= 0; i
< count
; i
++)
2103 fprintf (dump_file
, " and %c[", r
->in_p
? '+' : '-');
2104 print_generic_expr (dump_file
, r
->low
, 0);
2105 fprintf (dump_file
, ", ");
2106 print_generic_expr (dump_file
, r
->high
, 0);
2107 fprintf (dump_file
, "]");
2109 fprintf (dump_file
, "\n into ");
2110 print_generic_expr (dump_file
, tem
, 0);
2111 fprintf (dump_file
, "\n");
2114 if (opcode
== BIT_IOR_EXPR
2115 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2116 tem
= invert_truthvalue_loc (loc
, tem
);
2118 tem
= fold_convert_loc (loc
, optype
, tem
);
2119 gsi
= gsi_for_stmt (stmt
);
2120 unsigned int uid
= gimple_uid (stmt
);
2121 /* In rare cases range->exp can be equal to lhs of stmt.
2122 In that case we have to insert after the stmt rather then before
2123 it. If stmt is a PHI, insert it at the start of the basic block. */
2124 if (op
!= range
->exp
)
2126 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2127 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
2131 else if (gimple_code (stmt
) != GIMPLE_PHI
)
2133 gsi_insert_seq_after (&gsi
, seq
, GSI_CONTINUE_LINKING
);
2134 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, false,
2135 GSI_CONTINUE_LINKING
);
2139 gsi
= gsi_after_labels (gimple_bb (stmt
));
2140 if (!gsi_end_p (gsi
))
2141 uid
= gimple_uid (gsi_stmt (gsi
));
2144 gsi
= gsi_start_bb (gimple_bb (stmt
));
2146 while (!gsi_end_p (gsi
))
2148 uid
= gimple_uid (gsi_stmt (gsi
));
2152 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2153 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
2155 if (gsi_end_p (gsi
))
2156 gsi
= gsi_last_bb (gimple_bb (stmt
));
2160 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2161 if (gimple_uid (gsi_stmt (gsi
)))
2164 gimple_set_uid (gsi_stmt (gsi
), uid
);
2171 range
->strict_overflow_p
= false;
2173 for (i
= 0; i
< count
; i
++)
2176 range
= otherrange
+ i
;
2178 range
= otherrangep
[i
];
2179 oe
= (*ops
)[range
->idx
];
2180 /* Now change all the other range test immediate uses, so that
2181 those tests will be optimized away. */
2182 if (opcode
== ERROR_MARK
)
2185 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
2186 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
2188 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
2189 ? boolean_false_node
: boolean_true_node
);
2192 oe
->op
= error_mark_node
;
2193 range
->exp
= NULL_TREE
;
2198 /* Optimize X == CST1 || X == CST2
2199 if popcount (CST1 ^ CST2) == 1 into
2200 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2201 Similarly for ranges. E.g.
2202 X != 2 && X != 3 && X != 10 && X != 11
2203 will be transformed by the previous optimization into
2204 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2205 and this loop can transform that into
2206 !(((X & ~8) - 2U) <= 1U). */
2209 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
2210 tree lowi
, tree lowj
, tree highi
, tree highj
,
2211 vec
<operand_entry_t
> *ops
,
2212 struct range_entry
*rangei
,
2213 struct range_entry
*rangej
)
2215 tree lowxor
, highxor
, tem
, exp
;
2216 /* Check lowi ^ lowj == highi ^ highj and
2217 popcount (lowi ^ lowj) == 1. */
2218 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
2219 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
2221 if (!integer_pow2p (lowxor
))
2223 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
2224 if (!tree_int_cst_equal (lowxor
, highxor
))
2227 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
2228 exp
= fold_build2 (BIT_AND_EXPR
, type
, rangei
->exp
, tem
);
2229 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
2230 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
2231 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, exp
,
2232 NULL
, rangei
->in_p
, lowj
, highj
,
2233 rangei
->strict_overflow_p
2234 || rangej
->strict_overflow_p
))
2239 /* Optimize X == CST1 || X == CST2
2240 if popcount (CST2 - CST1) == 1 into
2241 ((X - CST1) & ~(CST2 - CST1)) == 0.
2242 Similarly for ranges. E.g.
2243 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2244 || X == 75 || X == 45
2245 will be transformed by the previous optimization into
2246 (X - 43U) <= 3U || (X - 75U) <= 3U
2247 and this loop can transform that into
2248 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2250 optimize_range_tests_diff (enum tree_code opcode
, tree type
,
2251 tree lowi
, tree lowj
, tree highi
, tree highj
,
2252 vec
<operand_entry_t
> *ops
,
2253 struct range_entry
*rangei
,
2254 struct range_entry
*rangej
)
2256 tree tem1
, tem2
, mask
;
2257 /* Check highi - lowi == highj - lowj. */
2258 tem1
= fold_binary (MINUS_EXPR
, type
, highi
, lowi
);
2259 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2261 tem2
= fold_binary (MINUS_EXPR
, type
, highj
, lowj
);
2262 if (!tree_int_cst_equal (tem1
, tem2
))
2264 /* Check popcount (lowj - lowi) == 1. */
2265 tem1
= fold_binary (MINUS_EXPR
, type
, lowj
, lowi
);
2266 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2268 if (!integer_pow2p (tem1
))
2271 type
= unsigned_type_for (type
);
2272 tem1
= fold_convert (type
, tem1
);
2273 tem2
= fold_convert (type
, tem2
);
2274 lowi
= fold_convert (type
, lowi
);
2275 mask
= fold_build1 (BIT_NOT_EXPR
, type
, tem1
);
2276 tem1
= fold_binary (MINUS_EXPR
, type
,
2277 fold_convert (type
, rangei
->exp
), lowi
);
2278 tem1
= fold_build2 (BIT_AND_EXPR
, type
, tem1
, mask
);
2279 lowj
= build_int_cst (type
, 0);
2280 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, tem1
,
2281 NULL
, rangei
->in_p
, lowj
, tem2
,
2282 rangei
->strict_overflow_p
2283 || rangej
->strict_overflow_p
))
2288 /* It does some common checks for function optimize_range_tests_xor and
2289 optimize_range_tests_diff.
2290 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2291 Else it calls optimize_range_tests_diff. */
2294 optimize_range_tests_1 (enum tree_code opcode
, int first
, int length
,
2295 bool optimize_xor
, vec
<operand_entry_t
> *ops
,
2296 struct range_entry
*ranges
)
2299 bool any_changes
= false;
2300 for (i
= first
; i
< length
; i
++)
2302 tree lowi
, highi
, lowj
, highj
, type
, tem
;
2304 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2306 type
= TREE_TYPE (ranges
[i
].exp
);
2307 if (!INTEGRAL_TYPE_P (type
))
2309 lowi
= ranges
[i
].low
;
2310 if (lowi
== NULL_TREE
)
2311 lowi
= TYPE_MIN_VALUE (type
);
2312 highi
= ranges
[i
].high
;
2313 if (highi
== NULL_TREE
)
2315 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
2318 if (ranges
[i
].exp
!= ranges
[j
].exp
|| ranges
[j
].in_p
)
2320 lowj
= ranges
[j
].low
;
2321 if (lowj
== NULL_TREE
)
2323 highj
= ranges
[j
].high
;
2324 if (highj
== NULL_TREE
)
2325 highj
= TYPE_MAX_VALUE (type
);
2326 /* Check lowj > highi. */
2327 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2329 if (tem
== NULL_TREE
|| !integer_onep (tem
))
2332 changes
= optimize_range_tests_xor (opcode
, type
, lowi
, lowj
,
2334 ranges
+ i
, ranges
+ j
);
2336 changes
= optimize_range_tests_diff (opcode
, type
, lowi
, lowj
,
2338 ranges
+ i
, ranges
+ j
);
2349 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2350 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2351 EXP on success, NULL otherwise. */
2354 extract_bit_test_mask (tree exp
, int prec
, tree totallow
, tree low
, tree high
,
2355 wide_int
*mask
, tree
*totallowp
)
2357 tree tem
= int_const_binop (MINUS_EXPR
, high
, low
);
2358 if (tem
== NULL_TREE
2359 || TREE_CODE (tem
) != INTEGER_CST
2360 || TREE_OVERFLOW (tem
)
2361 || tree_int_cst_sgn (tem
) == -1
2362 || compare_tree_int (tem
, prec
) != -1)
2365 unsigned HOST_WIDE_INT max
= tree_to_uhwi (tem
) + 1;
2366 *mask
= wi::shifted_mask (0, max
, false, prec
);
2367 if (TREE_CODE (exp
) == BIT_AND_EXPR
2368 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
2370 widest_int msk
= wi::to_widest (TREE_OPERAND (exp
, 1));
2371 msk
= wi::zext (~msk
, TYPE_PRECISION (TREE_TYPE (exp
)));
2372 if (wi::popcount (msk
) == 1
2373 && wi::ltu_p (msk
, prec
- max
))
2375 *mask
|= wi::shifted_mask (msk
.to_uhwi (), max
, false, prec
);
2376 max
+= msk
.to_uhwi ();
2377 exp
= TREE_OPERAND (exp
, 0);
2378 if (integer_zerop (low
)
2379 && TREE_CODE (exp
) == PLUS_EXPR
2380 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
2382 tree ret
= TREE_OPERAND (exp
, 0);
2385 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp
, 1)),
2386 TYPE_PRECISION (TREE_TYPE (low
))));
2387 tree tbias
= wide_int_to_tree (TREE_TYPE (ret
), bias
);
2393 else if (!tree_int_cst_lt (totallow
, tbias
))
2395 bias
= wi::to_widest (tbias
);
2396 bias
-= wi::to_widest (totallow
);
2397 if (wi::ges_p (bias
, 0) && wi::lts_p (bias
, prec
- max
))
2399 *mask
= wi::lshift (*mask
, bias
);
2407 if (!tree_int_cst_lt (totallow
, low
))
2409 tem
= int_const_binop (MINUS_EXPR
, low
, totallow
);
2410 if (tem
== NULL_TREE
2411 || TREE_CODE (tem
) != INTEGER_CST
2412 || TREE_OVERFLOW (tem
)
2413 || compare_tree_int (tem
, prec
- max
) == 1)
2416 *mask
= wi::lshift (*mask
, wi::to_widest (tem
));
2420 /* Attempt to optimize small range tests using bit test.
2422 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2423 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2424 has been by earlier optimizations optimized into:
2425 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2426 As all the 43 through 82 range is less than 64 numbers,
2427 for 64-bit word targets optimize that into:
2428 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
2431 optimize_range_tests_to_bit_test (enum tree_code opcode
, int first
, int length
,
2432 vec
<operand_entry_t
> *ops
,
2433 struct range_entry
*ranges
)
2436 bool any_changes
= false;
2437 int prec
= GET_MODE_BITSIZE (word_mode
);
2438 auto_vec
<struct range_entry
*, 64> candidates
;
2440 for (i
= first
; i
< length
- 2; i
++)
2442 tree lowi
, highi
, lowj
, highj
, type
;
2444 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2446 type
= TREE_TYPE (ranges
[i
].exp
);
2447 if (!INTEGRAL_TYPE_P (type
))
2449 lowi
= ranges
[i
].low
;
2450 if (lowi
== NULL_TREE
)
2451 lowi
= TYPE_MIN_VALUE (type
);
2452 highi
= ranges
[i
].high
;
2453 if (highi
== NULL_TREE
)
2456 tree exp
= extract_bit_test_mask (ranges
[i
].exp
, prec
, lowi
, lowi
,
2457 highi
, &mask
, &lowi
);
2458 if (exp
== NULL_TREE
)
2460 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2461 candidates
.truncate (0);
2462 int end
= MIN (i
+ 64, length
);
2463 for (j
= i
+ 1; j
< end
; j
++)
2466 if (ranges
[j
].exp
== NULL_TREE
|| ranges
[j
].in_p
)
2468 if (ranges
[j
].exp
== exp
)
2470 else if (TREE_CODE (ranges
[j
].exp
) == BIT_AND_EXPR
)
2472 exp2
= TREE_OPERAND (ranges
[j
].exp
, 0);
2475 else if (TREE_CODE (exp2
) == PLUS_EXPR
)
2477 exp2
= TREE_OPERAND (exp2
, 0);
2487 lowj
= ranges
[j
].low
;
2488 if (lowj
== NULL_TREE
)
2490 highj
= ranges
[j
].high
;
2491 if (highj
== NULL_TREE
)
2492 highj
= TYPE_MAX_VALUE (type
);
2494 exp2
= extract_bit_test_mask (ranges
[j
].exp
, prec
, lowi
, lowj
,
2495 highj
, &mask2
, NULL
);
2499 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2500 candidates
.safe_push (&ranges
[j
]);
2503 /* If we need otherwise 3 or more comparisons, use a bit test. */
2504 if (candidates
.length () >= 2)
2506 tree high
= wide_int_to_tree (TREE_TYPE (lowi
),
2507 wi::to_widest (lowi
)
2508 + prec
- 1 - wi::clz (mask
));
2509 operand_entry_t oe
= (*ops
)[ranges
[i
].idx
];
2511 gimple stmt
= op
? SSA_NAME_DEF_STMT (op
)
2512 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2513 location_t loc
= gimple_location (stmt
);
2514 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2516 /* See if it isn't cheaper to pretend the minimum value of the
2517 range is 0, if maximum value is small enough.
2518 We can avoid then subtraction of the minimum value, but the
2519 mask constant could be perhaps more expensive. */
2520 if (compare_tree_int (lowi
, 0) > 0
2521 && compare_tree_int (high
, prec
) < 0)
2524 HOST_WIDE_INT m
= tree_to_uhwi (lowi
);
2525 rtx reg
= gen_raw_REG (word_mode
, 10000);
2526 bool speed_p
= optimize_bb_for_speed_p (gimple_bb (stmt
));
2527 cost_diff
= set_rtx_cost (gen_rtx_PLUS (word_mode
, reg
,
2528 GEN_INT (-m
)), speed_p
);
2529 rtx r
= immed_wide_int_const (mask
, word_mode
);
2530 cost_diff
+= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
2531 word_mode
, speed_p
);
2532 r
= immed_wide_int_const (wi::lshift (mask
, m
), word_mode
);
2533 cost_diff
-= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
2534 word_mode
, speed_p
);
2537 mask
= wi::lshift (mask
, m
);
2538 lowi
= build_zero_cst (TREE_TYPE (lowi
));
2542 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2544 if (tem
== NULL_TREE
|| is_gimple_val (tem
))
2546 tree etype
= unsigned_type_for (TREE_TYPE (exp
));
2547 exp
= fold_build2_loc (loc
, MINUS_EXPR
, etype
,
2548 fold_convert_loc (loc
, etype
, exp
),
2549 fold_convert_loc (loc
, etype
, lowi
));
2550 exp
= fold_convert_loc (loc
, integer_type_node
, exp
);
2551 tree word_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
2552 exp
= fold_build2_loc (loc
, LSHIFT_EXPR
, word_type
,
2553 build_int_cst (word_type
, 1), exp
);
2554 exp
= fold_build2_loc (loc
, BIT_AND_EXPR
, word_type
, exp
,
2555 wide_int_to_tree (word_type
, mask
));
2556 exp
= fold_build2_loc (loc
, EQ_EXPR
, optype
, exp
,
2557 build_zero_cst (word_type
));
2558 if (is_gimple_val (exp
))
2561 /* The shift might have undefined behavior if TEM is true,
2562 but reassociate_bb isn't prepared to have basic blocks
2563 split when it is running. So, temporarily emit a code
2564 with BIT_IOR_EXPR instead of &&, and fix it up in
2567 tem
= force_gimple_operand (tem
, &seq
, true, NULL_TREE
);
2568 gcc_assert (TREE_CODE (tem
) == SSA_NAME
);
2569 gimple_set_visited (SSA_NAME_DEF_STMT (tem
), true);
2571 exp
= force_gimple_operand (exp
, &seq2
, true, NULL_TREE
);
2572 gimple_seq_add_seq_without_update (&seq
, seq2
);
2573 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2574 gimple_set_visited (SSA_NAME_DEF_STMT (exp
), true);
2575 gimple g
= gimple_build_assign (make_ssa_name (optype
),
2576 BIT_IOR_EXPR
, tem
, exp
);
2577 gimple_set_location (g
, loc
);
2578 gimple_seq_add_stmt_without_update (&seq
, g
);
2579 exp
= gimple_assign_lhs (g
);
2580 tree val
= build_zero_cst (optype
);
2581 if (update_range_test (&ranges
[i
], NULL
, candidates
.address (),
2582 candidates
.length (), opcode
, ops
, exp
,
2583 seq
, false, val
, val
, strict_overflow_p
))
2586 reassoc_branch_fixups
.safe_push (tem
);
2589 gimple_seq_discard (seq
);
2595 /* Optimize range tests, similarly how fold_range_test optimizes
2596 it on trees. The tree code for the binary
2597 operation between all the operands is OPCODE.
2598 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2599 maybe_optimize_range_tests for inter-bb range optimization.
2600 In that case if oe->op is NULL, oe->id is bb->index whose
2601 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2602 the actual opcode. */
2605 optimize_range_tests (enum tree_code opcode
,
2606 vec
<operand_entry_t
> *ops
)
2608 unsigned int length
= ops
->length (), i
, j
, first
;
2610 struct range_entry
*ranges
;
2611 bool any_changes
= false;
2616 ranges
= XNEWVEC (struct range_entry
, length
);
2617 for (i
= 0; i
< length
; i
++)
2621 init_range_entry (ranges
+ i
, oe
->op
,
2623 last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
)));
2624 /* For | invert it now, we will invert it again before emitting
2625 the optimized expression. */
2626 if (opcode
== BIT_IOR_EXPR
2627 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2628 ranges
[i
].in_p
= !ranges
[i
].in_p
;
2631 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
2632 for (i
= 0; i
< length
; i
++)
2633 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
2636 /* Try to merge ranges. */
2637 for (first
= i
; i
< length
; i
++)
2639 tree low
= ranges
[i
].low
;
2640 tree high
= ranges
[i
].high
;
2641 int in_p
= ranges
[i
].in_p
;
2642 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2643 int update_fail_count
= 0;
2645 for (j
= i
+ 1; j
< length
; j
++)
2647 if (ranges
[i
].exp
!= ranges
[j
].exp
)
2649 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
2650 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
2652 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2658 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, NULL
, j
- i
- 1,
2659 opcode
, ops
, ranges
[i
].exp
, NULL
, in_p
,
2660 low
, high
, strict_overflow_p
))
2665 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2666 while update_range_test would fail. */
2667 else if (update_fail_count
== 64)
2670 ++update_fail_count
;
2673 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
2676 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
2677 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
2679 if (lshift_cheap_p (optimize_function_for_speed_p (cfun
)))
2680 any_changes
|= optimize_range_tests_to_bit_test (opcode
, first
, length
,
2683 if (any_changes
&& opcode
!= ERROR_MARK
)
2686 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
2688 if (oe
->op
== error_mark_node
)
2697 XDELETEVEC (ranges
);
2701 /* Return true if STMT is a cast like:
2707 # _345 = PHI <_123(N), 1(...), 1(...)>
2708 where _234 has bool type, _123 has single use and
2709 bb N has a single successor M. This is commonly used in
2710 the last block of a range test. */
2713 final_range_test_p (gimple stmt
)
2715 basic_block bb
, rhs_bb
;
2718 use_operand_p use_p
;
2721 if (!gimple_assign_cast_p (stmt
))
2723 bb
= gimple_bb (stmt
);
2724 if (!single_succ_p (bb
))
2726 e
= single_succ_edge (bb
);
2727 if (e
->flags
& EDGE_COMPLEX
)
2730 lhs
= gimple_assign_lhs (stmt
);
2731 rhs
= gimple_assign_rhs1 (stmt
);
2732 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2733 || TREE_CODE (rhs
) != SSA_NAME
2734 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
)
2737 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2738 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
2741 if (gimple_code (use_stmt
) != GIMPLE_PHI
2742 || gimple_bb (use_stmt
) != e
->dest
)
2745 /* And that the rhs is defined in the same loop. */
2746 rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
));
2748 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
2754 /* Return true if BB is suitable basic block for inter-bb range test
2755 optimization. If BACKWARD is true, BB should be the only predecessor
2756 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2757 or compared with to find a common basic block to which all conditions
2758 branch to if true resp. false. If BACKWARD is false, TEST_BB should
2759 be the only predecessor of BB. */
2762 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
2765 edge_iterator ei
, ei2
;
2769 bool other_edge_seen
= false;
2774 /* Check last stmt first. */
2775 stmt
= last_stmt (bb
);
2777 || (gimple_code (stmt
) != GIMPLE_COND
2778 && (backward
|| !final_range_test_p (stmt
)))
2779 || gimple_visited_p (stmt
)
2780 || stmt_could_throw_p (stmt
)
2783 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
2786 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2787 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2788 to *OTHER_BB (if not set yet, try to find it out). */
2789 if (EDGE_COUNT (bb
->succs
) != 2)
2791 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2793 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2795 if (e
->dest
== test_bb
)
2804 if (*other_bb
== NULL
)
2806 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
2807 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2809 else if (e
->dest
== e2
->dest
)
2810 *other_bb
= e
->dest
;
2811 if (*other_bb
== NULL
)
2814 if (e
->dest
== *other_bb
)
2815 other_edge_seen
= true;
2819 if (*other_bb
== NULL
|| !other_edge_seen
)
2822 else if (single_succ (bb
) != *other_bb
)
2825 /* Now check all PHIs of *OTHER_BB. */
2826 e
= find_edge (bb
, *other_bb
);
2827 e2
= find_edge (test_bb
, *other_bb
);
2828 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2830 gphi
*phi
= gsi
.phi ();
2831 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2832 corresponding to BB and TEST_BB predecessor must be the same. */
2833 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
2834 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
2836 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2837 one of the PHIs should have the lhs of the last stmt in
2838 that block as PHI arg and that PHI should have 0 or 1
2839 corresponding to it in all other range test basic blocks
2843 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
2844 == gimple_assign_lhs (stmt
)
2845 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
2846 || integer_onep (gimple_phi_arg_def (phi
,
2852 gimple test_last
= last_stmt (test_bb
);
2853 if (gimple_code (test_last
) != GIMPLE_COND
2854 && gimple_phi_arg_def (phi
, e2
->dest_idx
)
2855 == gimple_assign_lhs (test_last
)
2856 && (integer_zerop (gimple_phi_arg_def (phi
, e
->dest_idx
))
2857 || integer_onep (gimple_phi_arg_def (phi
, e
->dest_idx
))))
2867 /* Return true if BB doesn't have side-effects that would disallow
2868 range test optimization, all SSA_NAMEs set in the bb are consumed
2869 in the bb and there are no PHIs. */
2872 no_side_effect_bb (basic_block bb
)
2874 gimple_stmt_iterator gsi
;
2877 if (!gimple_seq_empty_p (phi_nodes (bb
)))
2879 last
= last_stmt (bb
);
2880 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2882 gimple stmt
= gsi_stmt (gsi
);
2884 imm_use_iterator imm_iter
;
2885 use_operand_p use_p
;
2887 if (is_gimple_debug (stmt
))
2889 if (gimple_has_side_effects (stmt
))
2893 if (!is_gimple_assign (stmt
))
2895 lhs
= gimple_assign_lhs (stmt
);
2896 if (TREE_CODE (lhs
) != SSA_NAME
)
2898 if (gimple_assign_rhs_could_trap_p (stmt
))
2900 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
2902 gimple use_stmt
= USE_STMT (use_p
);
2903 if (is_gimple_debug (use_stmt
))
2905 if (gimple_bb (use_stmt
) != bb
)
2912 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2913 return true and fill in *OPS recursively. */
2916 get_ops (tree var
, enum tree_code code
, vec
<operand_entry_t
> *ops
,
2919 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2923 if (!is_reassociable_op (stmt
, code
, loop
))
2926 rhs
[0] = gimple_assign_rhs1 (stmt
);
2927 rhs
[1] = gimple_assign_rhs2 (stmt
);
2928 gimple_set_visited (stmt
, true);
2929 for (i
= 0; i
< 2; i
++)
2930 if (TREE_CODE (rhs
[i
]) == SSA_NAME
2931 && !get_ops (rhs
[i
], code
, ops
, loop
)
2932 && has_single_use (rhs
[i
]))
2934 operand_entry_t oe
= operand_entry_pool
.allocate ();
2940 ops
->safe_push (oe
);
2945 /* Find the ops that were added by get_ops starting from VAR, see if
2946 they were changed during update_range_test and if yes, create new
2950 update_ops (tree var
, enum tree_code code
, vec
<operand_entry_t
> ops
,
2951 unsigned int *pidx
, struct loop
*loop
)
2953 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2957 if (!is_reassociable_op (stmt
, code
, loop
))
2960 rhs
[0] = gimple_assign_rhs1 (stmt
);
2961 rhs
[1] = gimple_assign_rhs2 (stmt
);
2964 for (i
= 0; i
< 2; i
++)
2965 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
2967 rhs
[2 + i
] = update_ops (rhs
[i
], code
, ops
, pidx
, loop
);
2968 if (rhs
[2 + i
] == NULL_TREE
)
2970 if (has_single_use (rhs
[i
]))
2971 rhs
[2 + i
] = ops
[(*pidx
)++]->op
;
2973 rhs
[2 + i
] = rhs
[i
];
2976 if ((rhs
[2] != rhs
[0] || rhs
[3] != rhs
[1])
2977 && (rhs
[2] != rhs
[1] || rhs
[3] != rhs
[0]))
2979 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
2980 var
= make_ssa_name (TREE_TYPE (var
));
2981 gassign
*g
= gimple_build_assign (var
, gimple_assign_rhs_code (stmt
),
2983 gimple_set_uid (g
, gimple_uid (stmt
));
2984 gimple_set_visited (g
, true);
2985 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
2990 /* Structure to track the initial value passed to get_ops and
2991 the range in the ops vector for each basic block. */
2993 struct inter_bb_range_test_entry
2996 unsigned int first_idx
, last_idx
;
2999 /* Inter-bb range test optimization. */
3002 maybe_optimize_range_tests (gimple stmt
)
3004 basic_block first_bb
= gimple_bb (stmt
);
3005 basic_block last_bb
= first_bb
;
3006 basic_block other_bb
= NULL
;
3010 auto_vec
<operand_entry_t
> ops
;
3011 auto_vec
<inter_bb_range_test_entry
> bbinfo
;
3012 bool any_changes
= false;
3014 /* Consider only basic blocks that end with GIMPLE_COND or
3015 a cast statement satisfying final_range_test_p. All
3016 but the last bb in the first_bb .. last_bb range
3017 should end with GIMPLE_COND. */
3018 if (gimple_code (stmt
) == GIMPLE_COND
)
3020 if (EDGE_COUNT (first_bb
->succs
) != 2)
3023 else if (final_range_test_p (stmt
))
3024 other_bb
= single_succ (first_bb
);
3028 if (stmt_could_throw_p (stmt
))
3031 /* As relative ordering of post-dominator sons isn't fixed,
3032 maybe_optimize_range_tests can be called first on any
3033 bb in the range we want to optimize. So, start searching
3034 backwards, if first_bb can be set to a predecessor. */
3035 while (single_pred_p (first_bb
))
3037 basic_block pred_bb
= single_pred (first_bb
);
3038 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, true))
3040 if (!no_side_effect_bb (first_bb
))
3044 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3045 Before starting forward search in last_bb successors, find
3046 out the other_bb. */
3047 if (first_bb
== last_bb
)
3050 /* As non-GIMPLE_COND last stmt always terminates the range,
3051 if forward search didn't discover anything, just give up. */
3052 if (gimple_code (stmt
) != GIMPLE_COND
)
3054 /* Look at both successors. Either it ends with a GIMPLE_COND
3055 and satisfies suitable_cond_bb, or ends with a cast and
3056 other_bb is that cast's successor. */
3057 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
3058 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
3059 || e
->dest
== first_bb
)
3061 else if (single_pred_p (e
->dest
))
3063 stmt
= last_stmt (e
->dest
);
3065 && gimple_code (stmt
) == GIMPLE_COND
3066 && EDGE_COUNT (e
->dest
->succs
) == 2)
3068 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
, true))
3074 && final_range_test_p (stmt
)
3075 && find_edge (first_bb
, single_succ (e
->dest
)))
3077 other_bb
= single_succ (e
->dest
);
3078 if (other_bb
== first_bb
)
3082 if (other_bb
== NULL
)
3085 /* Now do the forward search, moving last_bb to successor bbs
3086 that aren't other_bb. */
3087 while (EDGE_COUNT (last_bb
->succs
) == 2)
3089 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
3090 if (e
->dest
!= other_bb
)
3094 if (!single_pred_p (e
->dest
))
3096 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, false))
3098 if (!no_side_effect_bb (e
->dest
))
3102 if (first_bb
== last_bb
)
3104 /* Here basic blocks first_bb through last_bb's predecessor
3105 end with GIMPLE_COND, all of them have one of the edges to
3106 other_bb and another to another block in the range,
3107 all blocks except first_bb don't have side-effects and
3108 last_bb ends with either GIMPLE_COND, or cast satisfying
3109 final_range_test_p. */
3110 for (bb
= last_bb
; ; bb
= single_pred (bb
))
3112 enum tree_code code
;
3114 inter_bb_range_test_entry bb_ent
;
3116 bb_ent
.op
= NULL_TREE
;
3117 bb_ent
.first_idx
= ops
.length ();
3118 bb_ent
.last_idx
= bb_ent
.first_idx
;
3119 e
= find_edge (bb
, other_bb
);
3120 stmt
= last_stmt (bb
);
3121 gimple_set_visited (stmt
, true);
3122 if (gimple_code (stmt
) != GIMPLE_COND
)
3124 use_operand_p use_p
;
3129 lhs
= gimple_assign_lhs (stmt
);
3130 rhs
= gimple_assign_rhs1 (stmt
);
3131 gcc_assert (bb
== last_bb
);
3138 # _345 = PHI <_123(N), 1(...), 1(...)>
3140 or 0 instead of 1. If it is 0, the _234
3141 range test is anded together with all the
3142 other range tests, if it is 1, it is ored with
3144 single_imm_use (lhs
, &use_p
, &phi
);
3145 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
3146 e2
= find_edge (first_bb
, other_bb
);
3148 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
3149 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
3150 code
= BIT_AND_EXPR
;
3153 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
3154 code
= BIT_IOR_EXPR
;
3157 /* If _234 SSA_NAME_DEF_STMT is
3159 (or &, corresponding to 1/0 in the phi arguments,
3160 push into ops the individual range test arguments
3161 of the bitwise or resp. and, recursively. */
3162 if (!get_ops (rhs
, code
, &ops
,
3163 loop_containing_stmt (stmt
))
3164 && has_single_use (rhs
))
3166 /* Otherwise, push the _234 range test itself. */
3167 operand_entry_t oe
= operand_entry_pool
.allocate ();
3177 bb_ent
.last_idx
= ops
.length ();
3179 bbinfo
.safe_push (bb_ent
);
3182 /* Otherwise stmt is GIMPLE_COND. */
3183 code
= gimple_cond_code (stmt
);
3184 lhs
= gimple_cond_lhs (stmt
);
3185 rhs
= gimple_cond_rhs (stmt
);
3186 if (TREE_CODE (lhs
) == SSA_NAME
3187 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3188 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
3189 || rhs
!= boolean_false_node
3190 /* Either push into ops the individual bitwise
3191 or resp. and operands, depending on which
3192 edge is other_bb. */
3193 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
3194 ^ (code
== EQ_EXPR
))
3195 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
3196 loop_containing_stmt (stmt
))))
3198 /* Or push the GIMPLE_COND stmt itself. */
3199 operand_entry_t oe
= operand_entry_pool
.allocate ();
3202 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
3203 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
3204 /* oe->op = NULL signs that there is no SSA_NAME
3205 for the range test, and oe->id instead is the
3206 basic block number, at which's end the GIMPLE_COND
3214 else if (ops
.length () > bb_ent
.first_idx
)
3217 bb_ent
.last_idx
= ops
.length ();
3219 bbinfo
.safe_push (bb_ent
);
3223 if (ops
.length () > 1)
3224 any_changes
= optimize_range_tests (ERROR_MARK
, &ops
);
3228 /* update_ops relies on has_single_use predicates returning the
3229 same values as it did during get_ops earlier. Additionally it
3230 never removes statements, only adds new ones and it should walk
3231 from the single imm use and check the predicate already before
3232 making those changes.
3233 On the other side, the handling of GIMPLE_COND directly can turn
3234 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3235 it needs to be done in a separate loop afterwards. */
3236 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
3238 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
3239 && bbinfo
[idx
].op
!= NULL_TREE
)
3243 stmt
= last_stmt (bb
);
3244 new_op
= update_ops (bbinfo
[idx
].op
,
3246 ops
[bbinfo
[idx
].first_idx
]->rank
,
3247 ops
, &bbinfo
[idx
].first_idx
,
3248 loop_containing_stmt (stmt
));
3249 if (new_op
== NULL_TREE
)
3251 gcc_assert (bb
== last_bb
);
3252 new_op
= ops
[bbinfo
[idx
].first_idx
++]->op
;
3254 if (bbinfo
[idx
].op
!= new_op
)
3256 imm_use_iterator iter
;
3257 use_operand_p use_p
;
3258 gimple use_stmt
, cast_stmt
= NULL
;
3260 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, bbinfo
[idx
].op
)
3261 if (is_gimple_debug (use_stmt
))
3263 else if (gimple_code (use_stmt
) == GIMPLE_COND
3264 || gimple_code (use_stmt
) == GIMPLE_PHI
)
3265 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
3266 SET_USE (use_p
, new_op
);
3267 else if (gimple_assign_cast_p (use_stmt
))
3268 cast_stmt
= use_stmt
;
3273 gcc_assert (bb
== last_bb
);
3274 tree lhs
= gimple_assign_lhs (cast_stmt
);
3275 tree new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
3276 enum tree_code rhs_code
3277 = gimple_assign_rhs_code (cast_stmt
);
3279 if (is_gimple_min_invariant (new_op
))
3281 new_op
= fold_convert (TREE_TYPE (lhs
), new_op
);
3282 g
= gimple_build_assign (new_lhs
, new_op
);
3285 g
= gimple_build_assign (new_lhs
, rhs_code
, new_op
);
3286 gimple_stmt_iterator gsi
= gsi_for_stmt (cast_stmt
);
3287 gimple_set_uid (g
, gimple_uid (cast_stmt
));
3288 gimple_set_visited (g
, true);
3289 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3290 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
3291 if (is_gimple_debug (use_stmt
))
3293 else if (gimple_code (use_stmt
) == GIMPLE_COND
3294 || gimple_code (use_stmt
) == GIMPLE_PHI
)
3295 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
3296 SET_USE (use_p
, new_lhs
);
3305 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
3307 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
3308 && bbinfo
[idx
].op
== NULL_TREE
3309 && ops
[bbinfo
[idx
].first_idx
]->op
!= NULL_TREE
)
3311 gcond
*cond_stmt
= as_a
<gcond
*> (last_stmt (bb
));
3312 if (integer_zerop (ops
[bbinfo
[idx
].first_idx
]->op
))
3313 gimple_cond_make_false (cond_stmt
);
3314 else if (integer_onep (ops
[bbinfo
[idx
].first_idx
]->op
))
3315 gimple_cond_make_true (cond_stmt
);
3318 gimple_cond_set_code (cond_stmt
, NE_EXPR
);
3319 gimple_cond_set_lhs (cond_stmt
,
3320 ops
[bbinfo
[idx
].first_idx
]->op
);
3321 gimple_cond_set_rhs (cond_stmt
, boolean_false_node
);
3323 update_stmt (cond_stmt
);
3331 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3332 of STMT in it's operands. This is also known as a "destructive
3333 update" operation. */
3336 is_phi_for_stmt (gimple stmt
, tree operand
)
3341 use_operand_p arg_p
;
3344 if (TREE_CODE (operand
) != SSA_NAME
)
3347 lhs
= gimple_assign_lhs (stmt
);
3349 def_stmt
= SSA_NAME_DEF_STMT (operand
);
3350 def_phi
= dyn_cast
<gphi
*> (def_stmt
);
3354 FOR_EACH_PHI_ARG (arg_p
, def_phi
, i
, SSA_OP_USE
)
3355 if (lhs
== USE_FROM_PTR (arg_p
))
3360 /* Remove def stmt of VAR if VAR has zero uses and recurse
3361 on rhs1 operand if so. */
3364 remove_visited_stmt_chain (tree var
)
3367 gimple_stmt_iterator gsi
;
3371 if (TREE_CODE (var
) != SSA_NAME
|| !has_zero_uses (var
))
3373 stmt
= SSA_NAME_DEF_STMT (var
);
3374 if (is_gimple_assign (stmt
) && gimple_visited_p (stmt
))
3376 var
= gimple_assign_rhs1 (stmt
);
3377 gsi
= gsi_for_stmt (stmt
);
3378 reassoc_remove_stmt (&gsi
);
3379 release_defs (stmt
);
3386 /* This function checks three consequtive operands in
3387 passed operands vector OPS starting from OPINDEX and
3388 swaps two operands if it is profitable for binary operation
3389 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
3391 We pair ops with the same rank if possible.
3393 The alternative we try is to see if STMT is a destructive
3394 update style statement, which is like:
3397 In that case, we want to use the destructive update form to
3398 expose the possible vectorizer sum reduction opportunity.
3399 In that case, the third operand will be the phi node. This
3400 check is not performed if STMT is null.
3402 We could, of course, try to be better as noted above, and do a
3403 lot of work to try to find these opportunities in >3 operand
3404 cases, but it is unlikely to be worth it. */
3407 swap_ops_for_binary_stmt (vec
<operand_entry_t
> ops
,
3408 unsigned int opindex
, gimple stmt
)
3410 operand_entry_t oe1
, oe2
, oe3
;
3413 oe2
= ops
[opindex
+ 1];
3414 oe3
= ops
[opindex
+ 2];
3416 if ((oe1
->rank
== oe2
->rank
3417 && oe2
->rank
!= oe3
->rank
)
3418 || (stmt
&& is_phi_for_stmt (stmt
, oe3
->op
)
3419 && !is_phi_for_stmt (stmt
, oe1
->op
)
3420 && !is_phi_for_stmt (stmt
, oe2
->op
)))
3422 struct operand_entry temp
= *oe3
;
3424 oe3
->rank
= oe1
->rank
;
3426 oe1
->rank
= temp
.rank
;
3428 else if ((oe1
->rank
== oe3
->rank
3429 && oe2
->rank
!= oe3
->rank
)
3430 || (stmt
&& is_phi_for_stmt (stmt
, oe2
->op
)
3431 && !is_phi_for_stmt (stmt
, oe1
->op
)
3432 && !is_phi_for_stmt (stmt
, oe3
->op
)))
3434 struct operand_entry temp
= *oe2
;
3436 oe2
->rank
= oe1
->rank
;
3438 oe1
->rank
= temp
.rank
;
3442 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3443 two definitions, otherwise return STMT. */
3445 static inline gimple
3446 find_insert_point (gimple stmt
, tree rhs1
, tree rhs2
)
3448 if (TREE_CODE (rhs1
) == SSA_NAME
3449 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs1
)))
3450 stmt
= SSA_NAME_DEF_STMT (rhs1
);
3451 if (TREE_CODE (rhs2
) == SSA_NAME
3452 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs2
)))
3453 stmt
= SSA_NAME_DEF_STMT (rhs2
);
3457 /* Recursively rewrite our linearized statements so that the operators
3458 match those in OPS[OPINDEX], putting the computation in rank
3459 order. Return new lhs. */
3462 rewrite_expr_tree (gimple stmt
, unsigned int opindex
,
3463 vec
<operand_entry_t
> ops
, bool changed
)
3465 tree rhs1
= gimple_assign_rhs1 (stmt
);
3466 tree rhs2
= gimple_assign_rhs2 (stmt
);
3467 tree lhs
= gimple_assign_lhs (stmt
);
3470 /* The final recursion case for this function is that you have
3471 exactly two operations left.
3472 If we had exactly one op in the entire list to start with, we
3473 would have never called this function, and the tail recursion
3474 rewrites them one at a time. */
3475 if (opindex
+ 2 == ops
.length ())
3477 operand_entry_t oe1
, oe2
;
3480 oe2
= ops
[opindex
+ 1];
3482 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
3484 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3485 unsigned int uid
= gimple_uid (stmt
);
3487 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3489 fprintf (dump_file
, "Transforming ");
3490 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3493 /* Even when changed is false, reassociation could have e.g. removed
3494 some redundant operations, so unless we are just swapping the
3495 arguments or unless there is no change at all (then we just
3496 return lhs), force creation of a new SSA_NAME. */
3497 if (changed
|| ((rhs1
!= oe2
->op
|| rhs2
!= oe1
->op
) && opindex
))
3499 gimple insert_point
= find_insert_point (stmt
, oe1
->op
, oe2
->op
);
3500 lhs
= make_ssa_name (TREE_TYPE (lhs
));
3502 = gimple_build_assign (lhs
, gimple_assign_rhs_code (stmt
),
3504 gimple_set_uid (stmt
, uid
);
3505 gimple_set_visited (stmt
, true);
3506 if (insert_point
== gsi_stmt (gsi
))
3507 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3509 insert_stmt_after (stmt
, insert_point
);
3513 gcc_checking_assert (find_insert_point (stmt
, oe1
->op
, oe2
->op
)
3515 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
3516 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
3520 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
3521 remove_visited_stmt_chain (rhs1
);
3523 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3525 fprintf (dump_file
, " into ");
3526 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3532 /* If we hit here, we should have 3 or more ops left. */
3533 gcc_assert (opindex
+ 2 < ops
.length ());
3535 /* Rewrite the next operator. */
3538 /* Recurse on the LHS of the binary operator, which is guaranteed to
3539 be the non-leaf side. */
3541 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), opindex
+ 1, ops
,
3542 changed
|| oe
->op
!= rhs2
);
3544 if (oe
->op
!= rhs2
|| new_rhs1
!= rhs1
)
3546 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3548 fprintf (dump_file
, "Transforming ");
3549 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3552 /* If changed is false, this is either opindex == 0
3553 or all outer rhs2's were equal to corresponding oe->op,
3554 and powi_result is NULL.
3555 That means lhs is equivalent before and after reassociation.
3556 Otherwise ensure the old lhs SSA_NAME is not reused and
3557 create a new stmt as well, so that any debug stmts will be
3558 properly adjusted. */
3561 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3562 unsigned int uid
= gimple_uid (stmt
);
3563 gimple insert_point
= find_insert_point (stmt
, new_rhs1
, oe
->op
);
3565 lhs
= make_ssa_name (TREE_TYPE (lhs
));
3566 stmt
= gimple_build_assign (lhs
, gimple_assign_rhs_code (stmt
),
3568 gimple_set_uid (stmt
, uid
);
3569 gimple_set_visited (stmt
, true);
3570 if (insert_point
== gsi_stmt (gsi
))
3571 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3573 insert_stmt_after (stmt
, insert_point
);
3577 gcc_checking_assert (find_insert_point (stmt
, new_rhs1
, oe
->op
)
3579 gimple_assign_set_rhs1 (stmt
, new_rhs1
);
3580 gimple_assign_set_rhs2 (stmt
, oe
->op
);
3584 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3586 fprintf (dump_file
, " into ");
3587 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3593 /* Find out how many cycles we need to compute statements chain.
3594 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
3595 maximum number of independent statements we may execute per cycle. */
3598 get_required_cycles (int ops_num
, int cpu_width
)
3604 /* While we have more than 2 * cpu_width operands
3605 we may reduce number of operands by cpu_width
3607 res
= ops_num
/ (2 * cpu_width
);
3609 /* Remained operands count may be reduced twice per cycle
3610 until we have only one operand. */
3611 rest
= (unsigned)(ops_num
- res
* cpu_width
);
3612 elog
= exact_log2 (rest
);
3616 res
+= floor_log2 (rest
) + 1;
3621 /* Returns an optimal number of registers to use for computation of
3622 given statements. */
3625 get_reassociation_width (int ops_num
, enum tree_code opc
,
3628 int param_width
= PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH
);
3633 if (param_width
> 0)
3634 width
= param_width
;
3636 width
= targetm
.sched
.reassociation_width (opc
, mode
);
3641 /* Get the minimal time required for sequence computation. */
3642 cycles_best
= get_required_cycles (ops_num
, width
);
3644 /* Check if we may use less width and still compute sequence for
3645 the same time. It will allow us to reduce registers usage.
3646 get_required_cycles is monotonically increasing with lower width
3647 so we can perform a binary search for the minimal width that still
3648 results in the optimal cycle count. */
3650 while (width
> width_min
)
3652 int width_mid
= (width
+ width_min
) / 2;
3654 if (get_required_cycles (ops_num
, width_mid
) == cycles_best
)
3656 else if (width_min
< width_mid
)
3657 width_min
= width_mid
;
3665 /* Recursively rewrite our linearized statements so that the operators
3666 match those in OPS[OPINDEX], putting the computation in rank
3667 order and trying to allow operations to be executed in
3671 rewrite_expr_tree_parallel (gassign
*stmt
, int width
,
3672 vec
<operand_entry_t
> ops
)
3674 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
3675 int op_num
= ops
.length ();
3676 int stmt_num
= op_num
- 1;
3677 gimple
*stmts
= XALLOCAVEC (gimple
, stmt_num
);
3678 int op_index
= op_num
- 1;
3680 int ready_stmts_end
= 0;
3682 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
3684 /* We start expression rewriting from the top statements.
3685 So, in this loop we create a full list of statements
3686 we will work with. */
3687 stmts
[stmt_num
- 1] = stmt
;
3688 for (i
= stmt_num
- 2; i
>= 0; i
--)
3689 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
3691 for (i
= 0; i
< stmt_num
; i
++)
3695 /* Determine whether we should use results of
3696 already handled statements or not. */
3697 if (ready_stmts_end
== 0
3698 && (i
- stmt_index
>= width
|| op_index
< 1))
3699 ready_stmts_end
= i
;
3701 /* Now we choose operands for the next statement. Non zero
3702 value in ready_stmts_end means here that we should use
3703 the result of already generated statements as new operand. */
3704 if (ready_stmts_end
> 0)
3706 op1
= gimple_assign_lhs (stmts
[stmt_index
++]);
3707 if (ready_stmts_end
> stmt_index
)
3708 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3709 else if (op_index
>= 0)
3710 op2
= ops
[op_index
--]->op
;
3713 gcc_assert (stmt_index
< i
);
3714 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3717 if (stmt_index
>= ready_stmts_end
)
3718 ready_stmts_end
= 0;
3723 swap_ops_for_binary_stmt (ops
, op_index
- 2, NULL
);
3724 op2
= ops
[op_index
--]->op
;
3725 op1
= ops
[op_index
--]->op
;
3728 /* If we emit the last statement then we should put
3729 operands into the last statement. It will also
3731 if (op_index
< 0 && stmt_index
== i
)
3734 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3736 fprintf (dump_file
, "Transforming ");
3737 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3740 /* We keep original statement only for the last one. All
3741 others are recreated. */
3742 if (i
== stmt_num
- 1)
3744 gimple_assign_set_rhs1 (stmts
[i
], op1
);
3745 gimple_assign_set_rhs2 (stmts
[i
], op2
);
3746 update_stmt (stmts
[i
]);
3749 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
), op1
, op2
, opcode
);
3751 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3753 fprintf (dump_file
, " into ");
3754 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3758 remove_visited_stmt_chain (last_rhs1
);
3761 /* Transform STMT, which is really (A +B) + (C + D) into the left
3762 linear form, ((A+B)+C)+D.
3763 Recurse on D if necessary. */
3766 linearize_expr (gimple stmt
)
3768 gimple_stmt_iterator gsi
;
3769 gimple binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3770 gimple binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3771 gimple oldbinrhs
= binrhs
;
3772 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
3773 gimple newbinrhs
= NULL
;
3774 struct loop
*loop
= loop_containing_stmt (stmt
);
3775 tree lhs
= gimple_assign_lhs (stmt
);
3777 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
3778 && is_reassociable_op (binrhs
, rhscode
, loop
));
3780 gsi
= gsi_for_stmt (stmt
);
3782 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
3783 binrhs
= gimple_build_assign (make_ssa_name (TREE_TYPE (lhs
)),
3784 gimple_assign_rhs_code (binrhs
),
3785 gimple_assign_lhs (binlhs
),
3786 gimple_assign_rhs2 (binrhs
));
3787 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
3788 gsi_insert_before (&gsi
, binrhs
, GSI_SAME_STMT
);
3789 gimple_set_uid (binrhs
, gimple_uid (stmt
));
3791 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
3792 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3794 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3796 fprintf (dump_file
, "Linearized: ");
3797 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3800 reassociate_stats
.linearized
++;
3803 gsi
= gsi_for_stmt (oldbinrhs
);
3804 reassoc_remove_stmt (&gsi
);
3805 release_defs (oldbinrhs
);
3807 gimple_set_visited (stmt
, true);
3808 gimple_set_visited (binlhs
, true);
3809 gimple_set_visited (binrhs
, true);
3811 /* Tail recurse on the new rhs if it still needs reassociation. */
3812 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
3813 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3814 want to change the algorithm while converting to tuples. */
3815 linearize_expr (stmt
);
3818 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3819 it. Otherwise, return NULL. */
3822 get_single_immediate_use (tree lhs
)
3824 use_operand_p immuse
;
3827 if (TREE_CODE (lhs
) == SSA_NAME
3828 && single_imm_use (lhs
, &immuse
, &immusestmt
)
3829 && is_gimple_assign (immusestmt
))
3835 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3836 representing the negated value. Insertions of any necessary
3837 instructions go before GSI.
3838 This function is recursive in that, if you hand it "a_5" as the
3839 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3840 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3843 negate_value (tree tonegate
, gimple_stmt_iterator
*gsip
)
3845 gimple negatedefstmt
= NULL
;
3846 tree resultofnegate
;
3847 gimple_stmt_iterator gsi
;
3850 /* If we are trying to negate a name, defined by an add, negate the
3851 add operands instead. */
3852 if (TREE_CODE (tonegate
) == SSA_NAME
)
3853 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
3854 if (TREE_CODE (tonegate
) == SSA_NAME
3855 && is_gimple_assign (negatedefstmt
)
3856 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
3857 && has_single_use (gimple_assign_lhs (negatedefstmt
))
3858 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
3860 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
3861 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
3862 tree lhs
= gimple_assign_lhs (negatedefstmt
);
3865 gsi
= gsi_for_stmt (negatedefstmt
);
3866 rhs1
= negate_value (rhs1
, &gsi
);
3868 gsi
= gsi_for_stmt (negatedefstmt
);
3869 rhs2
= negate_value (rhs2
, &gsi
);
3871 gsi
= gsi_for_stmt (negatedefstmt
);
3872 lhs
= make_ssa_name (TREE_TYPE (lhs
));
3873 gimple_set_visited (negatedefstmt
, true);
3874 g
= gimple_build_assign (lhs
, PLUS_EXPR
, rhs1
, rhs2
);
3875 gimple_set_uid (g
, gimple_uid (negatedefstmt
));
3876 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3880 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
3881 resultofnegate
= force_gimple_operand_gsi (gsip
, tonegate
, true,
3882 NULL_TREE
, true, GSI_SAME_STMT
);
3884 uid
= gimple_uid (gsi_stmt (gsi
));
3885 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
3887 gimple stmt
= gsi_stmt (gsi
);
3888 if (gimple_uid (stmt
) != 0)
3890 gimple_set_uid (stmt
, uid
);
3892 return resultofnegate
;
3895 /* Return true if we should break up the subtract in STMT into an add
3896 with negate. This is true when we the subtract operands are really
3897 adds, or the subtract itself is used in an add expression. In
3898 either case, breaking up the subtract into an add with negate
3899 exposes the adds to reassociation. */
3902 should_break_up_subtract (gimple stmt
)
3904 tree lhs
= gimple_assign_lhs (stmt
);
3905 tree binlhs
= gimple_assign_rhs1 (stmt
);
3906 tree binrhs
= gimple_assign_rhs2 (stmt
);
3908 struct loop
*loop
= loop_containing_stmt (stmt
);
3910 if (TREE_CODE (binlhs
) == SSA_NAME
3911 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs
), PLUS_EXPR
, loop
))
3914 if (TREE_CODE (binrhs
) == SSA_NAME
3915 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
), PLUS_EXPR
, loop
))
3918 if (TREE_CODE (lhs
) == SSA_NAME
3919 && (immusestmt
= get_single_immediate_use (lhs
))
3920 && is_gimple_assign (immusestmt
)
3921 && (gimple_assign_rhs_code (immusestmt
) == PLUS_EXPR
3922 || gimple_assign_rhs_code (immusestmt
) == MULT_EXPR
))
3927 /* Transform STMT from A - B into A + -B. */
3930 break_up_subtract (gimple stmt
, gimple_stmt_iterator
*gsip
)
3932 tree rhs1
= gimple_assign_rhs1 (stmt
);
3933 tree rhs2
= gimple_assign_rhs2 (stmt
);
3935 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3937 fprintf (dump_file
, "Breaking up subtract ");
3938 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3941 rhs2
= negate_value (rhs2
, gsip
);
3942 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
3946 /* Determine whether STMT is a builtin call that raises an SSA name
3947 to an integer power and has only one use. If so, and this is early
3948 reassociation and unsafe math optimizations are permitted, place
3949 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3950 If any of these conditions does not hold, return FALSE. */
3953 acceptable_pow_call (gimple stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
3956 REAL_VALUE_TYPE c
, cint
;
3958 if (!first_pass_instance
3959 || !flag_unsafe_math_optimizations
3960 || !is_gimple_call (stmt
)
3961 || !has_single_use (gimple_call_lhs (stmt
)))
3964 fndecl
= gimple_call_fndecl (stmt
);
3967 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
3970 switch (DECL_FUNCTION_CODE (fndecl
))
3972 CASE_FLT_FN (BUILT_IN_POW
):
3973 if (flag_errno_math
)
3976 *base
= gimple_call_arg (stmt
, 0);
3977 arg1
= gimple_call_arg (stmt
, 1);
3979 if (TREE_CODE (arg1
) != REAL_CST
)
3982 c
= TREE_REAL_CST (arg1
);
3984 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
3987 *exponent
= real_to_integer (&c
);
3988 real_from_integer (&cint
, VOIDmode
, *exponent
, SIGNED
);
3989 if (!real_identical (&c
, &cint
))
3994 CASE_FLT_FN (BUILT_IN_POWI
):
3995 *base
= gimple_call_arg (stmt
, 0);
3996 arg1
= gimple_call_arg (stmt
, 1);
3998 if (!tree_fits_shwi_p (arg1
))
4001 *exponent
= tree_to_shwi (arg1
);
4008 /* Expanding negative exponents is generally unproductive, so we don't
4009 complicate matters with those. Exponents of zero and one should
4010 have been handled by expression folding. */
4011 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
4017 /* Recursively linearize a binary expression that is the RHS of STMT.
4018 Place the operands of the expression tree in the vector named OPS. */
4021 linearize_expr_tree (vec
<operand_entry_t
> *ops
, gimple stmt
,
4022 bool is_associative
, bool set_visited
)
4024 tree binlhs
= gimple_assign_rhs1 (stmt
);
4025 tree binrhs
= gimple_assign_rhs2 (stmt
);
4026 gimple binlhsdef
= NULL
, binrhsdef
= NULL
;
4027 bool binlhsisreassoc
= false;
4028 bool binrhsisreassoc
= false;
4029 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
4030 struct loop
*loop
= loop_containing_stmt (stmt
);
4031 tree base
= NULL_TREE
;
4032 HOST_WIDE_INT exponent
= 0;
4035 gimple_set_visited (stmt
, true);
4037 if (TREE_CODE (binlhs
) == SSA_NAME
)
4039 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
4040 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
4041 && !stmt_could_throw_p (binlhsdef
));
4044 if (TREE_CODE (binrhs
) == SSA_NAME
)
4046 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
4047 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
4048 && !stmt_could_throw_p (binrhsdef
));
4051 /* If the LHS is not reassociable, but the RHS is, we need to swap
4052 them. If neither is reassociable, there is nothing we can do, so
4053 just put them in the ops vector. If the LHS is reassociable,
4054 linearize it. If both are reassociable, then linearize the RHS
4057 if (!binlhsisreassoc
)
4059 /* If this is not a associative operation like division, give up. */
4060 if (!is_associative
)
4062 add_to_ops_vec (ops
, binrhs
);
4066 if (!binrhsisreassoc
)
4068 if (rhscode
== MULT_EXPR
4069 && TREE_CODE (binrhs
) == SSA_NAME
4070 && acceptable_pow_call (binrhsdef
, &base
, &exponent
))
4072 add_repeat_to_ops_vec (ops
, base
, exponent
);
4073 gimple_set_visited (binrhsdef
, true);
4076 add_to_ops_vec (ops
, binrhs
);
4078 if (rhscode
== MULT_EXPR
4079 && TREE_CODE (binlhs
) == SSA_NAME
4080 && acceptable_pow_call (binlhsdef
, &base
, &exponent
))
4082 add_repeat_to_ops_vec (ops
, base
, exponent
);
4083 gimple_set_visited (binlhsdef
, true);
4086 add_to_ops_vec (ops
, binlhs
);
4091 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4093 fprintf (dump_file
, "swapping operands of ");
4094 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4097 swap_ssa_operands (stmt
,
4098 gimple_assign_rhs1_ptr (stmt
),
4099 gimple_assign_rhs2_ptr (stmt
));
4102 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4104 fprintf (dump_file
, " is now ");
4105 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4108 /* We want to make it so the lhs is always the reassociative op,
4110 std::swap (binlhs
, binrhs
);
4112 else if (binrhsisreassoc
)
4114 linearize_expr (stmt
);
4115 binlhs
= gimple_assign_rhs1 (stmt
);
4116 binrhs
= gimple_assign_rhs2 (stmt
);
4119 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
4120 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
4122 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
4123 is_associative
, set_visited
);
4125 if (rhscode
== MULT_EXPR
4126 && TREE_CODE (binrhs
) == SSA_NAME
4127 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs
), &base
, &exponent
))
4129 add_repeat_to_ops_vec (ops
, base
, exponent
);
4130 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs
), true);
4133 add_to_ops_vec (ops
, binrhs
);
4136 /* Repropagate the negates back into subtracts, since no other pass
4137 currently does it. */
4140 repropagate_negates (void)
4145 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
4147 gimple user
= get_single_immediate_use (negate
);
4149 if (!user
|| !is_gimple_assign (user
))
4152 /* The negate operand can be either operand of a PLUS_EXPR
4153 (it can be the LHS if the RHS is a constant for example).
4155 Force the negate operand to the RHS of the PLUS_EXPR, then
4156 transform the PLUS_EXPR into a MINUS_EXPR. */
4157 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
4159 /* If the negated operand appears on the LHS of the
4160 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4161 to force the negated operand to the RHS of the PLUS_EXPR. */
4162 if (gimple_assign_rhs1 (user
) == negate
)
4164 swap_ssa_operands (user
,
4165 gimple_assign_rhs1_ptr (user
),
4166 gimple_assign_rhs2_ptr (user
));
4169 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4170 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4171 if (gimple_assign_rhs2 (user
) == negate
)
4173 tree rhs1
= gimple_assign_rhs1 (user
);
4174 tree rhs2
= get_unary_op (negate
, NEGATE_EXPR
);
4175 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
4176 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
, rhs2
);
4180 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
4182 if (gimple_assign_rhs1 (user
) == negate
)
4187 which we transform into
4190 This pushes down the negate which we possibly can merge
4191 into some other operation, hence insert it into the
4192 plus_negates vector. */
4193 gimple feed
= SSA_NAME_DEF_STMT (negate
);
4194 tree a
= gimple_assign_rhs1 (feed
);
4195 tree b
= gimple_assign_rhs2 (user
);
4196 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
);
4197 gimple_stmt_iterator gsi2
= gsi_for_stmt (user
);
4198 tree x
= make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed
)));
4199 gimple g
= gimple_build_assign (x
, PLUS_EXPR
, a
, b
);
4200 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
4201 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, x
);
4202 user
= gsi_stmt (gsi2
);
4204 reassoc_remove_stmt (&gsi
);
4205 release_defs (feed
);
4206 plus_negates
.safe_push (gimple_assign_lhs (user
));
4210 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4211 rid of one operation. */
4212 gimple feed
= SSA_NAME_DEF_STMT (negate
);
4213 tree a
= gimple_assign_rhs1 (feed
);
4214 tree rhs1
= gimple_assign_rhs1 (user
);
4215 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
4216 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, a
);
4217 update_stmt (gsi_stmt (gsi
));
4223 /* Returns true if OP is of a type for which we can do reassociation.
4224 That is for integral or non-saturating fixed-point types, and for
4225 floating point type when associative-math is enabled. */
4228 can_reassociate_p (tree op
)
4230 tree type
= TREE_TYPE (op
);
4231 if ((INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_WRAPS (type
))
4232 || NON_SAT_FIXED_POINT_TYPE_P (type
)
4233 || (flag_associative_math
&& FLOAT_TYPE_P (type
)))
4238 /* Break up subtract operations in block BB.
4240 We do this top down because we don't know whether the subtract is
4241 part of a possible chain of reassociation except at the top.
4250 we want to break up k = t - q, but we won't until we've transformed q
4251 = b - r, which won't be broken up until we transform b = c - d.
4253 En passant, clear the GIMPLE visited flag on every statement
4254 and set UIDs within each basic block. */
4257 break_up_subtract_bb (basic_block bb
)
4259 gimple_stmt_iterator gsi
;
4261 unsigned int uid
= 1;
4263 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4265 gimple stmt
= gsi_stmt (gsi
);
4266 gimple_set_visited (stmt
, false);
4267 gimple_set_uid (stmt
, uid
++);
4269 if (!is_gimple_assign (stmt
)
4270 || !can_reassociate_p (gimple_assign_lhs (stmt
)))
4273 /* Look for simple gimple subtract operations. */
4274 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
4276 if (!can_reassociate_p (gimple_assign_rhs1 (stmt
))
4277 || !can_reassociate_p (gimple_assign_rhs2 (stmt
)))
4280 /* Check for a subtract used only in an addition. If this
4281 is the case, transform it into add of a negate for better
4282 reassociation. IE transform C = A-B into C = A + -B if C
4283 is only used in an addition. */
4284 if (should_break_up_subtract (stmt
))
4285 break_up_subtract (stmt
, &gsi
);
4287 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
4288 && can_reassociate_p (gimple_assign_rhs1 (stmt
)))
4289 plus_negates
.safe_push (gimple_assign_lhs (stmt
));
4291 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
4293 son
= next_dom_son (CDI_DOMINATORS
, son
))
4294 break_up_subtract_bb (son
);
4297 /* Used for repeated factor analysis. */
4298 struct repeat_factor_d
4300 /* An SSA name that occurs in a multiply chain. */
4303 /* Cached rank of the factor. */
4306 /* Number of occurrences of the factor in the chain. */
4307 HOST_WIDE_INT count
;
4309 /* An SSA name representing the product of this factor and
4310 all factors appearing later in the repeated factor vector. */
4314 typedef struct repeat_factor_d repeat_factor
, *repeat_factor_t
;
4315 typedef const struct repeat_factor_d
*const_repeat_factor_t
;
4318 static vec
<repeat_factor
> repeat_factor_vec
;
4320 /* Used for sorting the repeat factor vector. Sort primarily by
4321 ascending occurrence count, secondarily by descending rank. */
4324 compare_repeat_factors (const void *x1
, const void *x2
)
4326 const_repeat_factor_t rf1
= (const_repeat_factor_t
) x1
;
4327 const_repeat_factor_t rf2
= (const_repeat_factor_t
) x2
;
4329 if (rf1
->count
!= rf2
->count
)
4330 return rf1
->count
- rf2
->count
;
4332 return rf2
->rank
- rf1
->rank
;
4335 /* Look for repeated operands in OPS in the multiply tree rooted at
4336 STMT. Replace them with an optimal sequence of multiplies and powi
4337 builtin calls, and remove the used operands from OPS. Return an
4338 SSA name representing the value of the replacement sequence. */
4341 attempt_builtin_powi (gimple stmt
, vec
<operand_entry_t
> *ops
)
4343 unsigned i
, j
, vec_len
;
4346 repeat_factor_t rf1
, rf2
;
4347 repeat_factor rfnew
;
4348 tree result
= NULL_TREE
;
4349 tree target_ssa
, iter_result
;
4350 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
4351 tree powi_fndecl
= mathfn_built_in (type
, BUILT_IN_POWI
);
4352 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4353 gimple mul_stmt
, pow_stmt
;
4355 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4360 /* Allocate the repeated factor vector. */
4361 repeat_factor_vec
.create (10);
4363 /* Scan the OPS vector for all SSA names in the product and build
4364 up a vector of occurrence counts for each factor. */
4365 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
4367 if (TREE_CODE (oe
->op
) == SSA_NAME
)
4369 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4371 if (rf1
->factor
== oe
->op
)
4373 rf1
->count
+= oe
->count
;
4378 if (j
>= repeat_factor_vec
.length ())
4380 rfnew
.factor
= oe
->op
;
4381 rfnew
.rank
= oe
->rank
;
4382 rfnew
.count
= oe
->count
;
4383 rfnew
.repr
= NULL_TREE
;
4384 repeat_factor_vec
.safe_push (rfnew
);
4389 /* Sort the repeated factor vector by (a) increasing occurrence count,
4390 and (b) decreasing rank. */
4391 repeat_factor_vec
.qsort (compare_repeat_factors
);
4393 /* It is generally best to combine as many base factors as possible
4394 into a product before applying __builtin_powi to the result.
4395 However, the sort order chosen for the repeated factor vector
4396 allows us to cache partial results for the product of the base
4397 factors for subsequent use. When we already have a cached partial
4398 result from a previous iteration, it is best to make use of it
4399 before looking for another __builtin_pow opportunity.
4401 As an example, consider x * x * y * y * y * z * z * z * z.
4402 We want to first compose the product x * y * z, raise it to the
4403 second power, then multiply this by y * z, and finally multiply
4404 by z. This can be done in 5 multiplies provided we cache y * z
4405 for use in both expressions:
4413 If we instead ignored the cached y * z and first multiplied by
4414 the __builtin_pow opportunity z * z, we would get the inferior:
4423 vec_len
= repeat_factor_vec
.length ();
4425 /* Repeatedly look for opportunities to create a builtin_powi call. */
4428 HOST_WIDE_INT power
;
4430 /* First look for the largest cached product of factors from
4431 preceding iterations. If found, create a builtin_powi for
4432 it if the minimum occurrence count for its factors is at
4433 least 2, or just use this cached product as our next
4434 multiplicand if the minimum occurrence count is 1. */
4435 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4437 if (rf1
->repr
&& rf1
->count
> 0)
4447 iter_result
= rf1
->repr
;
4449 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4453 fputs ("Multiplying by cached product ", dump_file
);
4454 for (elt
= j
; elt
< vec_len
; elt
++)
4456 rf
= &repeat_factor_vec
[elt
];
4457 print_generic_expr (dump_file
, rf
->factor
, 0);
4458 if (elt
< vec_len
- 1)
4459 fputs (" * ", dump_file
);
4461 fputs ("\n", dump_file
);
4466 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4467 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
4468 build_int_cst (integer_type_node
,
4470 gimple_call_set_lhs (pow_stmt
, iter_result
);
4471 gimple_set_location (pow_stmt
, gimple_location (stmt
));
4472 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
4474 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4478 fputs ("Building __builtin_pow call for cached product (",
4480 for (elt
= j
; elt
< vec_len
; elt
++)
4482 rf
= &repeat_factor_vec
[elt
];
4483 print_generic_expr (dump_file
, rf
->factor
, 0);
4484 if (elt
< vec_len
- 1)
4485 fputs (" * ", dump_file
);
4487 fprintf (dump_file
, ")^" HOST_WIDE_INT_PRINT_DEC
"\n",
4494 /* Otherwise, find the first factor in the repeated factor
4495 vector whose occurrence count is at least 2. If no such
4496 factor exists, there are no builtin_powi opportunities
4498 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4500 if (rf1
->count
>= 2)
4509 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4513 fputs ("Building __builtin_pow call for (", dump_file
);
4514 for (elt
= j
; elt
< vec_len
; elt
++)
4516 rf
= &repeat_factor_vec
[elt
];
4517 print_generic_expr (dump_file
, rf
->factor
, 0);
4518 if (elt
< vec_len
- 1)
4519 fputs (" * ", dump_file
);
4521 fprintf (dump_file
, ")^" HOST_WIDE_INT_PRINT_DEC
"\n", power
);
4524 reassociate_stats
.pows_created
++;
4526 /* Visit each element of the vector in reverse order (so that
4527 high-occurrence elements are visited first, and within the
4528 same occurrence count, lower-ranked elements are visited
4529 first). Form a linear product of all elements in this order
4530 whose occurrencce count is at least that of element J.
4531 Record the SSA name representing the product of each element
4532 with all subsequent elements in the vector. */
4533 if (j
== vec_len
- 1)
4534 rf1
->repr
= rf1
->factor
;
4537 for (ii
= vec_len
- 2; ii
>= (int)j
; ii
--)
4541 rf1
= &repeat_factor_vec
[ii
];
4542 rf2
= &repeat_factor_vec
[ii
+ 1];
4544 /* Init the last factor's representative to be itself. */
4546 rf2
->repr
= rf2
->factor
;
4551 target_ssa
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4552 mul_stmt
= gimple_build_assign (target_ssa
, MULT_EXPR
,
4554 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4555 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
4556 rf1
->repr
= target_ssa
;
4558 /* Don't reprocess the multiply we just introduced. */
4559 gimple_set_visited (mul_stmt
, true);
4563 /* Form a call to __builtin_powi for the maximum product
4564 just formed, raised to the power obtained earlier. */
4565 rf1
= &repeat_factor_vec
[j
];
4566 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4567 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
4568 build_int_cst (integer_type_node
,
4570 gimple_call_set_lhs (pow_stmt
, iter_result
);
4571 gimple_set_location (pow_stmt
, gimple_location (stmt
));
4572 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
4575 /* If we previously formed at least one other builtin_powi call,
4576 form the product of this one and those others. */
4579 tree new_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4580 mul_stmt
= gimple_build_assign (new_result
, MULT_EXPR
,
4581 result
, iter_result
);
4582 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4583 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
4584 gimple_set_visited (mul_stmt
, true);
4585 result
= new_result
;
4588 result
= iter_result
;
4590 /* Decrement the occurrence count of each element in the product
4591 by the count found above, and remove this many copies of each
4593 for (i
= j
; i
< vec_len
; i
++)
4598 rf1
= &repeat_factor_vec
[i
];
4599 rf1
->count
-= power
;
4601 FOR_EACH_VEC_ELT_REVERSE (*ops
, n
, oe
)
4603 if (oe
->op
== rf1
->factor
)
4607 ops
->ordered_remove (n
);
4623 /* At this point all elements in the repeated factor vector have a
4624 remaining occurrence count of 0 or 1, and those with a count of 1
4625 don't have cached representatives. Re-sort the ops vector and
4627 ops
->qsort (sort_by_operand_rank
);
4628 repeat_factor_vec
.release ();
4630 /* Return the final product computed herein. Note that there may
4631 still be some elements with single occurrence count left in OPS;
4632 those will be handled by the normal reassociation logic. */
4636 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4639 transform_stmt_to_copy (gimple_stmt_iterator
*gsi
, gimple stmt
, tree new_rhs
)
4643 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4645 fprintf (dump_file
, "Transforming ");
4646 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4649 rhs1
= gimple_assign_rhs1 (stmt
);
4650 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
4652 remove_visited_stmt_chain (rhs1
);
4654 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4656 fprintf (dump_file
, " into ");
4657 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4661 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4664 transform_stmt_to_multiply (gimple_stmt_iterator
*gsi
, gimple stmt
,
4665 tree rhs1
, tree rhs2
)
4667 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4669 fprintf (dump_file
, "Transforming ");
4670 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4673 gimple_assign_set_rhs_with_ops (gsi
, MULT_EXPR
, rhs1
, rhs2
);
4674 update_stmt (gsi_stmt (*gsi
));
4675 remove_visited_stmt_chain (rhs1
);
4677 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4679 fprintf (dump_file
, " into ");
4680 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4684 /* Reassociate expressions in basic block BB and its post-dominator as
4688 reassociate_bb (basic_block bb
)
4690 gimple_stmt_iterator gsi
;
4692 gimple stmt
= last_stmt (bb
);
4694 if (stmt
&& !gimple_visited_p (stmt
))
4695 maybe_optimize_range_tests (stmt
);
4697 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
4699 stmt
= gsi_stmt (gsi
);
4701 if (is_gimple_assign (stmt
)
4702 && !stmt_could_throw_p (stmt
))
4704 tree lhs
, rhs1
, rhs2
;
4705 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
4707 /* If this is not a gimple binary expression, there is
4708 nothing for us to do with it. */
4709 if (get_gimple_rhs_class (rhs_code
) != GIMPLE_BINARY_RHS
)
4712 /* If this was part of an already processed statement,
4713 we don't need to touch it again. */
4714 if (gimple_visited_p (stmt
))
4716 /* This statement might have become dead because of previous
4718 if (has_zero_uses (gimple_get_lhs (stmt
)))
4720 reassoc_remove_stmt (&gsi
);
4721 release_defs (stmt
);
4722 /* We might end up removing the last stmt above which
4723 places the iterator to the end of the sequence.
4724 Reset it to the last stmt in this case which might
4725 be the end of the sequence as well if we removed
4726 the last statement of the sequence. In which case
4727 we need to bail out. */
4728 if (gsi_end_p (gsi
))
4730 gsi
= gsi_last_bb (bb
);
4731 if (gsi_end_p (gsi
))
4738 lhs
= gimple_assign_lhs (stmt
);
4739 rhs1
= gimple_assign_rhs1 (stmt
);
4740 rhs2
= gimple_assign_rhs2 (stmt
);
4742 /* For non-bit or min/max operations we can't associate
4743 all types. Verify that here. */
4744 if (rhs_code
!= BIT_IOR_EXPR
4745 && rhs_code
!= BIT_AND_EXPR
4746 && rhs_code
!= BIT_XOR_EXPR
4747 && rhs_code
!= MIN_EXPR
4748 && rhs_code
!= MAX_EXPR
4749 && (!can_reassociate_p (lhs
)
4750 || !can_reassociate_p (rhs1
)
4751 || !can_reassociate_p (rhs2
)))
4754 if (associative_tree_code (rhs_code
))
4756 auto_vec
<operand_entry_t
> ops
;
4757 tree powi_result
= NULL_TREE
;
4759 /* There may be no immediate uses left by the time we
4760 get here because we may have eliminated them all. */
4761 if (TREE_CODE (lhs
) == SSA_NAME
&& has_zero_uses (lhs
))
4764 gimple_set_visited (stmt
, true);
4765 linearize_expr_tree (&ops
, stmt
, true, true);
4766 ops
.qsort (sort_by_operand_rank
);
4767 optimize_ops_list (rhs_code
, &ops
);
4768 if (undistribute_ops_list (rhs_code
, &ops
,
4769 loop_containing_stmt (stmt
)))
4771 ops
.qsort (sort_by_operand_rank
);
4772 optimize_ops_list (rhs_code
, &ops
);
4775 if (rhs_code
== BIT_IOR_EXPR
|| rhs_code
== BIT_AND_EXPR
)
4776 optimize_range_tests (rhs_code
, &ops
);
4778 if (first_pass_instance
4779 && rhs_code
== MULT_EXPR
4780 && flag_unsafe_math_optimizations
)
4781 powi_result
= attempt_builtin_powi (stmt
, &ops
);
4783 /* If the operand vector is now empty, all operands were
4784 consumed by the __builtin_powi optimization. */
4785 if (ops
.length () == 0)
4786 transform_stmt_to_copy (&gsi
, stmt
, powi_result
);
4787 else if (ops
.length () == 1)
4789 tree last_op
= ops
.last ()->op
;
4792 transform_stmt_to_multiply (&gsi
, stmt
, last_op
,
4795 transform_stmt_to_copy (&gsi
, stmt
, last_op
);
4799 machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
4800 int ops_num
= ops
.length ();
4801 int width
= get_reassociation_width (ops_num
, rhs_code
, mode
);
4804 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4806 "Width = %d was chosen for reassociation\n", width
);
4809 && ops
.length () > 3)
4810 rewrite_expr_tree_parallel (as_a
<gassign
*> (stmt
),
4814 /* When there are three operands left, we want
4815 to make sure the ones that get the double
4816 binary op are chosen wisely. */
4817 int len
= ops
.length ();
4819 swap_ops_for_binary_stmt (ops
, len
- 3, stmt
);
4821 new_lhs
= rewrite_expr_tree (stmt
, 0, ops
,
4822 powi_result
!= NULL
);
4825 /* If we combined some repeated factors into a
4826 __builtin_powi call, multiply that result by the
4827 reassociated operands. */
4830 gimple mul_stmt
, lhs_stmt
= SSA_NAME_DEF_STMT (lhs
);
4831 tree type
= TREE_TYPE (lhs
);
4832 tree target_ssa
= make_temp_ssa_name (type
, NULL
,
4834 gimple_set_lhs (lhs_stmt
, target_ssa
);
4835 update_stmt (lhs_stmt
);
4837 target_ssa
= new_lhs
;
4838 mul_stmt
= gimple_build_assign (lhs
, MULT_EXPR
,
4839 powi_result
, target_ssa
);
4840 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4841 gsi_insert_after (&gsi
, mul_stmt
, GSI_NEW_STMT
);
4847 for (son
= first_dom_son (CDI_POST_DOMINATORS
, bb
);
4849 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
4850 reassociate_bb (son
);
4853 /* Add jumps around shifts for range tests turned into bit tests.
4854 For each SSA_NAME VAR we have code like:
4855 VAR = ...; // final stmt of range comparison
4856 // bit test here...;
4857 OTHERVAR = ...; // final stmt of the bit test sequence
4858 RES = VAR | OTHERVAR;
4859 Turn the above into:
4866 // bit test here...;
4869 # RES = PHI<1(l1), OTHERVAR(l2)>; */
4877 FOR_EACH_VEC_ELT (reassoc_branch_fixups
, i
, var
)
4879 gimple def_stmt
= SSA_NAME_DEF_STMT (var
);
4882 bool ok
= single_imm_use (var
, &use
, &use_stmt
);
4884 && is_gimple_assign (use_stmt
)
4885 && gimple_assign_rhs_code (use_stmt
) == BIT_IOR_EXPR
4886 && gimple_bb (def_stmt
) == gimple_bb (use_stmt
));
4888 basic_block cond_bb
= gimple_bb (def_stmt
);
4889 basic_block then_bb
= split_block (cond_bb
, def_stmt
)->dest
;
4890 basic_block merge_bb
= split_block (then_bb
, use_stmt
)->dest
;
4892 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
4893 gimple g
= gimple_build_cond (NE_EXPR
, var
,
4894 build_zero_cst (TREE_TYPE (var
)),
4895 NULL_TREE
, NULL_TREE
);
4896 location_t loc
= gimple_location (use_stmt
);
4897 gimple_set_location (g
, loc
);
4898 gsi_insert_after (&gsi
, g
, GSI_NEW_STMT
);
4900 edge etrue
= make_edge (cond_bb
, merge_bb
, EDGE_TRUE_VALUE
);
4901 etrue
->probability
= REG_BR_PROB_BASE
/ 2;
4902 etrue
->count
= cond_bb
->count
/ 2;
4903 edge efalse
= find_edge (cond_bb
, then_bb
);
4904 efalse
->flags
= EDGE_FALSE_VALUE
;
4905 efalse
->probability
-= etrue
->probability
;
4906 efalse
->count
-= etrue
->count
;
4907 then_bb
->count
-= etrue
->count
;
4909 tree othervar
= NULL_TREE
;
4910 if (gimple_assign_rhs1 (use_stmt
) == var
)
4911 othervar
= gimple_assign_rhs2 (use_stmt
);
4912 else if (gimple_assign_rhs2 (use_stmt
) == var
)
4913 othervar
= gimple_assign_rhs1 (use_stmt
);
4916 tree lhs
= gimple_assign_lhs (use_stmt
);
4917 gphi
*phi
= create_phi_node (lhs
, merge_bb
);
4918 add_phi_arg (phi
, build_one_cst (TREE_TYPE (lhs
)), etrue
, loc
);
4919 add_phi_arg (phi
, othervar
, single_succ_edge (then_bb
), loc
);
4920 gsi
= gsi_for_stmt (use_stmt
);
4921 gsi_remove (&gsi
, true);
4923 set_immediate_dominator (CDI_DOMINATORS
, merge_bb
, cond_bb
);
4924 set_immediate_dominator (CDI_POST_DOMINATORS
, cond_bb
, merge_bb
);
4926 reassoc_branch_fixups
.release ();
4929 void dump_ops_vector (FILE *file
, vec
<operand_entry_t
> ops
);
4930 void debug_ops_vector (vec
<operand_entry_t
> ops
);
4932 /* Dump the operand entry vector OPS to FILE. */
4935 dump_ops_vector (FILE *file
, vec
<operand_entry_t
> ops
)
4940 FOR_EACH_VEC_ELT (ops
, i
, oe
)
4942 fprintf (file
, "Op %d -> rank: %d, tree: ", i
, oe
->rank
);
4943 print_generic_expr (file
, oe
->op
, 0);
4947 /* Dump the operand entry vector OPS to STDERR. */
4950 debug_ops_vector (vec
<operand_entry_t
> ops
)
4952 dump_ops_vector (stderr
, ops
);
4958 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
4959 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun
));
4962 /* Initialize the reassociation pass. */
4969 int *bbs
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
);
4971 /* Find the loops, so that we can prevent moving calculations in
4973 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
4975 memset (&reassociate_stats
, 0, sizeof (reassociate_stats
));
4977 next_operand_entry_id
= 0;
4979 /* Reverse RPO (Reverse Post Order) will give us something where
4980 deeper loops come later. */
4981 pre_and_rev_post_order_compute (NULL
, bbs
, false);
4982 bb_rank
= XCNEWVEC (long, last_basic_block_for_fn (cfun
));
4983 operand_rank
= new hash_map
<tree
, long>;
4985 /* Give each default definition a distinct rank. This includes
4986 parameters and the static chain. Walk backwards over all
4987 SSA names so that we get proper rank ordering according
4988 to tree_swap_operands_p. */
4989 for (i
= num_ssa_names
- 1; i
> 0; --i
)
4991 tree name
= ssa_name (i
);
4992 if (name
&& SSA_NAME_IS_DEFAULT_DEF (name
))
4993 insert_operand_rank (name
, ++rank
);
4996 /* Set up rank for each BB */
4997 for (i
= 0; i
< n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
; i
++)
4998 bb_rank
[bbs
[i
]] = ++rank
<< 16;
5001 calculate_dominance_info (CDI_POST_DOMINATORS
);
5002 plus_negates
= vNULL
;
5005 /* Cleanup after the reassociation pass, and print stats if
5011 statistics_counter_event (cfun
, "Linearized",
5012 reassociate_stats
.linearized
);
5013 statistics_counter_event (cfun
, "Constants eliminated",
5014 reassociate_stats
.constants_eliminated
);
5015 statistics_counter_event (cfun
, "Ops eliminated",
5016 reassociate_stats
.ops_eliminated
);
5017 statistics_counter_event (cfun
, "Statements rewritten",
5018 reassociate_stats
.rewritten
);
5019 statistics_counter_event (cfun
, "Built-in pow[i] calls encountered",
5020 reassociate_stats
.pows_encountered
);
5021 statistics_counter_event (cfun
, "Built-in powi calls created",
5022 reassociate_stats
.pows_created
);
5024 delete operand_rank
;
5025 operand_entry_pool
.release ();
5027 plus_negates
.release ();
5028 free_dominance_info (CDI_POST_DOMINATORS
);
5029 loop_optimizer_finalize ();
5032 /* Gate and execute functions for Reassociation. */
5035 execute_reassoc (void)
5040 repropagate_negates ();
5049 const pass_data pass_data_reassoc
=
5051 GIMPLE_PASS
, /* type */
5052 "reassoc", /* name */
5053 OPTGROUP_NONE
, /* optinfo_flags */
5054 TV_TREE_REASSOC
, /* tv_id */
5055 ( PROP_cfg
| PROP_ssa
), /* properties_required */
5056 0, /* properties_provided */
5057 0, /* properties_destroyed */
5058 0, /* todo_flags_start */
5059 TODO_update_ssa_only_virtuals
, /* todo_flags_finish */
5062 class pass_reassoc
: public gimple_opt_pass
5065 pass_reassoc (gcc::context
*ctxt
)
5066 : gimple_opt_pass (pass_data_reassoc
, ctxt
)
5069 /* opt_pass methods: */
5070 opt_pass
* clone () { return new pass_reassoc (m_ctxt
); }
5071 virtual bool gate (function
*) { return flag_tree_reassoc
!= 0; }
5072 virtual unsigned int execute (function
*) { return execute_reassoc (); }
5074 }; // class pass_reassoc
5079 make_pass_reassoc (gcc::context
*ctxt
)
5081 return new pass_reassoc (ctxt
);