1 /* Reassociation for trees.
2 Copyright (C) 2005-2014 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
24 #include "hash-table.h"
29 #include "stor-layout.h"
35 #include "hard-reg-set.h"
38 #include "dominance.h"
41 #include "basic-block.h"
42 #include "gimple-pretty-print.h"
43 #include "tree-inline.h"
45 #include "tree-ssa-alias.h"
46 #include "internal-fn.h"
47 #include "gimple-fold.h"
49 #include "gimple-expr.h"
52 #include "gimple-iterator.h"
53 #include "gimplify-me.h"
54 #include "gimple-ssa.h"
56 #include "tree-phinodes.h"
57 #include "ssa-iterators.h"
58 #include "stringpool.h"
59 #include "tree-ssanames.h"
60 #include "tree-ssa-loop-niter.h"
61 #include "tree-ssa-loop.h"
65 #include "tree-iterator.h"
66 #include "tree-pass.h"
67 #include "alloc-pool.h"
68 #include "langhooks.h"
73 #include "diagnostic-core.h"
76 #include "insn-codes.h"
79 /* This is a simple global reassociation pass. It is, in part, based
80 on the LLVM pass of the same name (They do some things more/less
81 than we do, in different orders, etc).
83 It consists of five steps:
85 1. Breaking up subtract operations into addition + negate, where
86 it would promote the reassociation of adds.
88 2. Left linearization of the expression trees, so that (A+B)+(C+D)
89 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
90 During linearization, we place the operands of the binary
91 expressions into a vector of operand_entry_t
3. Optimization of the operand lists, eliminating things like a +
-a, a & a, etc.
96 3a. Combine repeated factors with the same occurrence counts
97 into a __builtin_powi call that will later be optimized into
98 an optimal number of multiplies.
100 4. Rewrite the expression trees we linearized and optimized so
101 they are in proper rank order.
103 5. Repropagate negates, as nothing else will clean it up ATM.
105 A bit of theory on #4, since nobody seems to write anything down
106 about why it makes sense to do it the way they do it:
108 We could do this much nicer theoretically, but don't (for reasons
109 explained after how to do it theoretically nice :P).
111 In order to promote the most redundancy elimination, you want
112 binary expressions whose operands are the same rank (or
113 preferably, the same value) exposed to the redundancy eliminator,
114 for possible elimination.
116 So the way to do this if we really cared, is to build the new op
117 tree from the leaves to the roots, merging as you go, and putting the
118 new op on the end of the worklist, until you are left with one
119 thing on the worklist.
121 IE if you have to rewrite the following set of operands (listed with
122 rank in parentheses), with opcode PLUS_EXPR:
124 a (1), b (1), c (1), d (2), e (2)
We start with our merge worklist empty, and the ops list with all of
those on it.
You want to first merge all leaves of the same rank, as much as
possible.
133 So first build a binary op of
135 mergetmp = a + b, and put "mergetmp" on the merge worklist.
137 Because there is no three operand form of PLUS_EXPR, c is not going to
138 be exposed to redundancy elimination as a rank 1 operand.
140 So you might as well throw it on the merge worklist (you could also
141 consider it to now be a rank two operand, and merge it with d and e,
142 but in this case, you then have evicted e from a binary op. So at
143 least in this situation, you can't win.)
Then build a binary op of d + e

mergetmp2 = d + e

and put mergetmp2 on the merge worklist.
150 so merge worklist = {mergetmp, c, mergetmp2}
152 Continue building binary ops of these operations until you have only
153 one operation left on the worklist.
158 mergetmp3 = mergetmp + c
160 worklist = {mergetmp2, mergetmp3}
162 mergetmp4 = mergetmp2 + mergetmp3
164 worklist = {mergetmp4}
166 because we have one operation left, we can now just set the original
167 statement equal to the result of that operation.
169 This will at least expose a + b and d + e to redundancy elimination
170 as binary operations.
172 For extra points, you can reuse the old statements to build the
173 mergetmps, since you shouldn't run out.
175 So why don't we do this?
177 Because it's expensive, and rarely will help. Most trees we are
reassociating have 3 or fewer ops.  If they have 2 ops, they already
will be written into a nice single binary op.  If you have 3 ops, a
single simple check suffices to tell you whether the first two are of the
same rank.  If so, you know to order it as

mergetmp = op1 + op2
newstmt = mergetmp + op3

instead of

mergetmp = op2 + op3
newstmt = mergetmp + op1
190 If all three are of the same rank, you can't expose them all in a
single binary operator anyway, so the above is *still* the best you
can do.
194 Thus, this is what we do. When we have three ops left, we check to see
195 what order to put them in, and call it a day. As a nod to vector sum
196 reduction, we check if any of the ops are really a phi node that is a
197 destructive update for the associating op, and keep the destructive
198 update together for vector sum reduction recognition. */
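/* A hypothetical walk through steps 1-4 (names invented for
   illustration), written as GIMPLE-like pseudo code:

     t1 = a - b;          step 1 rewrites this as  nb = -b;  t1 = a + nb;
     t2 = t1 + (c + d);   step 2 linearizes to     ((a + nb) + c) + d
                          and records {a, nb, c, d} as operand entries;
     step 3 would cancel nb against b if b also appeared in the list;
     step 4 re-emits the chain with the operands in rank order.  */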
/* Statistics for the pass, updated as constants and operands are
   eliminated.  */
static struct
{
  int constants_eliminated;
  int ops_eliminated;
  int pows_encountered;
} reassociate_stats;
/* Operator, rank pair.  */
typedef struct operand_entry
{
  unsigned int rank;
  int id;
  tree op;
  unsigned int count;
} *operand_entry_t;

static alloc_pool operand_entry_pool;
/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static int next_operand_entry_id;
/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;
/* Operand->rank hashtable.  */
static hash_map<tree, long> *operand_rank;
/* Vector of SSA_NAMEs on which, after reassociate_bb is done with
   all basic blocks, the CFG should be adjusted - the basic block is
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;
/* Forward declarations.  */
static long get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

static bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}
275 /* Bias amount for loop-carried phis. We want this to be larger than
276 the depth of any reassociation tree we can see, but not larger than
277 the rank difference between two blocks. */
278 #define PHI_LOOP_BIAS (1 << 15)
280 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
281 an innermost loop, and the phi has only a single use which is inside
282 the loop, then the rank is the block rank of the loop latch plus an
283 extra bias for the loop-carried dependence. This causes expressions
284 calculated into an accumulator variable to be independent for each
285 iteration of the loop. If STMT is some other phi, the rank is the
286 block rank of its containing block. */
288 phi_rank (gimple stmt
)
290 basic_block bb
= gimple_bb (stmt
);
291 struct loop
*father
= bb
->loop_father
;
297 /* We only care about real loops (those with a latch). */
299 return bb_rank
[bb
->index
];
301 /* Interesting phis must be in headers of innermost loops. */
302 if (bb
!= father
->header
304 return bb_rank
[bb
->index
];
306 /* Ignore virtual SSA_NAMEs. */
307 res
= gimple_phi_result (stmt
);
308 if (virtual_operand_p (res
))
309 return bb_rank
[bb
->index
];
311 /* The phi definition must have a single use, and that use must be
312 within the loop. Otherwise this isn't an accumulator pattern. */
313 if (!single_imm_use (res
, &use
, &use_stmt
)
314 || gimple_bb (use_stmt
)->loop_father
!= father
)
315 return bb_rank
[bb
->index
];
317 /* Look for phi arguments from within the loop. If found, bias this phi. */
318 for (i
= 0; i
< gimple_phi_num_args (stmt
); i
++)
320 tree arg
= gimple_phi_arg_def (stmt
, i
);
321 if (TREE_CODE (arg
) == SSA_NAME
322 && !SSA_NAME_IS_DEFAULT_DEF (arg
))
324 gimple def_stmt
= SSA_NAME_DEF_STMT (arg
);
325 if (gimple_bb (def_stmt
)->loop_father
== father
)
326 return bb_rank
[father
->latch
->index
] + PHI_LOOP_BIAS
;
330 /* Must be an uninteresting phi. */
331 return bb_rank
[bb
->index
];
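/* For illustration (a hypothetical innermost loop, not taken from this
   file): with
     x_1 = PHI <x_0 (preheader), x_2 (latch)>
     x_2 = x_1 + a_5
   and x_2 being x_1's only use and defined inside the loop, phi_rank
   gives x_1 the latch's block rank plus PHI_LOOP_BIAS, so the
   accumulator is added last when the chain is rewritten.  */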
334 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
loop-carried dependence of an innermost loop, return TRUE; else
   return FALSE.  */
338 loop_carried_phi (tree exp
)
343 if (TREE_CODE (exp
) != SSA_NAME
344 || SSA_NAME_IS_DEFAULT_DEF (exp
))
347 phi_stmt
= SSA_NAME_DEF_STMT (exp
);
349 if (gimple_code (SSA_NAME_DEF_STMT (exp
)) != GIMPLE_PHI
)
352 /* Non-loop-carried phis have block rank. Loop-carried phis have
353 an additional bias added in. If this phi doesn't have block rank,
354 it's biased and should not be propagated. */
355 block_rank
= bb_rank
[gimple_bb (phi_stmt
)->index
];
357 if (phi_rank (phi_stmt
) != block_rank
)
363 /* Return the maximum of RANK and the rank that should be propagated
364 from expression OP. For most operands, this is just the rank of OP.
365 For loop-carried phis, the value is zero to avoid undoing the bias
366 in favor of the phi. */
368 propagate_rank (long rank
, tree op
)
372 if (loop_carried_phi (op
))
375 op_rank
= get_rank (op
);
377 return MAX (rank
, op_rank
);
380 /* Look up the operand rank structure for expression E. */
383 find_operand_rank (tree e
)
385 long *slot
= operand_rank
->get (e
);
386 return slot
? *slot
: -1;
389 /* Insert {E,RANK} into the operand rank hashtable. */
392 insert_operand_rank (tree e
, long rank
)
394 gcc_assert (rank
> 0);
395 gcc_assert (!operand_rank
->put (e
, rank
));
398 /* Given an expression E, return the rank of the expression. */
403 /* Constants have rank 0. */
404 if (is_gimple_min_invariant (e
))
/* SSA_NAME's have the rank of the expression they are the result
   of.
   For globals and uninitialized values, the rank is 0.
   For function arguments, use the pre-setup rank.
   For PHI nodes, stores, asm statements, etc, we use the rank of
   the BB.
   For simple operations, the rank is the maximum rank of any of
   its operands, or the bb_rank, whichever is less.
   I make no claims that this is optimal, however, it gives good
   results.  */
418 /* We make an exception to the normal ranking system to break
419 dependences of accumulator variables in loops. Suppose we
have a simple one-block loop containing:

     x_1 = phi(x_0, x_2)
     b = a + x_1
     c = b + d
     x_2 = c + e

   As shown, each iteration of the calculation into x is fully
   dependent upon the iteration before it.  We would prefer to
   see this in the form:

     x_1 = phi(x_0, x_2)
     b = a + d
     c = b + e
     x_2 = c + x_1

   If the loop is unrolled, the calculations of b and c from
   different iterations can be interleaved.
439 To obtain this result during reassociation, we bias the rank
440 of the phi definition x_1 upward, when it is recognized as an
441 accumulator pattern. The artificial rank causes it to be
442 added last, providing the desired independence. */
444 if (TREE_CODE (e
) == SSA_NAME
)
451 if (SSA_NAME_IS_DEFAULT_DEF (e
))
452 return find_operand_rank (e
);
454 stmt
= SSA_NAME_DEF_STMT (e
);
455 if (gimple_code (stmt
) == GIMPLE_PHI
)
456 return phi_rank (stmt
);
458 if (!is_gimple_assign (stmt
)
459 || gimple_vdef (stmt
))
460 return bb_rank
[gimple_bb (stmt
)->index
];
462 /* If we already have a rank for this expression, use that. */
463 rank
= find_operand_rank (e
);
467 /* Otherwise, find the maximum rank for the operands. As an
468 exception, remove the bias from loop-carried phis when propagating
469 the rank so that dependent operations are not also biased. */
471 if (gimple_assign_single_p (stmt
))
473 tree rhs
= gimple_assign_rhs1 (stmt
);
474 n
= TREE_OPERAND_LENGTH (rhs
);
476 rank
= propagate_rank (rank
, rhs
);
479 for (i
= 0; i
< n
; i
++)
481 op
= TREE_OPERAND (rhs
, i
);
484 rank
= propagate_rank (rank
, op
);
490 n
= gimple_num_ops (stmt
);
491 for (i
= 1; i
< n
; i
++)
493 op
= gimple_op (stmt
, i
);
495 rank
= propagate_rank (rank
, op
);
499 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
501 fprintf (dump_file
, "Rank for ");
502 print_generic_expr (dump_file
, e
, 0);
503 fprintf (dump_file
, " is %ld\n", (rank
+ 1));
506 /* Note the rank in the hashtable so we don't recompute it. */
507 insert_operand_rank (e
, (rank
+ 1));
511 /* Globals, etc, are rank 0 */
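/* Worked example of the ranking rules above, assuming parameters a and b
   were pre-assigned rank 1 (the exact pre-setup values do not matter for
   the illustration):

     t1 = a + b;     rank (t1) = max (1, 1) + 1 = 2
     t2 = t1 + a;    rank (t2) = max (2, 1) + 1 = 3

   so in t2's linearized chain, t1 sorts before a.  */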
516 /* We want integer ones to end up last no matter what, since they are
517 the ones we can do the most with. */
518 #define INTEGER_CONST_TYPE 1 << 3
519 #define FLOAT_CONST_TYPE 1 << 2
520 #define OTHER_CONST_TYPE 1 << 1
522 /* Classify an invariant tree into integer, float, or other, so that
523 we can sort them to be near other constants of the same type. */
525 constant_type (tree t
)
527 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
528 return INTEGER_CONST_TYPE
;
529 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
530 return FLOAT_CONST_TYPE
;
532 return OTHER_CONST_TYPE
;
535 /* qsort comparison function to sort operand entries PA and PB by rank
536 so that the sorted array is ordered by rank in decreasing order. */
538 sort_by_operand_rank (const void *pa
, const void *pb
)
540 const operand_entry_t oea
= *(const operand_entry_t
*)pa
;
541 const operand_entry_t oeb
= *(const operand_entry_t
*)pb
;
543 /* It's nicer for optimize_expression if constants that are likely
544 to fold when added/multiplied//whatever are put next to each
545 other. Since all constants have rank 0, order them by type. */
546 if (oeb
->rank
== 0 && oea
->rank
== 0)
548 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
549 return constant_type (oeb
->op
) - constant_type (oea
->op
);
/* To make sorting result stable, we use unique IDs to determine
   order.  */
553 return oeb
->id
- oea
->id
;
/* Lastly, make sure the versions that are the same go next to each
   other.  */
558 if ((oeb
->rank
- oea
->rank
== 0)
559 && TREE_CODE (oea
->op
) == SSA_NAME
560 && TREE_CODE (oeb
->op
) == SSA_NAME
)
/* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
   versions of removed SSA_NAMEs, so if possible, prefer to sort
   based on the basic block and gimple_uid of the SSA_NAME_DEF_STMT.  */
566 if (!SSA_NAME_IS_DEFAULT_DEF (oea
->op
)
567 && !SSA_NAME_IS_DEFAULT_DEF (oeb
->op
)
568 && SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
570 gimple stmta
= SSA_NAME_DEF_STMT (oea
->op
);
571 gimple stmtb
= SSA_NAME_DEF_STMT (oeb
->op
);
572 basic_block bba
= gimple_bb (stmta
);
573 basic_block bbb
= gimple_bb (stmtb
);
576 if (bb_rank
[bbb
->index
] != bb_rank
[bba
->index
])
577 return bb_rank
[bbb
->index
] - bb_rank
[bba
->index
];
581 bool da
= reassoc_stmt_dominates_stmt_p (stmta
, stmtb
);
582 bool db
= reassoc_stmt_dominates_stmt_p (stmtb
, stmta
);
588 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
589 return SSA_NAME_VERSION (oeb
->op
) - SSA_NAME_VERSION (oea
->op
);
591 return oeb
->id
- oea
->id
;
594 if (oeb
->rank
!= oea
->rank
)
595 return oeb
->rank
- oea
->rank
;
597 return oeb
->id
- oea
->id
;
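/* Sketch of the resulting order on made-up entries: sorting
   { b (rank 1), 5 (rank 0), a (rank 3) } with this comparator yields
   { a, b, 5 } - decreasing rank, with constants grouped at the end
   where optimize_ops_list can merge or fold them.  */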
600 /* Add an operand entry to *OPS for the tree operand OP. */
603 add_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
)
605 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
608 oe
->rank
= get_rank (op
);
609 oe
->id
= next_operand_entry_id
++;
/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */
618 add_repeat_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
,
619 HOST_WIDE_INT repeat
)
621 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
624 oe
->rank
= get_rank (op
);
625 oe
->id
= next_operand_entry_id
++;
629 reassociate_stats
.pows_encountered
++;
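/* Illustration of the repeat count (made-up chain): for
   t = x * x * x * y, the factor x can be entered once here with
   REPEAT == 3, and the later rewrite may turn it into
   __builtin_powi (x, 3) rather than two explicit multiplies.  */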
/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */
636 is_reassociable_op (gimple stmt
, enum tree_code code
, struct loop
*loop
)
638 basic_block bb
= gimple_bb (stmt
);
640 if (gimple_bb (stmt
) == NULL
)
643 if (!flow_bb_inside_loop_p (loop
, bb
))
646 if (is_gimple_assign (stmt
)
647 && gimple_assign_rhs_code (stmt
) == code
648 && has_single_use (gimple_assign_lhs (stmt
)))
/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of that operation.  Otherwise, return NULL.  */
659 get_unary_op (tree name
, enum tree_code opcode
)
661 gimple stmt
= SSA_NAME_DEF_STMT (name
);
663 if (!is_gimple_assign (stmt
))
666 if (gimple_assign_rhs_code (stmt
) == opcode
)
667 return gimple_assign_rhs1 (stmt
);
671 /* If CURR and LAST are a pair of ops that OPCODE allows us to
672 eliminate through equivalences, do so, remove them from OPS, and
673 return true. Otherwise, return false. */
676 eliminate_duplicate_pair (enum tree_code opcode
,
677 vec
<operand_entry_t
> *ops
,
680 operand_entry_t curr
,
681 operand_entry_t last
)
684 /* If we have two of the same op, and the opcode is & |, min, or max,
685 we can eliminate one of them.
686 If we have two of the same op, and the opcode is ^, we can
687 eliminate both of them. */
689 if (last
&& last
->op
== curr
->op
)
697 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
699 fprintf (dump_file
, "Equivalence: ");
700 print_generic_expr (dump_file
, curr
->op
, 0);
701 fprintf (dump_file
, " [&|minmax] ");
702 print_generic_expr (dump_file
, last
->op
, 0);
703 fprintf (dump_file
, " -> ");
704 print_generic_stmt (dump_file
, last
->op
, 0);
707 ops
->ordered_remove (i
);
708 reassociate_stats
.ops_eliminated
++;
713 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
715 fprintf (dump_file
, "Equivalence: ");
716 print_generic_expr (dump_file
, curr
->op
, 0);
717 fprintf (dump_file
, " ^ ");
718 print_generic_expr (dump_file
, last
->op
, 0);
719 fprintf (dump_file
, " -> nothing\n");
722 reassociate_stats
.ops_eliminated
+= 2;
724 if (ops
->length () == 2)
727 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
732 ops
->ordered_remove (i
-1);
733 ops
->ordered_remove (i
-1);
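/* Sketched effect on made-up operand lists: for BIT_AND_EXPR or
   BIT_IOR_EXPR, { x, x, y } becomes { x, y }; for BIT_XOR_EXPR the
   pair cancels completely, so { x, x } becomes { 0 } (a zero of the
   right type) and { x, x, y } becomes just { y }.  */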
745 static vec
<tree
> plus_negates
;
747 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
748 expression, look in OPS for a corresponding positive operation to cancel
749 it out. If we find one, remove the other from OPS, replace
OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */
754 eliminate_plus_minus_pair (enum tree_code opcode
,
755 vec
<operand_entry_t
> *ops
,
756 unsigned int currindex
,
757 operand_entry_t curr
)
764 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
767 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
768 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
769 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
772 /* Any non-negated version will have a rank that is one less than
the current rank.  So once we hit those ranks, if we don't find
   one, we can stop.  */
776 for (i
= currindex
+ 1;
777 ops
->iterate (i
, &oe
)
778 && oe
->rank
>= curr
->rank
- 1 ;
781 if (oe
->op
== negateop
)
784 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
786 fprintf (dump_file
, "Equivalence: ");
787 print_generic_expr (dump_file
, negateop
, 0);
788 fprintf (dump_file
, " + -");
789 print_generic_expr (dump_file
, oe
->op
, 0);
790 fprintf (dump_file
, " -> 0\n");
793 ops
->ordered_remove (i
);
794 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
795 ops
->ordered_remove (currindex
);
796 reassociate_stats
.ops_eliminated
++;
800 else if (oe
->op
== notop
)
802 tree op_type
= TREE_TYPE (oe
->op
);
804 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
806 fprintf (dump_file
, "Equivalence: ");
807 print_generic_expr (dump_file
, notop
, 0);
808 fprintf (dump_file
, " + ~");
809 print_generic_expr (dump_file
, oe
->op
, 0);
810 fprintf (dump_file
, " -> -1\n");
813 ops
->ordered_remove (i
);
814 add_to_ops_vec (ops
, build_int_cst_type (op_type
, -1));
815 ops
->ordered_remove (currindex
);
816 reassociate_stats
.ops_eliminated
++;
822 /* CURR->OP is a negate expr in a plus expr: save it for later
823 inspection in repropagate_negates(). */
824 if (negateop
!= NULL_TREE
)
825 plus_negates
.safe_push (curr
->op
);
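/* Sketched effect (operand names invented): in a PLUS_EXPR list
   { a, t, b } where t = -b, the pair cancels and the list becomes
   { a, 0 }; if instead t = ~b, it becomes { a, -1 }, since b + ~b is
   all ones.  Unmatched negates are only recorded in plus_negates.  */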
830 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
831 bitwise not expression, look in OPS for a corresponding operand to
832 cancel it out. If we find one, remove the other from OPS, replace
OPS[CURRINDEX] with 0, and return true.  Otherwise, return
   false.  */
837 eliminate_not_pairs (enum tree_code opcode
,
838 vec
<operand_entry_t
> *ops
,
839 unsigned int currindex
,
840 operand_entry_t curr
)
846 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
847 || TREE_CODE (curr
->op
) != SSA_NAME
)
850 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
851 if (notop
== NULL_TREE
)
854 /* Any non-not version will have a rank that is one less than
the current rank.  So once we hit those ranks, if we don't find
   one, we can stop.  */
858 for (i
= currindex
+ 1;
859 ops
->iterate (i
, &oe
)
860 && oe
->rank
>= curr
->rank
- 1;
865 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
867 fprintf (dump_file
, "Equivalence: ");
868 print_generic_expr (dump_file
, notop
, 0);
869 if (opcode
== BIT_AND_EXPR
)
870 fprintf (dump_file
, " & ~");
871 else if (opcode
== BIT_IOR_EXPR
)
872 fprintf (dump_file
, " | ~");
873 print_generic_expr (dump_file
, oe
->op
, 0);
874 if (opcode
== BIT_AND_EXPR
)
875 fprintf (dump_file
, " -> 0\n");
876 else if (opcode
== BIT_IOR_EXPR
)
877 fprintf (dump_file
, " -> -1\n");
880 if (opcode
== BIT_AND_EXPR
)
881 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
882 else if (opcode
== BIT_IOR_EXPR
)
883 oe
->op
= build_all_ones_cst (TREE_TYPE (oe
->op
));
885 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
887 ops
->quick_push (oe
);
895 /* Use constant value that may be present in OPS to try to eliminate
896 operands. Note that this function is only really used when we've
897 eliminated ops for other reasons, or merged constants. Across
898 single statements, fold already does all of this, plus more. There
899 is little point in duplicating logic, so I've only included the
900 identities that I could ever construct testcases to trigger. */
903 eliminate_using_constants (enum tree_code opcode
,
904 vec
<operand_entry_t
> *ops
)
906 operand_entry_t oelast
= ops
->last ();
907 tree type
= TREE_TYPE (oelast
->op
);
909 if (oelast
->rank
== 0
910 && (INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
915 if (integer_zerop (oelast
->op
))
917 if (ops
->length () != 1)
919 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
920 fprintf (dump_file
, "Found & 0, removing all other ops\n");
922 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
925 ops
->quick_push (oelast
);
929 else if (integer_all_onesp (oelast
->op
))
931 if (ops
->length () != 1)
933 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
934 fprintf (dump_file
, "Found & -1, removing\n");
936 reassociate_stats
.ops_eliminated
++;
941 if (integer_all_onesp (oelast
->op
))
943 if (ops
->length () != 1)
945 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
946 fprintf (dump_file
, "Found | -1, removing all other ops\n");
948 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
951 ops
->quick_push (oelast
);
955 else if (integer_zerop (oelast
->op
))
957 if (ops
->length () != 1)
959 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
960 fprintf (dump_file
, "Found | 0, removing\n");
962 reassociate_stats
.ops_eliminated
++;
967 if (integer_zerop (oelast
->op
)
968 || (FLOAT_TYPE_P (type
)
969 && !HONOR_NANS (type
)
970 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type
))
971 && real_zerop (oelast
->op
)))
973 if (ops
->length () != 1)
975 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
976 fprintf (dump_file
, "Found * 0, removing all other ops\n");
978 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
980 ops
->quick_push (oelast
);
984 else if (integer_onep (oelast
->op
)
985 || (FLOAT_TYPE_P (type
)
986 && !HONOR_SNANS (TYPE_MODE (type
))
987 && real_onep (oelast
->op
)))
989 if (ops
->length () != 1)
991 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
992 fprintf (dump_file
, "Found * 1, removing\n");
994 reassociate_stats
.ops_eliminated
++;
1002 if (integer_zerop (oelast
->op
)
1003 || (FLOAT_TYPE_P (type
)
1004 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
1005 && fold_real_zero_addition_p (type
, oelast
->op
,
1006 opcode
== MINUS_EXPR
)))
1008 if (ops
->length () != 1)
1010 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1011 fprintf (dump_file
, "Found [|^+] 0, removing\n");
1013 reassociate_stats
.ops_eliminated
++;
1025 static void linearize_expr_tree (vec
<operand_entry_t
> *, gimple
,
1028 /* Structure for tracking and counting operands. */
1029 typedef struct oecount_s
{
1032 enum tree_code oecode
;
1037 /* The heap for the oecount hashtable and the sorted list of operands. */
1038 static vec
<oecount
> cvec
;
1041 /* Oecount hashtable helpers. */
1043 struct oecount_hasher
1045 typedef int value_type
;
1046 typedef int compare_type
;
1047 typedef int store_values_directly
;
1048 static inline hashval_t
hash (const value_type
&);
1049 static inline bool equal (const value_type
&, const compare_type
&);
1050 static bool is_deleted (int &v
) { return v
== 1; }
1051 static void mark_deleted (int &e
) { e
= 1; }
1052 static bool is_empty (int &v
) { return v
== 0; }
1053 static void mark_empty (int &e
) { e
= 0; }
1054 static void remove (int &) {}
1057 /* Hash function for oecount. */
1060 oecount_hasher::hash (const value_type
&p
)
1062 const oecount
*c
= &cvec
[p
- 42];
1063 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
1066 /* Comparison function for oecount. */
1069 oecount_hasher::equal (const value_type
&p1
, const compare_type
&p2
)
1071 const oecount
*c1
= &cvec
[p1
- 42];
1072 const oecount
*c2
= &cvec
[p2
- 42];
1073 return (c1
->oecode
== c2
->oecode
1074 && c1
->op
== c2
->op
);
1077 /* Comparison function for qsort sorting oecount elements by count. */
1080 oecount_cmp (const void *p1
, const void *p2
)
1082 const oecount
*c1
= (const oecount
*)p1
;
1083 const oecount
*c2
= (const oecount
*)p2
;
1084 if (c1
->cnt
!= c2
->cnt
)
1085 return c1
->cnt
- c2
->cnt
;
1087 /* If counts are identical, use unique IDs to stabilize qsort. */
1088 return c1
->id
- c2
->id
;
1091 /* Return TRUE iff STMT represents a builtin call that raises OP
1092 to some exponent. */
1095 stmt_is_power_of_op (gimple stmt
, tree op
)
1099 if (!is_gimple_call (stmt
))
1102 fndecl
= gimple_call_fndecl (stmt
);
1105 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
1108 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1110 CASE_FLT_FN (BUILT_IN_POW
):
1111 CASE_FLT_FN (BUILT_IN_POWI
):
1112 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1119 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1120 in place and return the result. Assumes that stmt_is_power_of_op
1121 was previously called for STMT and returned TRUE. */
1123 static HOST_WIDE_INT
1124 decrement_power (gimple stmt
)
1126 REAL_VALUE_TYPE c
, cint
;
1127 HOST_WIDE_INT power
;
1130 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1132 CASE_FLT_FN (BUILT_IN_POW
):
1133 arg1
= gimple_call_arg (stmt
, 1);
1134 c
= TREE_REAL_CST (arg1
);
1135 power
= real_to_integer (&c
) - 1;
1136 real_from_integer (&cint
, VOIDmode
, power
, SIGNED
);
1137 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1140 CASE_FLT_FN (BUILT_IN_POWI
):
1141 arg1
= gimple_call_arg (stmt
, 1);
1142 power
= TREE_INT_CST_LOW (arg1
) - 1;
1143 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
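/* For instance (made-up calls): __builtin_pow (x, 3.0) is rewritten in
   place to __builtin_pow (x, 2.0) and 2 is returned; __builtin_powi (x, 5)
   becomes __builtin_powi (x, 4) and 4 is returned.  Callers treat a
   result of 1 as "only one factor of OP left".  */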
1151 /* Find the single immediate use of STMT's LHS, and replace it
1152 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1153 replace *DEF with OP as well. */
1156 propagate_op_to_single_use (tree op
, gimple stmt
, tree
*def
)
1161 gimple_stmt_iterator gsi
;
1163 if (is_gimple_call (stmt
))
1164 lhs
= gimple_call_lhs (stmt
);
1166 lhs
= gimple_assign_lhs (stmt
);
1168 gcc_assert (has_single_use (lhs
));
1169 single_imm_use (lhs
, &use
, &use_stmt
);
1173 if (TREE_CODE (op
) != SSA_NAME
)
1174 update_stmt (use_stmt
);
1175 gsi
= gsi_for_stmt (stmt
);
1176 unlink_stmt_vdef (stmt
);
1177 reassoc_remove_stmt (&gsi
);
1178 release_defs (stmt
);
1181 /* Walks the linear chain with result *DEF searching for an operation
1182 with operand OP and code OPCODE removing that from the chain. *DEF
1183 is updated if there is only one operand but no operation left. */
1186 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1188 gimple stmt
= SSA_NAME_DEF_STMT (*def
);
1194 if (opcode
== MULT_EXPR
1195 && stmt_is_power_of_op (stmt
, op
))
1197 if (decrement_power (stmt
) == 1)
1198 propagate_op_to_single_use (op
, stmt
, def
);
1202 name
= gimple_assign_rhs1 (stmt
);
1204 /* If this is the operation we look for and one of the operands
is ours, simply propagate the other operand into the stmt's
     single use.  */
1207 if (gimple_assign_rhs_code (stmt
) == opcode
1209 || gimple_assign_rhs2 (stmt
) == op
))
1212 name
= gimple_assign_rhs2 (stmt
);
1213 propagate_op_to_single_use (name
, stmt
, def
);
1217 /* We might have a multiply of two __builtin_pow* calls, and
1218 the operand might be hiding in the rightmost one. */
1219 if (opcode
== MULT_EXPR
1220 && gimple_assign_rhs_code (stmt
) == opcode
1221 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1222 && has_single_use (gimple_assign_rhs2 (stmt
)))
1224 gimple stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1225 if (stmt_is_power_of_op (stmt2
, op
))
1227 if (decrement_power (stmt2
) == 1)
1228 propagate_op_to_single_use (op
, stmt2
, def
);
1233 /* Continue walking the chain. */
1234 gcc_assert (name
!= op
1235 && TREE_CODE (name
) == SSA_NAME
);
1236 stmt
= SSA_NAME_DEF_STMT (name
);
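/* Illustrative walk (names invented): with *DEF == t3 and the chain
     t1 = x * a;   t2 = t1 * b;   t3 = t2 * x;
   zero_one_operation (&t3, MULT_EXPR, x) finds the rightmost factor x,
   propagates t2 into t3's single use and updates *DEF, so the chain now
   computes x * a * b with one factor of x removed.  */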
1241 /* Returns true if statement S1 dominates statement S2. Like
1242 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1245 reassoc_stmt_dominates_stmt_p (gimple s1
, gimple s2
)
1247 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1249 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1250 SSA_NAME. Assume it lives at the beginning of function and
1251 thus dominates everything. */
1252 if (!bb1
|| s1
== s2
)
1255 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1261 /* PHIs in the same basic block are assumed to be
1262 executed all in parallel, if only one stmt is a PHI,
1263 it dominates the other stmt in the same basic block. */
1264 if (gimple_code (s1
) == GIMPLE_PHI
)
1267 if (gimple_code (s2
) == GIMPLE_PHI
)
1270 gcc_assert (gimple_uid (s1
) && gimple_uid (s2
));
1272 if (gimple_uid (s1
) < gimple_uid (s2
))
1275 if (gimple_uid (s1
) > gimple_uid (s2
))
1278 gimple_stmt_iterator gsi
= gsi_for_stmt (s1
);
1279 unsigned int uid
= gimple_uid (s1
);
1280 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1282 gimple s
= gsi_stmt (gsi
);
1283 if (gimple_uid (s
) != uid
)
1292 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
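/* Sketch of the uid comparison (hypothetical uids): if s1 and s2 are in
   the same block with gimple_uid 7 and 12, s1 dominates s2; equal uids
   fall back to scanning forward from s1 for s2 among statements sharing
   that uid; different blocks defer to dominated_by_p.  */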
1295 /* Insert STMT after INSERT_POINT. */
1298 insert_stmt_after (gimple stmt
, gimple insert_point
)
1300 gimple_stmt_iterator gsi
;
1303 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1304 bb
= gimple_bb (insert_point
);
1305 else if (!stmt_ends_bb_p (insert_point
))
1307 gsi
= gsi_for_stmt (insert_point
);
1308 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1309 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1313 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1314 thus if it must end a basic block, it should be a call that can
1315 throw, or some assignment that can throw. If it throws, the LHS
1316 of it will not be initialized though, so only valid places using
1317 the SSA_NAME should be dominated by the fallthru edge. */
1318 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
1319 gsi
= gsi_after_labels (bb
);
1320 if (gsi_end_p (gsi
))
1322 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1323 gimple_set_uid (stmt
,
1324 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1327 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1328 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1331 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1332 the result. Places the statement after the definition of either
1333 OP1 or OP2. Returns the new statement. */
1336 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
1338 gimple op1def
= NULL
, op2def
= NULL
;
1339 gimple_stmt_iterator gsi
;
1343 /* Create the addition statement. */
1344 op
= make_ssa_name (type
);
1345 sum
= gimple_build_assign (op
, opcode
, op1
, op2
);
1347 /* Find an insertion place and insert. */
1348 if (TREE_CODE (op1
) == SSA_NAME
)
1349 op1def
= SSA_NAME_DEF_STMT (op1
);
1350 if (TREE_CODE (op2
) == SSA_NAME
)
1351 op2def
= SSA_NAME_DEF_STMT (op2
);
1352 if ((!op1def
|| gimple_nop_p (op1def
))
1353 && (!op2def
|| gimple_nop_p (op2def
)))
1355 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1356 if (gsi_end_p (gsi
))
1358 gimple_stmt_iterator gsi2
1359 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1360 gimple_set_uid (sum
,
1361 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1364 gimple_set_uid (sum
, gimple_uid (gsi_stmt (gsi
)));
1365 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1369 gimple insert_point
;
1370 if ((!op1def
|| gimple_nop_p (op1def
))
1371 || (op2def
&& !gimple_nop_p (op2def
)
1372 && reassoc_stmt_dominates_stmt_p (op1def
, op2def
)))
1373 insert_point
= op2def
;
1375 insert_point
= op1def
;
1376 insert_stmt_after (sum
, insert_point
);
1383 /* Perform un-distribution of divisions and multiplications.
1384 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1385 to (A + B) / X for real X.
1387 The algorithm is organized as follows.
1389 - First we walk the addition chain *OPS looking for summands that
1390 are defined by a multiplication or a real division. This results
1391 in the candidates bitmap with relevant indices into *OPS.
1393 - Second we build the chains of multiplications or divisions for
1394 these candidates, counting the number of occurrences of (operand, code)
1395 pairs in all of the candidates chains.
1397 - Third we sort the (operand, code) pairs by number of occurrence and
1398 process them starting with the pair with the most uses.
1400 * For each such pair we walk the candidates again to build a
1401 second candidate bitmap noting all multiplication/division chains
1402 that have at least one occurrence of (operand, code).
1404 * We build an alternate addition chain only covering these
1405 candidates with one (operand, code) operation removed from their
1406 multiplication/division chain.
1408 * The first candidate gets replaced by the alternate addition chain
1409 multiplied/divided by the operand.
1411 * All candidate chains get disabled for further processing and
1412 processing of (operand, code) pairs continues.
1414 The alternate addition chains built are re-processed by the main
1415 reassociation algorithm which allows optimizing a * x * y + b * y * x
1416 to (a + b ) * x * y in one invocation of the reassociation pass. */
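/* A compact illustration (made-up SSA names): for the addition chain
   { t1, t2 } with t1 = a * x and t2 = b * x, the pair (x, MULT_EXPR)
   occurs in both candidates, so one factor x is removed from each
   chain, the remainders are summed as t3 = a + b, the other entry is
   replaced by 0, and t4 = t3 * x takes the first entry's place, giving
   (a + b) * x for later reassociation.  */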
1419 undistribute_ops_list (enum tree_code opcode
,
1420 vec
<operand_entry_t
> *ops
, struct loop
*loop
)
1422 unsigned int length
= ops
->length ();
1423 operand_entry_t oe1
;
1425 sbitmap candidates
, candidates2
;
1426 unsigned nr_candidates
, nr_candidates2
;
1427 sbitmap_iterator sbi0
;
1428 vec
<operand_entry_t
> *subops
;
1429 bool changed
= false;
1430 int next_oecount_id
= 0;
1433 || opcode
!= PLUS_EXPR
)
1436 /* Build a list of candidates to process. */
1437 candidates
= sbitmap_alloc (length
);
1438 bitmap_clear (candidates
);
1440 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1442 enum tree_code dcode
;
1445 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1447 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1448 if (!is_gimple_assign (oe1def
))
1450 dcode
= gimple_assign_rhs_code (oe1def
);
1451 if ((dcode
!= MULT_EXPR
1452 && dcode
!= RDIV_EXPR
)
1453 || !is_reassociable_op (oe1def
, dcode
, loop
))
1456 bitmap_set_bit (candidates
, i
);
1460 if (nr_candidates
< 2)
1462 sbitmap_free (candidates
);
1466 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1468 fprintf (dump_file
, "searching for un-distribute opportunities ");
1469 print_generic_expr (dump_file
,
1470 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, 0);
1471 fprintf (dump_file
, " %d\n", nr_candidates
);
1474 /* Build linearized sub-operand lists and the counting table. */
1477 hash_table
<oecount_hasher
> ctable (15);
1479 /* ??? Macro arguments cannot have multi-argument template types in
1480 them. This typedef is needed to workaround that limitation. */
1481 typedef vec
<operand_entry_t
> vec_operand_entry_t_heap
;
1482 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1483 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1486 enum tree_code oecode
;
1489 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1490 oecode
= gimple_assign_rhs_code (oedef
);
1491 linearize_expr_tree (&subops
[i
], oedef
,
1492 associative_tree_code (oecode
), false);
1494 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1501 c
.id
= next_oecount_id
++;
1504 idx
= cvec
.length () + 41;
1505 slot
= ctable
.find_slot (idx
, INSERT
);
1513 cvec
[*slot
- 42].cnt
++;
1518 /* Sort the counting table. */
1519 cvec
.qsort (oecount_cmp
);
1521 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1524 fprintf (dump_file
, "Candidates:\n");
1525 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1527 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1528 c
->oecode
== MULT_EXPR
1529 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1530 print_generic_expr (dump_file
, c
->op
, 0);
1531 fprintf (dump_file
, "\n");
1535 /* Process the (operand, code) pairs in order of most occurrence. */
1536 candidates2
= sbitmap_alloc (length
);
1537 while (!cvec
.is_empty ())
1539 oecount
*c
= &cvec
.last ();
1543 /* Now collect the operands in the outer chain that contain
1544 the common operand in their inner chain. */
1545 bitmap_clear (candidates2
);
1547 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1550 enum tree_code oecode
;
1552 tree op
= (*ops
)[i
]->op
;
/* If we undistributed in this chain already this may be
	 a constant.  */
1556 if (TREE_CODE (op
) != SSA_NAME
)
1559 oedef
= SSA_NAME_DEF_STMT (op
);
1560 oecode
= gimple_assign_rhs_code (oedef
);
1561 if (oecode
!= c
->oecode
)
1564 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1566 if (oe1
->op
== c
->op
)
1568 bitmap_set_bit (candidates2
, i
);
1575 if (nr_candidates2
>= 2)
1577 operand_entry_t oe1
, oe2
;
1579 int first
= bitmap_first_set_bit (candidates2
);
1581 /* Build the new addition chain. */
1582 oe1
= (*ops
)[first
];
1583 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1585 fprintf (dump_file
, "Building (");
1586 print_generic_expr (dump_file
, oe1
->op
, 0);
1588 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1589 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1593 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1595 fprintf (dump_file
, " + ");
1596 print_generic_expr (dump_file
, oe2
->op
, 0);
1598 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1599 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1600 oe1
->op
, oe2
->op
, opcode
);
1601 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1603 oe1
->op
= gimple_get_lhs (sum
);
1606 /* Apply the multiplication/division. */
1607 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1608 oe1
->op
, c
->op
, c
->oecode
);
1609 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1611 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1612 print_generic_expr (dump_file
, c
->op
, 0);
1613 fprintf (dump_file
, "\n");
1616 /* Record it in the addition chain and disable further
1617 undistribution with this op. */
1618 oe1
->op
= gimple_assign_lhs (prod
);
1619 oe1
->rank
= get_rank (oe1
->op
);
1620 subops
[first
].release ();
1628 for (i
= 0; i
< ops
->length (); ++i
)
1629 subops
[i
].release ();
1632 sbitmap_free (candidates
);
1633 sbitmap_free (candidates2
);
1638 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1639 expression, examine the other OPS to see if any of them are comparisons
1640 of the same values, which we may be able to combine or eliminate.
1641 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1644 eliminate_redundant_comparison (enum tree_code opcode
,
1645 vec
<operand_entry_t
> *ops
,
1646 unsigned int currindex
,
1647 operand_entry_t curr
)
1650 enum tree_code lcode
, rcode
;
1655 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
1658 /* Check that CURR is a comparison. */
1659 if (TREE_CODE (curr
->op
) != SSA_NAME
)
1661 def1
= SSA_NAME_DEF_STMT (curr
->op
);
1662 if (!is_gimple_assign (def1
))
1664 lcode
= gimple_assign_rhs_code (def1
);
1665 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
1667 op1
= gimple_assign_rhs1 (def1
);
1668 op2
= gimple_assign_rhs2 (def1
);
1670 /* Now look for a similar comparison in the remaining OPS. */
1671 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
1675 if (TREE_CODE (oe
->op
) != SSA_NAME
)
1677 def2
= SSA_NAME_DEF_STMT (oe
->op
);
1678 if (!is_gimple_assign (def2
))
1680 rcode
= gimple_assign_rhs_code (def2
);
1681 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
/* If we got here, we have a match.  See if we can combine the
	 two comparisons.  */
1686 if (opcode
== BIT_IOR_EXPR
)
1687 t
= maybe_fold_or_comparisons (lcode
, op1
, op2
,
1688 rcode
, gimple_assign_rhs1 (def2
),
1689 gimple_assign_rhs2 (def2
));
1691 t
= maybe_fold_and_comparisons (lcode
, op1
, op2
,
1692 rcode
, gimple_assign_rhs1 (def2
),
1693 gimple_assign_rhs2 (def2
));
1697 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1698 always give us a boolean_type_node value back. If the original
1699 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1700 we need to convert. */
1701 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
1702 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
1704 if (TREE_CODE (t
) != INTEGER_CST
1705 && !operand_equal_p (t
, curr
->op
, 0))
1707 enum tree_code subcode
;
1708 tree newop1
, newop2
;
1709 if (!COMPARISON_CLASS_P (t
))
1711 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1712 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1713 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1714 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
1718 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1720 fprintf (dump_file
, "Equivalence: ");
1721 print_generic_expr (dump_file
, curr
->op
, 0);
1722 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
1723 print_generic_expr (dump_file
, oe
->op
, 0);
1724 fprintf (dump_file
, " -> ");
1725 print_generic_expr (dump_file
, t
, 0);
1726 fprintf (dump_file
, "\n");
/* Now we can delete oe, as it has been subsumed by the new combined
         expression t.  */
1731 ops
->ordered_remove (i
);
1732 reassociate_stats
.ops_eliminated
++;
1734 /* If t is the same as curr->op, we're done. Otherwise we must
1735 replace curr->op with t. Special case is if we got a constant
1736 back, in which case we add it to the end instead of in place of
1737 the current entry. */
1738 if (TREE_CODE (t
) == INTEGER_CST
)
1740 ops
->ordered_remove (currindex
);
1741 add_to_ops_vec (ops
, t
);
1743 else if (!operand_equal_p (t
, curr
->op
, 0))
1746 enum tree_code subcode
;
1749 gcc_assert (COMPARISON_CLASS_P (t
));
1750 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1751 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1752 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1753 gcc_checking_assert (is_gimple_val (newop1
)
1754 && is_gimple_val (newop2
));
1755 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
1756 curr
->op
= gimple_get_lhs (sum
);
1764 /* Perform various identities and other optimizations on the list of
1765 operand entries, stored in OPS. The tree code for the binary
1766 operation between all the operands is OPCODE. */
1769 optimize_ops_list (enum tree_code opcode
,
1770 vec
<operand_entry_t
> *ops
)
1772 unsigned int length
= ops
->length ();
1775 operand_entry_t oelast
= NULL
;
1776 bool iterate
= false;
1781 oelast
= ops
->last ();
1783 /* If the last two are constants, pop the constants off, merge them
1784 and try the next two. */
1785 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
1787 operand_entry_t oelm1
= (*ops
)[length
- 2];
1789 if (oelm1
->rank
== 0
1790 && is_gimple_min_invariant (oelm1
->op
)
1791 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
1792 TREE_TYPE (oelast
->op
)))
1794 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
1795 oelm1
->op
, oelast
->op
);
1797 if (folded
&& is_gimple_min_invariant (folded
))
1799 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1800 fprintf (dump_file
, "Merging constants\n");
1805 add_to_ops_vec (ops
, folded
);
1806 reassociate_stats
.constants_eliminated
++;
1808 optimize_ops_list (opcode
, ops
);
1814 eliminate_using_constants (opcode
, ops
);
1817 for (i
= 0; ops
->iterate (i
, &oe
);)
1821 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
1823 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
1824 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
1825 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
1837 length
= ops
->length ();
1838 oelast
= ops
->last ();
1841 optimize_ops_list (opcode
, ops
);
/* The following functions are subroutines to optimize_range_tests and allow
   it to try to change a logical combination of comparisons into a range
   test.

   For example, both
	X == 2 || X == 5 || X == 3 || X == 4
   and
	X != 2 && X != 3 && X != 4 && X != 5
   are converted to
	(unsigned) (X - 2) <= 3

   For more information see comments above fold_range_test in fold-const.c,
   this implementation is for GIMPLE.  */
1864 bool strict_overflow_p
;
1865 unsigned int idx
, next
;
1868 /* This is similar to make_range in fold-const.c, but on top of
1869 GIMPLE instead of trees. If EXP is non-NULL, it should be
1870 an SSA_NAME and STMT argument is ignored, otherwise STMT
1871 argument should be a GIMPLE_COND. */
1874 init_range_entry (struct range_entry
*r
, tree exp
, gimple stmt
)
1878 bool is_bool
, strict_overflow_p
;
1882 r
->strict_overflow_p
= false;
1884 r
->high
= NULL_TREE
;
1885 if (exp
!= NULL_TREE
1886 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
1889 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1890 and see if we can refine the range. Some of the cases below may not
1891 happen, but it doesn't seem worth worrying about this. We "continue"
1892 the outer loop when we've changed something; otherwise we "break"
1893 the switch, which will "break" the while. */
1894 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
1897 strict_overflow_p
= false;
1899 if (exp
== NULL_TREE
)
1901 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
1903 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
1908 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
1913 enum tree_code code
;
1914 tree arg0
, arg1
, exp_type
;
1918 if (exp
!= NULL_TREE
)
1920 if (TREE_CODE (exp
) != SSA_NAME
1921 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
))
1924 stmt
= SSA_NAME_DEF_STMT (exp
);
1925 if (!is_gimple_assign (stmt
))
1928 code
= gimple_assign_rhs_code (stmt
);
1929 arg0
= gimple_assign_rhs1 (stmt
);
1930 arg1
= gimple_assign_rhs2 (stmt
);
1931 exp_type
= TREE_TYPE (exp
);
1935 code
= gimple_cond_code (stmt
);
1936 arg0
= gimple_cond_lhs (stmt
);
1937 arg1
= gimple_cond_rhs (stmt
);
1938 exp_type
= boolean_type_node
;
1941 if (TREE_CODE (arg0
) != SSA_NAME
)
1943 loc
= gimple_location (stmt
);
1947 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
1948 /* Ensure the range is either +[-,0], +[0,0],
1949 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1950 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1951 or similar expression of unconditional true or
1952 false, it should not be negated. */
1953 && ((high
&& integer_zerop (high
))
1954 || (low
&& integer_onep (low
))))
1967 if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
1969 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
1974 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
1989 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
1991 &strict_overflow_p
);
1992 if (nexp
!= NULL_TREE
)
1995 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2008 r
->strict_overflow_p
= strict_overflow_p
;
2012 /* Comparison function for qsort. Sort entries
2013 without SSA_NAME exp first, then with SSA_NAMEs sorted
2014 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2015 by increasing ->low and if ->low is the same, by increasing
->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
   means maximum.  */
2020 range_entry_cmp (const void *a
, const void *b
)
2022 const struct range_entry
*p
= (const struct range_entry
*) a
;
2023 const struct range_entry
*q
= (const struct range_entry
*) b
;
2025 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
2027 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2029 /* Group range_entries for the same SSA_NAME together. */
2030 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
2032 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
/* If ->low is different, NULL low goes first, then by
	 ascending low.  */
2036 if (p
->low
!= NULL_TREE
)
2038 if (q
->low
!= NULL_TREE
)
2040 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2042 if (tem
&& integer_onep (tem
))
2044 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2046 if (tem
&& integer_onep (tem
))
2052 else if (q
->low
!= NULL_TREE
)
/* If ->high is different, NULL high goes last, before that by
	 ascending high.  */
2056 if (p
->high
!= NULL_TREE
)
2058 if (q
->high
!= NULL_TREE
)
2060 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2062 if (tem
&& integer_onep (tem
))
2064 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2066 if (tem
&& integer_onep (tem
))
2072 else if (q
->high
!= NULL_TREE
)
2074 /* If both ranges are the same, sort below by ascending idx. */
2079 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2082 if (p
->idx
< q
->idx
)
2086 gcc_checking_assert (p
->idx
> q
->idx
);
2091 /* Helper routine of optimize_range_test.
2092 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2093 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2094 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2095 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2096 an array of COUNT pointers to other ranges. Return
2097 true if the range merge has been successful.
2098 If OPCODE is ERROR_MARK, this is called from within
2099 maybe_optimize_range_tests and is performing inter-bb range optimization.
In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
   oe->rank.  */
2104 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2105 struct range_entry
**otherrangep
,
2106 unsigned int count
, enum tree_code opcode
,
2107 vec
<operand_entry_t
> *ops
, tree exp
, gimple_seq seq
,
2108 bool in_p
, tree low
, tree high
, bool strict_overflow_p
)
2110 operand_entry_t oe
= (*ops
)[range
->idx
];
2112 gimple stmt
= op
? SSA_NAME_DEF_STMT (op
) :
2113 last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2114 location_t loc
= gimple_location (stmt
);
2115 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2116 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2118 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2119 gimple_stmt_iterator gsi
;
2122 if (tem
== NULL_TREE
)
2125 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2126 warning_at (loc
, OPT_Wstrict_overflow
,
2127 "assuming signed overflow does not occur "
2128 "when simplifying range test");
2130 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2132 struct range_entry
*r
;
2133 fprintf (dump_file
, "Optimizing range tests ");
2134 print_generic_expr (dump_file
, range
->exp
, 0);
2135 fprintf (dump_file
, " %c[", range
->in_p
? '+' : '-');
2136 print_generic_expr (dump_file
, range
->low
, 0);
2137 fprintf (dump_file
, ", ");
2138 print_generic_expr (dump_file
, range
->high
, 0);
2139 fprintf (dump_file
, "]");
2140 for (i
= 0; i
< count
; i
++)
2146 fprintf (dump_file
, " and %c[", r
->in_p
? '+' : '-');
2147 print_generic_expr (dump_file
, r
->low
, 0);
2148 fprintf (dump_file
, ", ");
2149 print_generic_expr (dump_file
, r
->high
, 0);
2150 fprintf (dump_file
, "]");
2152 fprintf (dump_file
, "\n into ");
2153 print_generic_expr (dump_file
, tem
, 0);
2154 fprintf (dump_file
, "\n");
2157 if (opcode
== BIT_IOR_EXPR
2158 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2159 tem
= invert_truthvalue_loc (loc
, tem
);
2161 tem
= fold_convert_loc (loc
, optype
, tem
);
2162 gsi
= gsi_for_stmt (stmt
);
2163 /* In rare cases range->exp can be equal to lhs of stmt.
In that case we have to insert after the stmt rather than before
     it.  */
2166 if (op
== range
->exp
)
2168 gsi_insert_seq_after (&gsi
, seq
, GSI_CONTINUE_LINKING
);
2169 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, false,
2170 GSI_CONTINUE_LINKING
);
2174 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2175 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
2179 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2180 if (gimple_uid (gsi_stmt (gsi
)))
2183 gimple_set_uid (gsi_stmt (gsi
), gimple_uid (stmt
));
2190 range
->strict_overflow_p
= false;
2192 for (i
= 0; i
< count
; i
++)
2195 range
= otherrange
+ i
;
2197 range
= otherrangep
[i
];
2198 oe
= (*ops
)[range
->idx
];
2199 /* Now change all the other range test immediate uses, so that
2200 those tests will be optimized away. */
2201 if (opcode
== ERROR_MARK
)
2204 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
2205 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
2207 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
2208 ? boolean_false_node
: boolean_true_node
);
2211 oe
->op
= error_mark_node
;
2212 range
->exp
= NULL_TREE
;
2217 /* Optimize X == CST1 || X == CST2
2218 if popcount (CST1 ^ CST2) == 1 into
2219 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2220 Similarly for ranges. E.g.
2221 X != 2 && X != 3 && X != 10 && X != 11
2222 will be transformed by the previous optimization into
2223 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2224 and this loop can transform that into
2225 !(((X & ~8) - 2U) <= 1U). */
2228 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
2229 tree lowi
, tree lowj
, tree highi
, tree highj
,
2230 vec
<operand_entry_t
> *ops
,
2231 struct range_entry
*rangei
,
2232 struct range_entry
*rangej
)
2234 tree lowxor
, highxor
, tem
, exp
;
2235 /* Check lowi ^ lowj == highi ^ highj and
2236 popcount (lowi ^ lowj) == 1. */
2237 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
2238 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
2240 if (!integer_pow2p (lowxor
))
2242 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
2243 if (!tree_int_cst_equal (lowxor
, highxor
))
2246 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
2247 exp
= fold_build2 (BIT_AND_EXPR
, type
, rangei
->exp
, tem
);
2248 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
2249 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
2250 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, exp
,
2251 NULL
, rangei
->in_p
, lowj
, highj
,
2252 rangei
->strict_overflow_p
2253 || rangej
->strict_overflow_p
))
2258 /* Optimize X == CST1 || X == CST2
2259 if popcount (CST2 - CST1) == 1 into
2260 ((X - CST1) & ~(CST2 - CST1)) == 0.
2261 Similarly for ranges. E.g.
2262 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2263 || X == 75 || X == 45
2264 will be transformed by the previous optimization into
2265 (X - 43U) <= 3U || (X - 75U) <= 3U
2266 and this loop can transform that into
2267 ((X - 43U) & ~(75U - 43U)) <= 3U. */
static bool
optimize_range_tests_diff (enum tree_code opcode, tree type,
                           tree lowi, tree lowj, tree highi, tree highj,
                           vec<operand_entry_t> *ops,
                           struct range_entry *rangei,
                           struct range_entry *rangej)
{
  tree tem1, tem2, mask;
  /* Check highi - lowi == highj - lowj.  */
  tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
    return false;
  tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
  if (!tree_int_cst_equal (tem1, tem2))
    return false;
  /* Check popcount (lowj - lowi) == 1.  */
  tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
    return false;
  if (!integer_pow2p (tem1))
    return false;

  type = unsigned_type_for (type);
  tem1 = fold_convert (type, tem1);
  tem2 = fold_convert (type, tem2);
  lowi = fold_convert (type, lowi);
  mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
  tem1 = fold_binary (MINUS_EXPR, type,
                      fold_convert (type, rangei->exp), lowi);
  tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
  lowj = build_int_cst (type, 0);
  if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
                         NULL, rangei->in_p, lowj, tem2,
                         rangei->strict_overflow_p
                         || rangej->strict_overflow_p))
    return true;
  return false;
}
/* Helper for optimize_range_tests.  It does some common checks shared by
   optimize_range_tests_xor and optimize_range_tests_diff.
   If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
   Else it calls optimize_range_tests_diff.  */
2313 optimize_range_tests_1 (enum tree_code opcode
, int first
, int length
,
2314 bool optimize_xor
, vec
<operand_entry_t
> *ops
,
2315 struct range_entry
*ranges
)
2318 bool any_changes
= false;
2319 for (i
= first
; i
< length
; i
++)
2321 tree lowi
, highi
, lowj
, highj
, type
, tem
;
2323 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2325 type
= TREE_TYPE (ranges
[i
].exp
);
2326 if (!INTEGRAL_TYPE_P (type
))
2328 lowi
= ranges
[i
].low
;
2329 if (lowi
== NULL_TREE
)
2330 lowi
= TYPE_MIN_VALUE (type
);
2331 highi
= ranges
[i
].high
;
2332 if (highi
== NULL_TREE
)
2334 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
2337 if (ranges
[i
].exp
!= ranges
[j
].exp
|| ranges
[j
].in_p
)
2339 lowj
= ranges
[j
].low
;
2340 if (lowj
== NULL_TREE
)
2342 highj
= ranges
[j
].high
;
2343 if (highj
== NULL_TREE
)
2344 highj
= TYPE_MAX_VALUE (type
);
2345 /* Check lowj > highi. */
2346 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2348 if (tem
== NULL_TREE
|| !integer_onep (tem
))
2351 changes
= optimize_range_tests_xor (opcode
, type
, lowi
, lowj
,
2353 ranges
+ i
, ranges
+ j
);
2355 changes
= optimize_range_tests_diff (opcode
, type
, lowi
, lowj
,
2357 ranges
+ i
, ranges
+ j
);
2368 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2369 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2370 EXP on success, NULL otherwise. */
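/* Worked illustration (values picked only for this comment): with
   PREC == 64, LOW == 43, HIGH == 46 and TOTALLOW == 43, HIGH - LOW == 3,
   so MAX == 4 and *MASK gets bits 0..3 set.  For a later range with
   LOW == 75, HIGH == 78 relative to the same TOTALLOW == 43, the mask of
   bits 0..3 is shifted left by 75 - 43 == 32, i.e. bits 32..35 are
   tested, matching the 0x...F00000000 part of the bit-test example
   further below.  */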
2373 extract_bit_test_mask (tree exp
, int prec
, tree totallow
, tree low
, tree high
,
2374 wide_int
*mask
, tree
*totallowp
)
2376 tree tem
= int_const_binop (MINUS_EXPR
, high
, low
);
2377 if (tem
== NULL_TREE
2378 || TREE_CODE (tem
) != INTEGER_CST
2379 || TREE_OVERFLOW (tem
)
2380 || tree_int_cst_sgn (tem
) == -1
2381 || compare_tree_int (tem
, prec
) != -1)
2384 unsigned HOST_WIDE_INT max
= tree_to_uhwi (tem
) + 1;
2385 *mask
= wi::shifted_mask (0, max
, false, prec
);
2386 if (TREE_CODE (exp
) == BIT_AND_EXPR
2387 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
2389 widest_int msk
= wi::to_widest (TREE_OPERAND (exp
, 1));
2390 msk
= wi::zext (~msk
, TYPE_PRECISION (TREE_TYPE (exp
)));
2391 if (wi::popcount (msk
) == 1
2392 && wi::ltu_p (msk
, prec
- max
))
2394 *mask
|= wi::shifted_mask (msk
.to_uhwi (), max
, false, prec
);
2395 max
+= msk
.to_uhwi ();
2396 exp
= TREE_OPERAND (exp
, 0);
2397 if (integer_zerop (low
)
2398 && TREE_CODE (exp
) == PLUS_EXPR
2399 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
2402 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp
, 1)),
2403 TYPE_PRECISION (TREE_TYPE (low
))));
2404 tree tbias
= wide_int_to_tree (TREE_TYPE (low
), bias
);
2408 exp
= TREE_OPERAND (exp
, 0);
2412 else if (!tree_int_cst_lt (totallow
, tbias
))
2414 bias
-= wi::to_widest (totallow
);
2415 if (wi::ges_p (bias
, 0) && wi::lts_p (bias
, prec
- max
))
2417 *mask
= wi::lshift (*mask
, bias
);
2418 exp
= TREE_OPERAND (exp
, 0);
2427 if (!tree_int_cst_lt (totallow
, low
))
2429 tem
= int_const_binop (MINUS_EXPR
, low
, totallow
);
2430 if (tem
== NULL_TREE
2431 || TREE_CODE (tem
) != INTEGER_CST
2432 || TREE_OVERFLOW (tem
)
2433 || compare_tree_int (tem
, prec
- max
) == 1)
2436 *mask
= wi::lshift (*mask
, wi::to_widest (tem
));
/* Attempt to optimize small range tests using bit test.
   E.g.
   X != 43 && X != 76 && X != 44 && X != 78 && X != 49
   && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
   has been optimized by the earlier optimizations into:
   ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
   As the whole 43 through 82 range spans fewer than 64 values,
   for 64-bit word targets optimize that into:
   (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */
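/* How the 0x8F0000004FULL mask above is built (spelled out here purely as
   an illustration): the excluded values minus 43 are 0, 1, 2, 3, 6, 32,
   33, 34, 35 and 39, so the mask sets bits 0..3 (0xF), bit 6 (0x40),
   bits 32..35 (0xF00000000) and bit 39 (0x8000000000), which together
   give 0x8F0000004FULL.  */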
2451 optimize_range_tests_to_bit_test (enum tree_code opcode
, int first
, int length
,
2452 vec
<operand_entry_t
> *ops
,
2453 struct range_entry
*ranges
)
2456 bool any_changes
= false;
2457 int prec
= GET_MODE_BITSIZE (word_mode
);
2458 auto_vec
<struct range_entry
*, 64> candidates
;
2460 for (i
= first
; i
< length
- 2; i
++)
2462 tree lowi
, highi
, lowj
, highj
, type
;
2464 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2466 type
= TREE_TYPE (ranges
[i
].exp
);
2467 if (!INTEGRAL_TYPE_P (type
))
2469 lowi
= ranges
[i
].low
;
2470 if (lowi
== NULL_TREE
)
2471 lowi
= TYPE_MIN_VALUE (type
);
2472 highi
= ranges
[i
].high
;
2473 if (highi
== NULL_TREE
)
2476 tree exp
= extract_bit_test_mask (ranges
[i
].exp
, prec
, lowi
, lowi
,
2477 highi
, &mask
, &lowi
);
2478 if (exp
== NULL_TREE
)
2480 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2481 candidates
.truncate (0);
2482 int end
= MIN (i
+ 64, length
);
2483 for (j
= i
+ 1; j
< end
; j
++)
2486 if (ranges
[j
].exp
== NULL_TREE
|| ranges
[j
].in_p
)
2488 if (ranges
[j
].exp
== exp
)
2490 else if (TREE_CODE (ranges
[j
].exp
) == BIT_AND_EXPR
)
2492 exp2
= TREE_OPERAND (ranges
[j
].exp
, 0);
2495 else if (TREE_CODE (exp2
) == PLUS_EXPR
)
2497 exp2
= TREE_OPERAND (exp2
, 0);
2507 lowj
= ranges
[j
].low
;
2508 if (lowj
== NULL_TREE
)
2510 highj
= ranges
[j
].high
;
2511 if (highj
== NULL_TREE
)
2512 highj
= TYPE_MAX_VALUE (type
);
2514 exp2
= extract_bit_test_mask (ranges
[j
].exp
, prec
, lowi
, lowj
,
2515 highj
, &mask2
, NULL
);
2519 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2520 candidates
.safe_push (&ranges
[j
]);
/* If we would otherwise need 3 or more comparisons, use a bit test.  */
2524 if (candidates
.length () >= 2)
2526 tree high
= wide_int_to_tree (TREE_TYPE (lowi
),
2527 wi::to_widest (lowi
)
2528 + prec
- 1 - wi::clz (mask
));
2529 operand_entry_t oe
= (*ops
)[ranges
[i
].idx
];
2531 gimple stmt
= op
? SSA_NAME_DEF_STMT (op
)
2532 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2533 location_t loc
= gimple_location (stmt
);
2534 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
/* See if it isn't cheaper to pretend the minimum value of the
   range is 0, if the maximum value is small enough.
   We can then avoid the subtraction of the minimum value, but the
   mask constant could perhaps be more expensive.  */
2540 if (compare_tree_int (lowi
, 0) > 0
2541 && compare_tree_int (high
, prec
) < 0)
2544 HOST_WIDE_INT m
= tree_to_uhwi (lowi
);
2545 rtx reg
= gen_raw_REG (word_mode
, 10000);
2546 bool speed_p
= optimize_bb_for_speed_p (gimple_bb (stmt
));
2547 cost_diff
= set_rtx_cost (gen_rtx_PLUS (word_mode
, reg
,
2548 GEN_INT (-m
)), speed_p
);
2549 rtx r
= immed_wide_int_const (mask
, word_mode
);
2550 cost_diff
+= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
2552 r
= immed_wide_int_const (wi::lshift (mask
, m
), word_mode
);
2553 cost_diff
-= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
2557 mask
= wi::lshift (mask
, m
);
2558 lowi
= build_zero_cst (TREE_TYPE (lowi
));
2562 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2564 if (tem
== NULL_TREE
|| is_gimple_val (tem
))
2566 tree etype
= unsigned_type_for (TREE_TYPE (exp
));
2567 exp
= fold_build2_loc (loc
, MINUS_EXPR
, etype
,
2568 fold_convert_loc (loc
, etype
, exp
),
2569 fold_convert_loc (loc
, etype
, lowi
));
2570 exp
= fold_convert_loc (loc
, integer_type_node
, exp
);
2571 tree word_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
2572 exp
= fold_build2_loc (loc
, LSHIFT_EXPR
, word_type
,
2573 build_int_cst (word_type
, 1), exp
);
2574 exp
= fold_build2_loc (loc
, BIT_AND_EXPR
, word_type
, exp
,
2575 wide_int_to_tree (word_type
, mask
));
2576 exp
= fold_build2_loc (loc
, EQ_EXPR
, optype
, exp
,
2577 build_zero_cst (word_type
));
2578 if (is_gimple_val (exp
))
/* The shift might have undefined behavior if TEM is true,
   but reassociate_bb isn't prepared to have basic blocks
   split when it is running.  So, temporarily emit code
   with BIT_IOR_EXPR instead of &&, and fix it up later
   when processing reassoc_branch_fixups.  */
2587 tem
= force_gimple_operand (tem
, &seq
, true, NULL_TREE
);
2588 gcc_assert (TREE_CODE (tem
) == SSA_NAME
);
2589 gimple_set_visited (SSA_NAME_DEF_STMT (tem
), true);
2591 exp
= force_gimple_operand (exp
, &seq2
, true, NULL_TREE
);
2592 gimple_seq_add_seq_without_update (&seq
, seq2
);
2593 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2594 gimple_set_visited (SSA_NAME_DEF_STMT (exp
), true);
2595 gimple g
= gimple_build_assign (make_ssa_name (optype
),
2596 BIT_IOR_EXPR
, tem
, exp
);
2597 gimple_set_location (g
, loc
);
2598 gimple_seq_add_stmt_without_update (&seq
, g
);
2599 exp
= gimple_assign_lhs (g
);
2600 tree val
= build_zero_cst (optype
);
2601 if (update_range_test (&ranges
[i
], NULL
, candidates
.address (),
2602 candidates
.length (), opcode
, ops
, exp
,
2603 seq
, false, val
, val
, strict_overflow_p
))
2606 reassoc_branch_fixups
.safe_push (tem
);
2609 gimple_seq_discard (seq
);
/* Optimize range tests, similarly to how fold_range_test optimizes
   them on trees.  The tree code for the binary
   operation between all the operands is OPCODE.
   If OPCODE is ERROR_MARK, optimize_range_tests is called from within
   maybe_optimize_range_tests for inter-bb range optimization.
   In that case, if oe->op is NULL, oe->id is bb->index whose
   GIMPLE_COND is && or ||ed into the test, and oe->rank says
   the actual opcode.  */
2625 optimize_range_tests (enum tree_code opcode
,
2626 vec
<operand_entry_t
> *ops
)
2628 unsigned int length
= ops
->length (), i
, j
, first
;
2630 struct range_entry
*ranges
;
2631 bool any_changes
= false;
2636 ranges
= XNEWVEC (struct range_entry
, length
);
2637 for (i
= 0; i
< length
; i
++)
2641 init_range_entry (ranges
+ i
, oe
->op
,
2643 last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
)));
2644 /* For | invert it now, we will invert it again before emitting
2645 the optimized expression. */
2646 if (opcode
== BIT_IOR_EXPR
2647 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2648 ranges
[i
].in_p
= !ranges
[i
].in_p
;
2651 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
2652 for (i
= 0; i
< length
; i
++)
2653 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
2656 /* Try to merge ranges. */
2657 for (first
= i
; i
< length
; i
++)
2659 tree low
= ranges
[i
].low
;
2660 tree high
= ranges
[i
].high
;
2661 int in_p
= ranges
[i
].in_p
;
2662 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2663 int update_fail_count
= 0;
2665 for (j
= i
+ 1; j
< length
; j
++)
2667 if (ranges
[i
].exp
!= ranges
[j
].exp
)
2669 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
2670 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
2672 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2678 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, NULL
, j
- i
- 1,
2679 opcode
, ops
, ranges
[i
].exp
, NULL
, in_p
,
2680 low
, high
, strict_overflow_p
))
2685 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2686 while update_range_test would fail. */
2687 else if (update_fail_count
== 64)
2690 ++update_fail_count
;
2693 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
2696 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
2697 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
2699 if (lshift_cheap_p (optimize_function_for_speed_p (cfun
)))
2700 any_changes
|= optimize_range_tests_to_bit_test (opcode
, first
, length
,
2703 if (any_changes
&& opcode
!= ERROR_MARK
)
2706 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
2708 if (oe
->op
== error_mark_node
)
2717 XDELETEVEC (ranges
);
/* Return true if STMT is a cast like:
   <bb N>:
   ...
   _123 = (int) _234;

   <bb M>:
   # _345 = PHI <_123(N), 1(...), 1(...)>
   where _234 has bool type, _123 has single use and
   bb N has a single successor M.  This is commonly used in
   the last block of a range test.  */
2733 final_range_test_p (gimple stmt
)
2735 basic_block bb
, rhs_bb
;
2738 use_operand_p use_p
;
2741 if (!gimple_assign_cast_p (stmt
))
2743 bb
= gimple_bb (stmt
);
2744 if (!single_succ_p (bb
))
2746 e
= single_succ_edge (bb
);
2747 if (e
->flags
& EDGE_COMPLEX
)
2750 lhs
= gimple_assign_lhs (stmt
);
2751 rhs
= gimple_assign_rhs1 (stmt
);
2752 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2753 || TREE_CODE (rhs
) != SSA_NAME
2754 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
)
2757 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2758 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
2761 if (gimple_code (use_stmt
) != GIMPLE_PHI
2762 || gimple_bb (use_stmt
) != e
->dest
)
2765 /* And that the rhs is defined in the same loop. */
2766 rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
));
2768 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
/* Return true if BB is a suitable basic block for inter-bb range test
   optimization.  If BACKWARD is true, BB should be the only predecessor
   of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
   or compared against it to find a common basic block to which all
   conditions branch if true resp. false.  If BACKWARD is false, TEST_BB
   should be the only predecessor of BB.  */
2782 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
2785 edge_iterator ei
, ei2
;
2789 bool other_edge_seen
= false;
2794 /* Check last stmt first. */
2795 stmt
= last_stmt (bb
);
2797 || (gimple_code (stmt
) != GIMPLE_COND
2798 && (backward
|| !final_range_test_p (stmt
)))
2799 || gimple_visited_p (stmt
)
2800 || stmt_could_throw_p (stmt
)
2803 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
2806 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2807 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2808 to *OTHER_BB (if not set yet, try to find it out). */
2809 if (EDGE_COUNT (bb
->succs
) != 2)
2811 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2813 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2815 if (e
->dest
== test_bb
)
2824 if (*other_bb
== NULL
)
2826 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
2827 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2829 else if (e
->dest
== e2
->dest
)
2830 *other_bb
= e
->dest
;
2831 if (*other_bb
== NULL
)
2834 if (e
->dest
== *other_bb
)
2835 other_edge_seen
= true;
2839 if (*other_bb
== NULL
|| !other_edge_seen
)
2842 else if (single_succ (bb
) != *other_bb
)
2845 /* Now check all PHIs of *OTHER_BB. */
2846 e
= find_edge (bb
, *other_bb
);
2847 e2
= find_edge (test_bb
, *other_bb
);
2848 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2850 gphi
*phi
= gsi
.phi ();
2851 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2852 corresponding to BB and TEST_BB predecessor must be the same. */
2853 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
2854 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
2856 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2857 one of the PHIs should have the lhs of the last stmt in
2858 that block as PHI arg and that PHI should have 0 or 1
2859 corresponding to it in all other range test basic blocks
2863 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
2864 == gimple_assign_lhs (stmt
)
2865 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
2866 || integer_onep (gimple_phi_arg_def (phi
,
2872 gimple test_last
= last_stmt (test_bb
);
2873 if (gimple_code (test_last
) != GIMPLE_COND
2874 && gimple_phi_arg_def (phi
, e2
->dest_idx
)
2875 == gimple_assign_lhs (test_last
)
2876 && (integer_zerop (gimple_phi_arg_def (phi
, e
->dest_idx
))
2877 || integer_onep (gimple_phi_arg_def (phi
, e
->dest_idx
))))
2887 /* Return true if BB doesn't have side-effects that would disallow
2888 range test optimization, all SSA_NAMEs set in the bb are consumed
2889 in the bb and there are no PHIs. */
2892 no_side_effect_bb (basic_block bb
)
2894 gimple_stmt_iterator gsi
;
2897 if (!gimple_seq_empty_p (phi_nodes (bb
)))
2899 last
= last_stmt (bb
);
2900 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2902 gimple stmt
= gsi_stmt (gsi
);
2904 imm_use_iterator imm_iter
;
2905 use_operand_p use_p
;
2907 if (is_gimple_debug (stmt
))
2909 if (gimple_has_side_effects (stmt
))
2913 if (!is_gimple_assign (stmt
))
2915 lhs
= gimple_assign_lhs (stmt
);
2916 if (TREE_CODE (lhs
) != SSA_NAME
)
2918 if (gimple_assign_rhs_could_trap_p (stmt
))
2920 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
2922 gimple use_stmt
= USE_STMT (use_p
);
2923 if (is_gimple_debug (use_stmt
))
2925 if (gimple_bb (use_stmt
) != bb
)
2932 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2933 return true and fill in *OPS recursively. */
2936 get_ops (tree var
, enum tree_code code
, vec
<operand_entry_t
> *ops
,
2939 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2943 if (!is_reassociable_op (stmt
, code
, loop
))
2946 rhs
[0] = gimple_assign_rhs1 (stmt
);
2947 rhs
[1] = gimple_assign_rhs2 (stmt
);
2948 gimple_set_visited (stmt
, true);
2949 for (i
= 0; i
< 2; i
++)
2950 if (TREE_CODE (rhs
[i
]) == SSA_NAME
2951 && !get_ops (rhs
[i
], code
, ops
, loop
)
2952 && has_single_use (rhs
[i
]))
2954 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
2960 ops
->safe_push (oe
);
2965 /* Find the ops that were added by get_ops starting from VAR, see if
2966 they were changed during update_range_test and if yes, create new
2970 update_ops (tree var
, enum tree_code code
, vec
<operand_entry_t
> ops
,
2971 unsigned int *pidx
, struct loop
*loop
)
2973 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2977 if (!is_reassociable_op (stmt
, code
, loop
))
2980 rhs
[0] = gimple_assign_rhs1 (stmt
);
2981 rhs
[1] = gimple_assign_rhs2 (stmt
);
2984 for (i
= 0; i
< 2; i
++)
2985 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
2987 rhs
[2 + i
] = update_ops (rhs
[i
], code
, ops
, pidx
, loop
);
2988 if (rhs
[2 + i
] == NULL_TREE
)
2990 if (has_single_use (rhs
[i
]))
2991 rhs
[2 + i
] = ops
[(*pidx
)++]->op
;
2993 rhs
[2 + i
] = rhs
[i
];
2996 if ((rhs
[2] != rhs
[0] || rhs
[3] != rhs
[1])
2997 && (rhs
[2] != rhs
[1] || rhs
[3] != rhs
[0]))
2999 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3000 var
= make_ssa_name (TREE_TYPE (var
));
3001 gassign
*g
= gimple_build_assign (var
, gimple_assign_rhs_code (stmt
),
3003 gimple_set_uid (g
, gimple_uid (stmt
));
3004 gimple_set_visited (g
, true);
3005 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3010 /* Structure to track the initial value passed to get_ops and
3011 the range in the ops vector for each basic block. */
3013 struct inter_bb_range_test_entry
3016 unsigned int first_idx
, last_idx
;
3019 /* Inter-bb range test optimization. */
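/* Illustrative shape only (block numbers and SSA names invented for this
   comment): given
     <bb 2>: if (x_1 < 10) goto <bb 3>; else goto <bb 5>;
     <bb 3>: if (x_1 > 3) goto <bb 4>; else goto <bb 5>;
     <bb 4>: ...
   where <bb 5> is the common destination of both conditions, the two
   GIMPLE_CONDs are collected into the ops vector and handed to
   optimize_range_tests, which can merge them into the single range
   test (x_1 - 4U) <= 5U.  */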
3022 maybe_optimize_range_tests (gimple stmt
)
3024 basic_block first_bb
= gimple_bb (stmt
);
3025 basic_block last_bb
= first_bb
;
3026 basic_block other_bb
= NULL
;
3030 auto_vec
<operand_entry_t
> ops
;
3031 auto_vec
<inter_bb_range_test_entry
> bbinfo
;
3032 bool any_changes
= false;
3034 /* Consider only basic blocks that end with GIMPLE_COND or
3035 a cast statement satisfying final_range_test_p. All
3036 but the last bb in the first_bb .. last_bb range
3037 should end with GIMPLE_COND. */
3038 if (gimple_code (stmt
) == GIMPLE_COND
)
3040 if (EDGE_COUNT (first_bb
->succs
) != 2)
3043 else if (final_range_test_p (stmt
))
3044 other_bb
= single_succ (first_bb
);
3048 if (stmt_could_throw_p (stmt
))
3051 /* As relative ordering of post-dominator sons isn't fixed,
3052 maybe_optimize_range_tests can be called first on any
3053 bb in the range we want to optimize. So, start searching
3054 backwards, if first_bb can be set to a predecessor. */
3055 while (single_pred_p (first_bb
))
3057 basic_block pred_bb
= single_pred (first_bb
);
3058 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, true))
3060 if (!no_side_effect_bb (first_bb
))
3064 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3065 Before starting forward search in last_bb successors, find
3066 out the other_bb. */
3067 if (first_bb
== last_bb
)
3070 /* As non-GIMPLE_COND last stmt always terminates the range,
3071 if forward search didn't discover anything, just give up. */
3072 if (gimple_code (stmt
) != GIMPLE_COND
)
3074 /* Look at both successors. Either it ends with a GIMPLE_COND
3075 and satisfies suitable_cond_bb, or ends with a cast and
3076 other_bb is that cast's successor. */
3077 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
3078 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
3079 || e
->dest
== first_bb
)
3081 else if (single_pred_p (e
->dest
))
3083 stmt
= last_stmt (e
->dest
);
3085 && gimple_code (stmt
) == GIMPLE_COND
3086 && EDGE_COUNT (e
->dest
->succs
) == 2)
3088 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
, true))
3094 && final_range_test_p (stmt
)
3095 && find_edge (first_bb
, single_succ (e
->dest
)))
3097 other_bb
= single_succ (e
->dest
);
3098 if (other_bb
== first_bb
)
3102 if (other_bb
== NULL
)
3105 /* Now do the forward search, moving last_bb to successor bbs
3106 that aren't other_bb. */
3107 while (EDGE_COUNT (last_bb
->succs
) == 2)
3109 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
3110 if (e
->dest
!= other_bb
)
3114 if (!single_pred_p (e
->dest
))
3116 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, false))
3118 if (!no_side_effect_bb (e
->dest
))
3122 if (first_bb
== last_bb
)
3124 /* Here basic blocks first_bb through last_bb's predecessor
3125 end with GIMPLE_COND, all of them have one of the edges to
3126 other_bb and another to another block in the range,
3127 all blocks except first_bb don't have side-effects and
3128 last_bb ends with either GIMPLE_COND, or cast satisfying
3129 final_range_test_p. */
3130 for (bb
= last_bb
; ; bb
= single_pred (bb
))
3132 enum tree_code code
;
3134 inter_bb_range_test_entry bb_ent
;
3136 bb_ent
.op
= NULL_TREE
;
3137 bb_ent
.first_idx
= ops
.length ();
3138 bb_ent
.last_idx
= bb_ent
.first_idx
;
3139 e
= find_edge (bb
, other_bb
);
3140 stmt
= last_stmt (bb
);
3141 gimple_set_visited (stmt
, true);
3142 if (gimple_code (stmt
) != GIMPLE_COND
)
3144 use_operand_p use_p
;
3149 lhs
= gimple_assign_lhs (stmt
);
3150 rhs
= gimple_assign_rhs1 (stmt
);
3151 gcc_assert (bb
== last_bb
);
3158 # _345 = PHI <_123(N), 1(...), 1(...)>
3160 or 0 instead of 1. If it is 0, the _234
3161 range test is anded together with all the
3162 other range tests, if it is 1, it is ored with
3164 single_imm_use (lhs
, &use_p
, &phi
);
3165 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
3166 e2
= find_edge (first_bb
, other_bb
);
3168 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
3169 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
3170 code
= BIT_AND_EXPR
;
3173 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
3174 code
= BIT_IOR_EXPR
;
3177 /* If _234 SSA_NAME_DEF_STMT is
3179 (or &, corresponding to 1/0 in the phi arguments,
3180 push into ops the individual range test arguments
3181 of the bitwise or resp. and, recursively. */
3182 if (!get_ops (rhs
, code
, &ops
,
3183 loop_containing_stmt (stmt
))
3184 && has_single_use (rhs
))
3186 /* Otherwise, push the _234 range test itself. */
3188 = (operand_entry_t
) pool_alloc (operand_entry_pool
);
3198 bb_ent
.last_idx
= ops
.length ();
3200 bbinfo
.safe_push (bb_ent
);
3203 /* Otherwise stmt is GIMPLE_COND. */
3204 code
= gimple_cond_code (stmt
);
3205 lhs
= gimple_cond_lhs (stmt
);
3206 rhs
= gimple_cond_rhs (stmt
);
3207 if (TREE_CODE (lhs
) == SSA_NAME
3208 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3209 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
3210 || rhs
!= boolean_false_node
3211 /* Either push into ops the individual bitwise
3212 or resp. and operands, depending on which
3213 edge is other_bb. */
3214 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
3215 ^ (code
== EQ_EXPR
))
3216 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
3217 loop_containing_stmt (stmt
))))
3219 /* Or push the GIMPLE_COND stmt itself. */
3221 = (operand_entry_t
) pool_alloc (operand_entry_pool
);
3224 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
3225 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
/* oe->op = NULL signals that there is no SSA_NAME
   for the range test, and oe->id instead is the
   basic block number, at whose end the GIMPLE_COND
   is.  */
3236 else if (ops
.length () > bb_ent
.first_idx
)
3239 bb_ent
.last_idx
= ops
.length ();
3241 bbinfo
.safe_push (bb_ent
);
3245 if (ops
.length () > 1)
3246 any_changes
= optimize_range_tests (ERROR_MARK
, &ops
);
/* update_ops relies on has_single_use predicates returning the
   same values as they did during get_ops earlier.  Additionally it
   never removes statements, only adds new ones, and it should walk
   from the single imm use and check the predicate already before
   making those changes.
   On the other side, the handling of GIMPLE_COND directly can turn
   previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
   it needs to be done in a separate loop afterwards.  */
3258 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
3260 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
3261 && bbinfo
[idx
].op
!= NULL_TREE
)
3265 stmt
= last_stmt (bb
);
3266 new_op
= update_ops (bbinfo
[idx
].op
,
3268 ops
[bbinfo
[idx
].first_idx
]->rank
,
3269 ops
, &bbinfo
[idx
].first_idx
,
3270 loop_containing_stmt (stmt
));
3271 if (new_op
== NULL_TREE
)
3273 gcc_assert (bb
== last_bb
);
3274 new_op
= ops
[bbinfo
[idx
].first_idx
++]->op
;
3276 if (bbinfo
[idx
].op
!= new_op
)
3278 imm_use_iterator iter
;
3279 use_operand_p use_p
;
3280 gimple use_stmt
, cast_stmt
= NULL
;
3282 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, bbinfo
[idx
].op
)
3283 if (is_gimple_debug (use_stmt
))
3285 else if (gimple_code (use_stmt
) == GIMPLE_COND
3286 || gimple_code (use_stmt
) == GIMPLE_PHI
)
3287 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
3288 SET_USE (use_p
, new_op
);
3289 else if (gimple_assign_cast_p (use_stmt
))
3290 cast_stmt
= use_stmt
;
3295 gcc_assert (bb
== last_bb
);
3296 tree lhs
= gimple_assign_lhs (cast_stmt
);
3297 tree new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
3298 enum tree_code rhs_code
3299 = gimple_assign_rhs_code (cast_stmt
);
3301 if (is_gimple_min_invariant (new_op
))
3303 new_op
= fold_convert (TREE_TYPE (lhs
), new_op
);
3304 g
= gimple_build_assign (new_lhs
, new_op
);
3307 g
= gimple_build_assign (new_lhs
, rhs_code
, new_op
);
3308 gimple_stmt_iterator gsi
= gsi_for_stmt (cast_stmt
);
3309 gimple_set_uid (g
, gimple_uid (cast_stmt
));
3310 gimple_set_visited (g
, true);
3311 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3312 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
3313 if (is_gimple_debug (use_stmt
))
3315 else if (gimple_code (use_stmt
) == GIMPLE_COND
3316 || gimple_code (use_stmt
) == GIMPLE_PHI
)
3317 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
3318 SET_USE (use_p
, new_lhs
);
3327 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
3329 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
3330 && bbinfo
[idx
].op
== NULL_TREE
3331 && ops
[bbinfo
[idx
].first_idx
]->op
!= NULL_TREE
)
3333 gcond
*cond_stmt
= as_a
<gcond
*> (last_stmt (bb
));
3334 if (integer_zerop (ops
[bbinfo
[idx
].first_idx
]->op
))
3335 gimple_cond_make_false (cond_stmt
);
3336 else if (integer_onep (ops
[bbinfo
[idx
].first_idx
]->op
))
3337 gimple_cond_make_true (cond_stmt
);
3340 gimple_cond_set_code (cond_stmt
, NE_EXPR
);
3341 gimple_cond_set_lhs (cond_stmt
,
3342 ops
[bbinfo
[idx
].first_idx
]->op
);
3343 gimple_cond_set_rhs (cond_stmt
, boolean_false_node
);
3345 update_stmt (cond_stmt
);
/* Return true if OPERAND is defined by a PHI node which uses the LHS
   of STMT in its operands.  This is also known as a "destructive
   update" operation.  */
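/* Illustrative example (SSA names invented for this comment): in a
   reduction loop such as
     # sum_1 = PHI <sum_2(latch), 0(entry)>
     sum_2 = x_3 + sum_1;
   the operand sum_1 of the PLUS is defined by a PHI that uses the
   statement's own lhs sum_2, i.e. a destructive update.  */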
static bool
is_phi_for_stmt (gimple stmt, tree operand)
{
  gimple def_stmt;
  gphi *def_phi;
  tree lhs;
  use_operand_p arg_p;
  ssa_op_iter i;

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  lhs = gimple_assign_lhs (stmt);

  def_stmt = SSA_NAME_DEF_STMT (operand);
  def_phi = dyn_cast <gphi *> (def_stmt);
  if (!def_phi)
    return false;

  FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
    if (lhs == USE_FROM_PTR (arg_p))
      return true;

  return false;
}
3382 /* Remove def stmt of VAR if VAR has zero uses and recurse
3383 on rhs1 operand if so. */
static void
remove_visited_stmt_chain (tree var)
{
  gimple stmt;
  gimple_stmt_iterator gsi;

  while (1)
    {
      if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
        return;
      stmt = SSA_NAME_DEF_STMT (var);
      if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
        {
          var = gimple_assign_rhs1 (stmt);
          gsi = gsi_for_stmt (stmt);
          reassoc_remove_stmt (&gsi);
          release_defs (stmt);
        }
      else
        return;
    }
}
/* This function checks three consecutive operands in
   the passed operands vector OPS starting from OPINDEX and
   swaps two operands if it is profitable for the binary operation
   consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3413 We pair ops with the same rank if possible.
3415 The alternative we try is to see if STMT is a destructive
3416 update style statement, which is like:
3419 In that case, we want to use the destructive update form to
3420 expose the possible vectorizer sum reduction opportunity.
3421 In that case, the third operand will be the phi node. This
3422 check is not performed if STMT is null.
3424 We could, of course, try to be better as noted above, and do a
3425 lot of work to try to find these opportunities in >3 operand
3426 cases, but it is unlikely to be worth it. */
3429 swap_ops_for_binary_stmt (vec
<operand_entry_t
> ops
,
3430 unsigned int opindex
, gimple stmt
)
3432 operand_entry_t oe1
, oe2
, oe3
;
3435 oe2
= ops
[opindex
+ 1];
3436 oe3
= ops
[opindex
+ 2];
3438 if ((oe1
->rank
== oe2
->rank
3439 && oe2
->rank
!= oe3
->rank
)
3440 || (stmt
&& is_phi_for_stmt (stmt
, oe3
->op
)
3441 && !is_phi_for_stmt (stmt
, oe1
->op
)
3442 && !is_phi_for_stmt (stmt
, oe2
->op
)))
3444 struct operand_entry temp
= *oe3
;
3446 oe3
->rank
= oe1
->rank
;
3448 oe1
->rank
= temp
.rank
;
3450 else if ((oe1
->rank
== oe3
->rank
3451 && oe2
->rank
!= oe3
->rank
)
3452 || (stmt
&& is_phi_for_stmt (stmt
, oe2
->op
)
3453 && !is_phi_for_stmt (stmt
, oe1
->op
)
3454 && !is_phi_for_stmt (stmt
, oe3
->op
)))
3456 struct operand_entry temp
= *oe2
;
3458 oe2
->rank
= oe1
->rank
;
3460 oe1
->rank
= temp
.rank
;
3464 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3465 two definitions, otherwise return STMT. */
static inline gimple
find_insert_point (gimple stmt, tree rhs1, tree rhs2)
{
  if (TREE_CODE (rhs1) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
    stmt = SSA_NAME_DEF_STMT (rhs1);
  if (TREE_CODE (rhs2) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
    stmt = SSA_NAME_DEF_STMT (rhs2);
  return stmt;
}
3479 /* Recursively rewrite our linearized statements so that the operators
3480 match those in OPS[OPINDEX], putting the computation in rank
3481 order. Return new lhs. */
3484 rewrite_expr_tree (gimple stmt
, unsigned int opindex
,
3485 vec
<operand_entry_t
> ops
, bool changed
)
3487 tree rhs1
= gimple_assign_rhs1 (stmt
);
3488 tree rhs2
= gimple_assign_rhs2 (stmt
);
3489 tree lhs
= gimple_assign_lhs (stmt
);
/* The final recursion case for this function is that you have
   exactly two operations left.
   If we had exactly one op in the entire list to start with, we
   would have never called this function, and the tail recursion
   rewrites them one at a time.  */
3497 if (opindex
+ 2 == ops
.length ())
3499 operand_entry_t oe1
, oe2
;
3502 oe2
= ops
[opindex
+ 1];
3504 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
3506 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3507 unsigned int uid
= gimple_uid (stmt
);
3509 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3511 fprintf (dump_file
, "Transforming ");
3512 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3517 gimple insert_point
= find_insert_point (stmt
, oe1
->op
, oe2
->op
);
3518 lhs
= make_ssa_name (TREE_TYPE (lhs
));
3520 = gimple_build_assign (lhs
, gimple_assign_rhs_code (stmt
),
3522 gimple_set_uid (stmt
, uid
);
3523 gimple_set_visited (stmt
, true);
3524 if (insert_point
== gsi_stmt (gsi
))
3525 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3527 insert_stmt_after (stmt
, insert_point
);
3531 gcc_checking_assert (find_insert_point (stmt
, oe1
->op
, oe2
->op
)
3533 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
3534 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
3538 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
3539 remove_visited_stmt_chain (rhs1
);
3541 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3543 fprintf (dump_file
, " into ");
3544 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3550 /* If we hit here, we should have 3 or more ops left. */
3551 gcc_assert (opindex
+ 2 < ops
.length ());
3553 /* Rewrite the next operator. */
3556 /* Recurse on the LHS of the binary operator, which is guaranteed to
3557 be the non-leaf side. */
3559 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), opindex
+ 1, ops
,
3560 changed
|| oe
->op
!= rhs2
);
3562 if (oe
->op
!= rhs2
|| new_rhs1
!= rhs1
)
3564 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3566 fprintf (dump_file
, "Transforming ");
3567 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3570 /* If changed is false, this is either opindex == 0
3571 or all outer rhs2's were equal to corresponding oe->op,
3572 and powi_result is NULL.
3573 That means lhs is equivalent before and after reassociation.
3574 Otherwise ensure the old lhs SSA_NAME is not reused and
3575 create a new stmt as well, so that any debug stmts will be
3576 properly adjusted. */
3579 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3580 unsigned int uid
= gimple_uid (stmt
);
3581 gimple insert_point
= find_insert_point (stmt
, new_rhs1
, oe
->op
);
3583 lhs
= make_ssa_name (TREE_TYPE (lhs
));
3584 stmt
= gimple_build_assign (lhs
, gimple_assign_rhs_code (stmt
),
3586 gimple_set_uid (stmt
, uid
);
3587 gimple_set_visited (stmt
, true);
3588 if (insert_point
== gsi_stmt (gsi
))
3589 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3591 insert_stmt_after (stmt
, insert_point
);
3595 gcc_checking_assert (find_insert_point (stmt
, new_rhs1
, oe
->op
)
3597 gimple_assign_set_rhs1 (stmt
, new_rhs1
);
3598 gimple_assign_set_rhs2 (stmt
, oe
->op
);
3602 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3604 fprintf (dump_file
, " into ");
3605 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3611 /* Find out how many cycles we need to compute statements chain.
OPS_NUM holds the number of statements in a chain.  CPU_WIDTH is the
   maximum number of independent statements we may execute per cycle.  */
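/* Illustrative numbers (chosen just for this comment): with OPS_NUM == 16
   and CPU_WIDTH == 2, the first phase takes 16 / (2 * 2) == 4 cycles and
   leaves 16 - 4 * 2 == 8 operands, which then halve each cycle for
   exact_log2 (8) == 3 more cycles, i.e. 7 cycles in total.  */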
static int
get_required_cycles (int ops_num, int cpu_width)
{
  int res;
  int elog;
  unsigned int rest;

  /* While we have more than 2 * cpu_width operands
     we may reduce the number of operands by cpu_width
     per cycle.  */
  res = ops_num / (2 * cpu_width);

  /* The remaining operand count may be halved each cycle
     until we have only one operand left.  */
  rest = (unsigned) (ops_num - res * cpu_width);
  elog = exact_log2 (rest);
  if (elog >= 0)
    res += elog;
  else
    res += floor_log2 (rest) + 1;

  return res;
}
3639 /* Returns an optimal number of registers to use for computation of
3640 given statements. */
3643 get_reassociation_width (int ops_num
, enum tree_code opc
,
3646 int param_width
= PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH
);
3651 if (param_width
> 0)
3652 width
= param_width
;
3654 width
= targetm
.sched
.reassociation_width (opc
, mode
);
3659 /* Get the minimal time required for sequence computation. */
3660 cycles_best
= get_required_cycles (ops_num
, width
);
3662 /* Check if we may use less width and still compute sequence for
3663 the same time. It will allow us to reduce registers usage.
3664 get_required_cycles is monotonically increasing with lower width
3665 so we can perform a binary search for the minimal width that still
3666 results in the optimal cycle count. */
3668 while (width
> width_min
)
3670 int width_mid
= (width
+ width_min
) / 2;
3672 if (get_required_cycles (ops_num
, width_mid
) == cycles_best
)
3674 else if (width_min
< width_mid
)
3675 width_min
= width_mid
;
3683 /* Recursively rewrite our linearized statements so that the operators
3684 match those in OPS[OPINDEX], putting the computation in rank
order and trying to allow operations to be executed in
   parallel.  */
3689 rewrite_expr_tree_parallel (gassign
*stmt
, int width
,
3690 vec
<operand_entry_t
> ops
)
3692 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
3693 int op_num
= ops
.length ();
3694 int stmt_num
= op_num
- 1;
3695 gimple
*stmts
= XALLOCAVEC (gimple
, stmt_num
);
3696 int op_index
= op_num
- 1;
3698 int ready_stmts_end
= 0;
3700 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
3702 /* We start expression rewriting from the top statements.
3703 So, in this loop we create a full list of statements
3704 we will work with. */
3705 stmts
[stmt_num
- 1] = stmt
;
3706 for (i
= stmt_num
- 2; i
>= 0; i
--)
3707 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
3709 for (i
= 0; i
< stmt_num
; i
++)
3713 /* Determine whether we should use results of
3714 already handled statements or not. */
3715 if (ready_stmts_end
== 0
3716 && (i
- stmt_index
>= width
|| op_index
< 1))
3717 ready_stmts_end
= i
;
/* Now we choose operands for the next statement.  A nonzero
   value of ready_stmts_end here means that we should use
   the result of already generated statements as the new operand.  */
3722 if (ready_stmts_end
> 0)
3724 op1
= gimple_assign_lhs (stmts
[stmt_index
++]);
3725 if (ready_stmts_end
> stmt_index
)
3726 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3727 else if (op_index
>= 0)
3728 op2
= ops
[op_index
--]->op
;
3731 gcc_assert (stmt_index
< i
);
3732 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3735 if (stmt_index
>= ready_stmts_end
)
3736 ready_stmts_end
= 0;
3741 swap_ops_for_binary_stmt (ops
, op_index
- 2, NULL
);
3742 op2
= ops
[op_index
--]->op
;
3743 op1
= ops
[op_index
--]->op
;
3746 /* If we emit the last statement then we should put
3747 operands into the last statement. It will also
3749 if (op_index
< 0 && stmt_index
== i
)
3752 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3754 fprintf (dump_file
, "Transforming ");
3755 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3758 /* We keep original statement only for the last one. All
3759 others are recreated. */
3760 if (i
== stmt_num
- 1)
3762 gimple_assign_set_rhs1 (stmts
[i
], op1
);
3763 gimple_assign_set_rhs2 (stmts
[i
], op2
);
3764 update_stmt (stmts
[i
]);
3767 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
), op1
, op2
, opcode
);
3769 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3771 fprintf (dump_file
, " into ");
3772 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3776 remove_visited_stmt_chain (last_rhs1
);
/* Transform STMT, which is really (A + B) + (C + D) into the left
   linear form, ((A+B)+C)+D.
   Recurse on D if necessary.  */
3784 linearize_expr (gimple stmt
)
3786 gimple_stmt_iterator gsi
;
3787 gimple binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3788 gimple binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3789 gimple oldbinrhs
= binrhs
;
3790 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
3791 gimple newbinrhs
= NULL
;
3792 struct loop
*loop
= loop_containing_stmt (stmt
);
3793 tree lhs
= gimple_assign_lhs (stmt
);
3795 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
3796 && is_reassociable_op (binrhs
, rhscode
, loop
));
3798 gsi
= gsi_for_stmt (stmt
);
3800 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
3801 binrhs
= gimple_build_assign (make_ssa_name (TREE_TYPE (lhs
)),
3802 gimple_assign_rhs_code (binrhs
),
3803 gimple_assign_lhs (binlhs
),
3804 gimple_assign_rhs2 (binrhs
));
3805 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
3806 gsi_insert_before (&gsi
, binrhs
, GSI_SAME_STMT
);
3807 gimple_set_uid (binrhs
, gimple_uid (stmt
));
3809 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
3810 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3812 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3814 fprintf (dump_file
, "Linearized: ");
3815 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3818 reassociate_stats
.linearized
++;
3821 gsi
= gsi_for_stmt (oldbinrhs
);
3822 reassoc_remove_stmt (&gsi
);
3823 release_defs (oldbinrhs
);
3825 gimple_set_visited (stmt
, true);
3826 gimple_set_visited (binlhs
, true);
3827 gimple_set_visited (binrhs
, true);
3829 /* Tail recurse on the new rhs if it still needs reassociation. */
3830 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
3831 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3832 want to change the algorithm while converting to tuples. */
3833 linearize_expr (stmt
);
3836 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3837 it. Otherwise, return NULL. */
static gimple
get_single_immediate_use (tree lhs)
{
  use_operand_p immuse;
  gimple immusestmt;

  if (TREE_CODE (lhs) == SSA_NAME
      && single_imm_use (lhs, &immuse, &immusestmt)
      && is_gimple_assign (immusestmt))
    return immusestmt;

  return NULL;
}
3853 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3854 representing the negated value. Insertions of any necessary
3855 instructions go before GSI.
3856 This function is recursive in that, if you hand it "a_5" as the
3857 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3858 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3861 negate_value (tree tonegate
, gimple_stmt_iterator
*gsip
)
3863 gimple negatedefstmt
= NULL
;
3864 tree resultofnegate
;
3865 gimple_stmt_iterator gsi
;
3868 /* If we are trying to negate a name, defined by an add, negate the
3869 add operands instead. */
3870 if (TREE_CODE (tonegate
) == SSA_NAME
)
3871 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
3872 if (TREE_CODE (tonegate
) == SSA_NAME
3873 && is_gimple_assign (negatedefstmt
)
3874 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
3875 && has_single_use (gimple_assign_lhs (negatedefstmt
))
3876 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
3878 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
3879 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
3880 tree lhs
= gimple_assign_lhs (negatedefstmt
);
3883 gsi
= gsi_for_stmt (negatedefstmt
);
3884 rhs1
= negate_value (rhs1
, &gsi
);
3886 gsi
= gsi_for_stmt (negatedefstmt
);
3887 rhs2
= negate_value (rhs2
, &gsi
);
3889 gsi
= gsi_for_stmt (negatedefstmt
);
3890 lhs
= make_ssa_name (TREE_TYPE (lhs
));
3891 gimple_set_visited (negatedefstmt
, true);
3892 g
= gimple_build_assign (lhs
, PLUS_EXPR
, rhs1
, rhs2
);
3893 gimple_set_uid (g
, gimple_uid (negatedefstmt
));
3894 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3898 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
3899 resultofnegate
= force_gimple_operand_gsi (gsip
, tonegate
, true,
3900 NULL_TREE
, true, GSI_SAME_STMT
);
3902 uid
= gimple_uid (gsi_stmt (gsi
));
3903 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
3905 gimple stmt
= gsi_stmt (gsi
);
3906 if (gimple_uid (stmt
) != 0)
3908 gimple_set_uid (stmt
, uid
);
3910 return resultofnegate
;
/* Return true if we should break up the subtract in STMT into an add
   with negate.  This is true when the subtract operands are really
   adds, or the subtract itself is used in an add expression.  In
   either case, breaking up the subtract into an add with negate
   exposes the adds to reassociation.  */
3920 should_break_up_subtract (gimple stmt
)
3922 tree lhs
= gimple_assign_lhs (stmt
);
3923 tree binlhs
= gimple_assign_rhs1 (stmt
);
3924 tree binrhs
= gimple_assign_rhs2 (stmt
);
3926 struct loop
*loop
= loop_containing_stmt (stmt
);
3928 if (TREE_CODE (binlhs
) == SSA_NAME
3929 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs
), PLUS_EXPR
, loop
))
3932 if (TREE_CODE (binrhs
) == SSA_NAME
3933 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
), PLUS_EXPR
, loop
))
3936 if (TREE_CODE (lhs
) == SSA_NAME
3937 && (immusestmt
= get_single_immediate_use (lhs
))
3938 && is_gimple_assign (immusestmt
)
3939 && (gimple_assign_rhs_code (immusestmt
) == PLUS_EXPR
3940 || gimple_assign_rhs_code (immusestmt
) == MULT_EXPR
))
3945 /* Transform STMT from A - B into A + -B. */
3948 break_up_subtract (gimple stmt
, gimple_stmt_iterator
*gsip
)
3950 tree rhs1
= gimple_assign_rhs1 (stmt
);
3951 tree rhs2
= gimple_assign_rhs2 (stmt
);
3953 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3955 fprintf (dump_file
, "Breaking up subtract ");
3956 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3959 rhs2
= negate_value (rhs2
, gsip
);
3960 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
3964 /* Determine whether STMT is a builtin call that raises an SSA name
3965 to an integer power and has only one use. If so, and this is early
3966 reassociation and unsafe math optimizations are permitted, place
3967 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3968 If any of these conditions does not hold, return FALSE. */
3971 acceptable_pow_call (gimple stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
3974 REAL_VALUE_TYPE c
, cint
;
3976 if (!first_pass_instance
3977 || !flag_unsafe_math_optimizations
3978 || !is_gimple_call (stmt
)
3979 || !has_single_use (gimple_call_lhs (stmt
)))
3982 fndecl
= gimple_call_fndecl (stmt
);
3985 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
3988 switch (DECL_FUNCTION_CODE (fndecl
))
3990 CASE_FLT_FN (BUILT_IN_POW
):
3991 if (flag_errno_math
)
3994 *base
= gimple_call_arg (stmt
, 0);
3995 arg1
= gimple_call_arg (stmt
, 1);
3997 if (TREE_CODE (arg1
) != REAL_CST
)
4000 c
= TREE_REAL_CST (arg1
);
4002 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
4005 *exponent
= real_to_integer (&c
);
4006 real_from_integer (&cint
, VOIDmode
, *exponent
, SIGNED
);
4007 if (!real_identical (&c
, &cint
))
4012 CASE_FLT_FN (BUILT_IN_POWI
):
4013 *base
= gimple_call_arg (stmt
, 0);
4014 arg1
= gimple_call_arg (stmt
, 1);
4016 if (!tree_fits_shwi_p (arg1
))
4019 *exponent
= tree_to_shwi (arg1
);
4026 /* Expanding negative exponents is generally unproductive, so we don't
4027 complicate matters with those. Exponents of zero and one should
4028 have been handled by expression folding. */
4029 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
4035 /* Recursively linearize a binary expression that is the RHS of STMT.
4036 Place the operands of the expression tree in the vector named OPS. */
4039 linearize_expr_tree (vec
<operand_entry_t
> *ops
, gimple stmt
,
4040 bool is_associative
, bool set_visited
)
4042 tree binlhs
= gimple_assign_rhs1 (stmt
);
4043 tree binrhs
= gimple_assign_rhs2 (stmt
);
4044 gimple binlhsdef
= NULL
, binrhsdef
= NULL
;
4045 bool binlhsisreassoc
= false;
4046 bool binrhsisreassoc
= false;
4047 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
4048 struct loop
*loop
= loop_containing_stmt (stmt
);
4049 tree base
= NULL_TREE
;
4050 HOST_WIDE_INT exponent
= 0;
4053 gimple_set_visited (stmt
, true);
4055 if (TREE_CODE (binlhs
) == SSA_NAME
)
4057 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
4058 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
4059 && !stmt_could_throw_p (binlhsdef
));
4062 if (TREE_CODE (binrhs
) == SSA_NAME
)
4064 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
4065 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
4066 && !stmt_could_throw_p (binrhsdef
));
4069 /* If the LHS is not reassociable, but the RHS is, we need to swap
4070 them. If neither is reassociable, there is nothing we can do, so
4071 just put them in the ops vector. If the LHS is reassociable,
4072 linearize it. If both are reassociable, then linearize the RHS
4075 if (!binlhsisreassoc
)
/* If this is not an associative operation like division, give up.  */
4080 if (!is_associative
)
4082 add_to_ops_vec (ops
, binrhs
);
4086 if (!binrhsisreassoc
)
4088 if (rhscode
== MULT_EXPR
4089 && TREE_CODE (binrhs
) == SSA_NAME
4090 && acceptable_pow_call (binrhsdef
, &base
, &exponent
))
4092 add_repeat_to_ops_vec (ops
, base
, exponent
);
4093 gimple_set_visited (binrhsdef
, true);
4096 add_to_ops_vec (ops
, binrhs
);
4098 if (rhscode
== MULT_EXPR
4099 && TREE_CODE (binlhs
) == SSA_NAME
4100 && acceptable_pow_call (binlhsdef
, &base
, &exponent
))
4102 add_repeat_to_ops_vec (ops
, base
, exponent
);
4103 gimple_set_visited (binlhsdef
, true);
4106 add_to_ops_vec (ops
, binlhs
);
4111 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4113 fprintf (dump_file
, "swapping operands of ");
4114 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4117 swap_ssa_operands (stmt
,
4118 gimple_assign_rhs1_ptr (stmt
),
4119 gimple_assign_rhs2_ptr (stmt
));
4122 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4124 fprintf (dump_file
, " is now ");
4125 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4128 /* We want to make it so the lhs is always the reassociative op,
4134 else if (binrhsisreassoc
)
4136 linearize_expr (stmt
);
4137 binlhs
= gimple_assign_rhs1 (stmt
);
4138 binrhs
= gimple_assign_rhs2 (stmt
);
4141 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
4142 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
4144 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
4145 is_associative
, set_visited
);
4147 if (rhscode
== MULT_EXPR
4148 && TREE_CODE (binrhs
) == SSA_NAME
4149 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs
), &base
, &exponent
))
4151 add_repeat_to_ops_vec (ops
, base
, exponent
);
4152 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs
), true);
4155 add_to_ops_vec (ops
, binrhs
);
4158 /* Repropagate the negates back into subtracts, since no other pass
4159 currently does it. */
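/* Illustrative example (SSA names invented for this comment): with
     x_1 = -a_2;  y_3 = b_4 + x_1;
   the PLUS is rewritten as y_3 = b_4 - a_2, and with
     x_1 = -a_2;  y_3 = b_4 - x_1;
   the MINUS is rewritten as y_3 = b_4 + a_2, dropping the negation.  */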
4162 repropagate_negates (void)
4167 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
4169 gimple user
= get_single_immediate_use (negate
);
4171 if (!user
|| !is_gimple_assign (user
))
4174 /* The negate operand can be either operand of a PLUS_EXPR
4175 (it can be the LHS if the RHS is a constant for example).
4177 Force the negate operand to the RHS of the PLUS_EXPR, then
4178 transform the PLUS_EXPR into a MINUS_EXPR. */
4179 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
4181 /* If the negated operand appears on the LHS of the
4182 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4183 to force the negated operand to the RHS of the PLUS_EXPR. */
4184 if (gimple_assign_rhs1 (user
) == negate
)
4186 swap_ssa_operands (user
,
4187 gimple_assign_rhs1_ptr (user
),
4188 gimple_assign_rhs2_ptr (user
));
4191 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4192 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4193 if (gimple_assign_rhs2 (user
) == negate
)
4195 tree rhs1
= gimple_assign_rhs1 (user
);
4196 tree rhs2
= get_unary_op (negate
, NEGATE_EXPR
);
4197 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
4198 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
, rhs2
);
4202 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
4204 if (gimple_assign_rhs1 (user
) == negate
)
4209 which we transform into
4212 This pushes down the negate which we possibly can merge
4213 into some other operation, hence insert it into the
4214 plus_negates vector. */
4215 gimple feed
= SSA_NAME_DEF_STMT (negate
);
4216 tree a
= gimple_assign_rhs1 (feed
);
4217 tree b
= gimple_assign_rhs2 (user
);
4218 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
);
4219 gimple_stmt_iterator gsi2
= gsi_for_stmt (user
);
4220 tree x
= make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed
)));
4221 gimple g
= gimple_build_assign (x
, PLUS_EXPR
, a
, b
);
4222 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
4223 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, x
);
4224 user
= gsi_stmt (gsi2
);
4226 reassoc_remove_stmt (&gsi
);
4227 release_defs (feed
);
4228 plus_negates
.safe_push (gimple_assign_lhs (user
));
4232 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4233 rid of one operation. */
4234 gimple feed
= SSA_NAME_DEF_STMT (negate
);
4235 tree a
= gimple_assign_rhs1 (feed
);
4236 tree rhs1
= gimple_assign_rhs1 (user
);
4237 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
4238 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, a
);
4239 update_stmt (gsi_stmt (gsi
));
4245 /* Returns true if OP is of a type for which we can do reassociation.
4246 That is for integral or non-saturating fixed-point types, and for
4247 floating point type when associative-math is enabled. */
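/* Illustrative example (not taken from this file): for a signed int A with
   undefined overflow, (A + 1) + INT_MAX is well defined when A == INT_MIN,
   but the reassociated A + (1 + INT_MAX) overflows in the intermediate
   1 + INT_MAX; hence only wrapping integral types, non-saturating
   fixed-point types and (with -fassociative-math) float types qualify.  */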
static bool
can_reassociate_p (tree op)
{
  tree type = TREE_TYPE (op);
  if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
      || NON_SAT_FIXED_POINT_TYPE_P (type)
      || (flag_associative_math && FLOAT_TYPE_P (type)))
    return true;
  return false;
}
/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   IE given
   b = c - d
   q = b - r
   k = t - q

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement
   and set UIDs within each basic block.  */
4279 break_up_subtract_bb (basic_block bb
)
4281 gimple_stmt_iterator gsi
;
4283 unsigned int uid
= 1;
4285 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4287 gimple stmt
= gsi_stmt (gsi
);
4288 gimple_set_visited (stmt
, false);
4289 gimple_set_uid (stmt
, uid
++);
4291 if (!is_gimple_assign (stmt
)
4292 || !can_reassociate_p (gimple_assign_lhs (stmt
)))
4295 /* Look for simple gimple subtract operations. */
4296 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
4298 if (!can_reassociate_p (gimple_assign_rhs1 (stmt
))
4299 || !can_reassociate_p (gimple_assign_rhs2 (stmt
)))
4302 /* Check for a subtract used only in an addition. If this
4303 is the case, transform it into add of a negate for better
4304 reassociation. IE transform C = A-B into C = A + -B if C
4305 is only used in an addition. */
4306 if (should_break_up_subtract (stmt
))
4307 break_up_subtract (stmt
, &gsi
);
4309 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
4310 && can_reassociate_p (gimple_assign_rhs1 (stmt
)))
4311 plus_negates
.safe_push (gimple_assign_lhs (stmt
));
4313 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
4315 son
= next_dom_son (CDI_DOMINATORS
, son
))
4316 break_up_subtract_bb (son
);
4319 /* Used for repeated factor analysis. */
4320 struct repeat_factor_d
4322 /* An SSA name that occurs in a multiply chain. */
4325 /* Cached rank of the factor. */
4328 /* Number of occurrences of the factor in the chain. */
4329 HOST_WIDE_INT count
;
4331 /* An SSA name representing the product of this factor and
4332 all factors appearing later in the repeated factor vector. */
4336 typedef struct repeat_factor_d repeat_factor
, *repeat_factor_t
;
4337 typedef const struct repeat_factor_d
*const_repeat_factor_t
;
4340 static vec
<repeat_factor
> repeat_factor_vec
;
4342 /* Used for sorting the repeat factor vector. Sort primarily by
4343 ascending occurrence count, secondarily by descending rank. */
static int
compare_repeat_factors (const void *x1, const void *x2)
{
  const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
  const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;

  if (rf1->count != rf2->count)
    return rf1->count - rf2->count;

  return rf2->rank - rf1->rank;
}
/* Look for repeated operands in OPS in the multiply tree rooted at
   STMT.  Replace them with an optimal sequence of multiplies and powi
   builtin calls, and remove the used operands from OPS.  Return an
   SSA name representing the value of the replacement sequence.  */

static tree
attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
{
  unsigned i, j, vec_len;
  int ii;
  operand_entry_t oe;
  repeat_factor_t rf1, rf2;
  repeat_factor rfnew;
  tree result = NULL_TREE;
  tree target_ssa, iter_result;
  tree type = TREE_TYPE (gimple_get_lhs (stmt));
  tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gimple mul_stmt, pow_stmt;

  /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
     target.  */
  if (!powi_fndecl)
    return NULL_TREE;

  /* Allocate the repeated factor vector.  */
  repeat_factor_vec.create (10);
  /* Scan the OPS vector for all SSA names in the product and build
     up a vector of occurrence counts for each factor.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
        {
          FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
            {
              if (rf1->factor == oe->op)
                {
                  rf1->count += oe->count;
                  break;
                }
            }

          if (j >= repeat_factor_vec.length ())
            {
              rfnew.factor = oe->op;
              rfnew.rank = oe->rank;
              rfnew.count = oe->count;
              rfnew.repr = NULL_TREE;
              repeat_factor_vec.safe_push (rfnew);
            }
        }
    }
  /* Sort the repeated factor vector by (a) increasing occurrence count,
     and (b) decreasing rank.  */
  repeat_factor_vec.qsort (compare_repeat_factors);
  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_pow opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions:

        t1 = y * z
        t2 = t1 * x
        t3 = t2 * t2
        t4 = t1 * t3
        result = t4 * z

     If we instead ignored the cached y * z and first multiplied by
     the __builtin_pow opportunity z * z, we would get the inferior:

        t1 = y * z
        t2 = t1 * x
        t3 = t2 * t2
        t4 = z * z
        t5 = t3 * t4
        result = t5 * y  */
  vec_len = repeat_factor_vec.length ();
  /* Repeatedly look for opportunities to create a builtin_powi call.  */
  while (true)
    {
      HOST_WIDE_INT power;

      /* First look for the largest cached product of factors from
         preceding iterations.  If found, create a builtin_powi for
         it if the minimum occurrence count for its factors is at
         least 2, or just use this cached product as our next
         multiplicand if the minimum occurrence count is 1.  */
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
        {
          if (rf1->repr && rf1->count > 0)
            break;
        }

      if (j < vec_len)
        {
          power = rf1->count;

          if (power == 1)
            {
              iter_result = rf1->repr;

              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  unsigned elt;
                  repeat_factor_t rf;
                  fputs ("Multiplying by cached product ", dump_file);
                  for (elt = j; elt < vec_len; elt++)
                    {
                      rf = &repeat_factor_vec[elt];
                      print_generic_expr (dump_file, rf->factor, 0);
                      if (elt < vec_len - 1)
                        fputs (" * ", dump_file);
                    }
                  fputs ("\n", dump_file);
                }
            }
          else
            {
              iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
              pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                            build_int_cst (integer_type_node,
                                                           power));
              gimple_call_set_lhs (pow_stmt, iter_result);
              gimple_set_location (pow_stmt, gimple_location (stmt));
              gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);

              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  unsigned elt;
                  repeat_factor_t rf;
                  fputs ("Building __builtin_pow call for cached product (",
                         dump_file);
                  for (elt = j; elt < vec_len; elt++)
                    {
                      rf = &repeat_factor_vec[elt];
                      print_generic_expr (dump_file, rf->factor, 0);
                      if (elt < vec_len - 1)
                        fputs (" * ", dump_file);
                    }
                  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n",
                           power);
                }
            }
        }
      else
        {
          /* Otherwise, find the first factor in the repeated factor
             vector whose occurrence count is at least 2.  If no such
             factor exists, there are no builtin_powi opportunities
             remaining.  */
          FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
            {
              if (rf1->count >= 2)
                break;
            }

          if (j >= vec_len)
            break;

          power = rf1->count;

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              unsigned elt;
              repeat_factor_t rf;
              fputs ("Building __builtin_pow call for (", dump_file);
              for (elt = j; elt < vec_len; elt++)
                {
                  rf = &repeat_factor_vec[elt];
                  print_generic_expr (dump_file, rf->factor, 0);
                  if (elt < vec_len - 1)
                    fputs (" * ", dump_file);
                }
              fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n", power);
            }

          reassociate_stats.pows_created++;

          /* Visit each element of the vector in reverse order (so that
             high-occurrence elements are visited first, and within the
             same occurrence count, lower-ranked elements are visited
             first).  Form a linear product of all elements in this order
             whose occurrence count is at least that of element J.
             Record the SSA name representing the product of each element
             with all subsequent elements in the vector.  */
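          /* E.g. with sorted factors x (count 2), y (count 3), z (count 4)
             and J == 0, the loop below caches z->repr = z, y->repr = y * z
             and x->repr = x * y * z; the cached y * z is then reused on a
             later iteration.  */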
          if (j == vec_len - 1)
            rf1->repr = rf1->factor;
          else
            {
              for (ii = vec_len - 2; ii >= (int)j; ii--)
                {
                  tree op1, op2;

                  rf1 = &repeat_factor_vec[ii];
                  rf2 = &repeat_factor_vec[ii + 1];

                  /* Init the last factor's representative to be itself.  */
                  if (!rf2->repr)
                    rf2->repr = rf2->factor;

                  op1 = rf1->factor;
                  op2 = rf2->repr;
                  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
                  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
                                                  op1, op2);
                  gimple_set_location (mul_stmt, gimple_location (stmt));
                  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
                  rf1->repr = target_ssa;

                  /* Don't reprocess the multiply we just introduced.  */
                  gimple_set_visited (mul_stmt, true);
                }
            }
          /* Form a call to __builtin_powi for the maximum product
             just formed, raised to the power obtained earlier.  */
          rf1 = &repeat_factor_vec[j];
          iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
          pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                        build_int_cst (integer_type_node,
                                                       power));
          gimple_call_set_lhs (pow_stmt, iter_result);
          gimple_set_location (pow_stmt, gimple_location (stmt));
          gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
        }
      /* If we previously formed at least one other builtin_powi call,
         form the product of this one and those others.  */
      if (result)
        {
          tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
          mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
                                          result, iter_result);
          gimple_set_location (mul_stmt, gimple_location (stmt));
          gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
          gimple_set_visited (mul_stmt, true);
          result = new_result;
        }
      else
        result = iter_result;
      /* Decrement the occurrence count of each element in the product
         by the count found above, and remove this many copies of each
         factor from OPS.  */
      for (i = j; i < vec_len; i++)
        {
          unsigned k = power;
          unsigned n;

          rf1 = &repeat_factor_vec[i];
          rf1->count -= power;

          FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
            {
              if (oe->op == rf1->factor)
                {
                  if (oe->count <= k)
                    {
                      ops->ordered_remove (n);
                      k -= oe->count;

                      if (k == 0)
                        break;
                    }
                  else
                    {
                      oe->count -= k;
                      break;
                    }
                }
            }
        }
    }
  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}
/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */

static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
{
  tree rhs1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (stmt);
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}
/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
                            tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}
/* Reassociate expressions in basic block BB and its post-dominator as
   children.  */

static void
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple stmt = last_stmt (bb);

  if (stmt && !gimple_visited_p (stmt))
    maybe_optimize_range_tests (stmt);
  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
          && !stmt_could_throw_p (stmt))
        {
          tree lhs, rhs1, rhs2;
          enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

          /* If this is not a gimple binary expression, there is
             nothing for us to do with it.  */
          if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
            continue;

          /* If this was part of an already processed statement,
             we don't need to touch it again.  */
          if (gimple_visited_p (stmt))
            {
              /* This statement might have become dead because of previous
                 reassociations.  */
              if (has_zero_uses (gimple_get_lhs (stmt)))
                {
                  reassoc_remove_stmt (&gsi);
                  release_defs (stmt);
                  /* We might end up removing the last stmt above which
                     places the iterator to the end of the sequence.
                     Reset it to the last stmt in this case which might
                     be the end of the sequence as well if we removed
                     the last statement of the sequence.  In which case
                     we need to bail out.  */
                  if (gsi_end_p (gsi))
                    {
                      gsi = gsi_last_bb (bb);
                      if (gsi_end_p (gsi))
                        break;
                    }
                }
              continue;
            }
          lhs = gimple_assign_lhs (stmt);
          rhs1 = gimple_assign_rhs1 (stmt);
          rhs2 = gimple_assign_rhs2 (stmt);

          /* For non-bit or min/max operations we can't associate
             all types.  Verify that here.  */
          if (rhs_code != BIT_IOR_EXPR
              && rhs_code != BIT_AND_EXPR
              && rhs_code != BIT_XOR_EXPR
              && rhs_code != MIN_EXPR
              && rhs_code != MAX_EXPR
              && (!can_reassociate_p (lhs)
                  || !can_reassociate_p (rhs1)
                  || !can_reassociate_p (rhs2)))
            continue;

          if (associative_tree_code (rhs_code))
            {
              auto_vec<operand_entry_t> ops;
              tree powi_result = NULL_TREE;

              /* There may be no immediate uses left by the time we
                 get here because we may have eliminated them all.  */
              if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
                continue;

              gimple_set_visited (stmt, true);
              linearize_expr_tree (&ops, stmt, true, true);
              ops.qsort (sort_by_operand_rank);
              optimize_ops_list (rhs_code, &ops);
              if (undistribute_ops_list (rhs_code, &ops,
                                         loop_containing_stmt (stmt)))
                {
                  ops.qsort (sort_by_operand_rank);
                  optimize_ops_list (rhs_code, &ops);
                }

              if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
                optimize_range_tests (rhs_code, &ops);

              if (first_pass_instance
                  && rhs_code == MULT_EXPR
                  && flag_unsafe_math_optimizations)
                powi_result = attempt_builtin_powi (stmt, &ops);
              /* If the operand vector is now empty, all operands were
                 consumed by the __builtin_powi optimization.  */
              if (ops.length () == 0)
                transform_stmt_to_copy (&gsi, stmt, powi_result);
              else if (ops.length () == 1)
                {
                  tree last_op = ops.last ()->op;

                  if (powi_result)
                    transform_stmt_to_multiply (&gsi, stmt, last_op,
                                                powi_result);
                  else
                    transform_stmt_to_copy (&gsi, stmt, last_op);
                }
              else
                {
                  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
                  int ops_num = ops.length ();
                  int width = get_reassociation_width (ops_num, rhs_code,
                                                       mode);
                  tree new_lhs = lhs;

                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file,
                             "Width = %d was chosen for reassociation\n",
                             width);

                  if (width > 1
                      && ops.length () > 3)
                    rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
                                                width, ops);
                  else
                    {
                      /* When there are three operands left, we want
                         to make sure the ones that get the double
                         binary op are chosen wisely.  */
                      int len = ops.length ();
                      if (len >= 3)
                        swap_ops_for_binary_stmt (ops, len - 3, stmt);

                      new_lhs = rewrite_expr_tree (stmt, 0, ops,
                                                   powi_result != NULL);
                    }
                  /* If we combined some repeated factors into a
                     __builtin_powi call, multiply that result by the
                     reassociated operands.  */
                  if (powi_result)
                    {
                      gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
                      tree type = TREE_TYPE (lhs);
                      tree target_ssa = make_temp_ssa_name (type, NULL,
                                                            "reassocmul");
                      gimple_set_lhs (lhs_stmt, target_ssa);
                      update_stmt (lhs_stmt);
                      if (lhs != new_lhs)
                        target_ssa = new_lhs;
                      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
                                                      powi_result, target_ssa);
                      gimple_set_location (mul_stmt, gimple_location (stmt));
                      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
                    }
                }
            }
        }
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    reassociate_bb (son);
}
/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...;  // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...;  // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI <1 (l1), OTHERVAR (l2)>;  */

static void
branch_fixup (void)
{
  tree var;
  unsigned int i;
  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (var);
      gimple use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      gcc_assert (ok
                  && is_gimple_assign (use_stmt)
                  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
                  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple g = gimple_build_cond (NE_EXPR, var,
                                    build_zero_cst (TREE_TYPE (var)),
                                    NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = REG_BR_PROB_BASE / 2;
      etrue->count = cond_bb->count / 2;
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      efalse->count -= etrue->count;
      then_bb->count -= etrue->count;

      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
        othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
        othervar = gimple_assign_rhs1 (use_stmt);
      else
        gcc_unreachable ();
      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}
void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
void debug_ops_vector (vec<operand_entry_t> ops);
/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
{
  operand_entry_t oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op, 0);
    }
}
/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry_t> ops)
{
  dump_ops_vector (stderr, ops);
}
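
/* These two entry points are meant mainly for use from a debugger,
   e.g. "call debug_ops_vector (ops)" while stopped inside
   reassociate_bb.  */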
static void
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}
/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  operand_entry_pool = create_alloc_pool ("operand entry pool",
                                          sizeof (struct operand_entry), 30);
  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, long>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
        insert_operand_rank (name, ++rank);
    }
  /* Set up rank for each BB  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;
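  /* Presumably the shift leaves the low bits of each block's base rank
     free, so that ranks handed out to individual statements within a
     block (see get_rank) stay below the base rank of any later block.  */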

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}
/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
                            reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
                            reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
                            reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
                            reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
                            reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
                            reassociate_stats.pows_created);

  delete operand_rank;
  free_alloc_pool (operand_entry_pool);
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}
/* Gate and execute functions for Reassociation.  */

static unsigned int
execute_reassoc (void)
{
  init_reassoc ();

  do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return 0;
}
namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};
class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
  virtual unsigned int execute (function *) { return execute_reassoc (); }

}; // class pass_reassoc

} // anon namespace
gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}