/* Reassociation for trees.
   Copyright (C) 2005-2015 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "dominance.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-inline.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "insn-config.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "alloc-pool.h"
#include "langhooks.h"
#include "cfgloop.h"
#include "diagnostic-core.h"
#include "insn-codes.h"
/*  This is a simple global reassociation pass.  It is, in part, based
    on the LLVM pass of the same name (they do some things more/less
    than we do, in different orders, etc).

    It consists of five steps:

    1. Breaking up subtract operations into addition + negate, where
    it would promote the reassociation of adds.

    2. Left linearization of the expression trees, so that (A+B)+(C+D)
    becomes (((A+B)+C)+D), which is easier for us to rewrite later.
    During linearization, we place the operands of the binary
    expressions into a vector of operand_entry_t.

    3. Optimization of the operand lists, eliminating things like a +
    -a, a & a, etc.

    3a. Combine repeated factors with the same occurrence counts
    into a __builtin_powi call that will later be optimized into
    an optimal number of multiplies.

    4. Rewrite the expression trees we linearized and optimized so
    they are in proper rank order.

    5. Repropagate negates, as nothing else will clean it up ATM.

    A bit of theory on #4, since nobody seems to write anything down
    about why it makes sense to do it the way they do it:

    We could do this much nicer theoretically, but don't (for reasons
    explained after how to do it theoretically nice :P).

    In order to promote the most redundancy elimination, you want
    binary expressions whose operands are the same rank (or
    preferably, the same value) exposed to the redundancy eliminator,
    for possible elimination.

    So the way to do this if we really cared, is to build the new op
    tree from the leaves to the roots, merging as you go, and putting the
    new op on the end of the worklist, until you are left with one
    thing on the worklist.

    IE if you have to rewrite the following set of operands (listed with
    rank in parentheses), with opcode PLUS_EXPR:

    a (1),  b (1),  c (1),  d (2), e (2)

    We start with our merge worklist empty, and the ops list with all of
    those on it.

    You want to first merge all leaves of the same rank, as much as
    possible.

    So first build a binary op of

    mergetmp = a + b, and put "mergetmp" on the merge worklist.

    Because there is no three operand form of PLUS_EXPR, c is not going to
    be exposed to redundancy elimination as a rank 1 operand.

    So you might as well throw it on the merge worklist (you could also
    consider it to now be a rank two operand, and merge it with d and e,
    but in this case, you then have evicted e from a binary op.  So at
    least in this situation, you can't win.)

    Then build a binary op of d + e
    mergetmp2 = d + e

    and put mergetmp2 on the merge worklist.

    so merge worklist = {mergetmp, c, mergetmp2}

    Continue building binary ops of these operations until you have only
    one operation left on the worklist.

    mergetmp3 = mergetmp + c

    worklist = {mergetmp2, mergetmp3}

    mergetmp4 = mergetmp2 + mergetmp3

    worklist = {mergetmp4}

    Because we have one operation left, we can now just set the original
    statement equal to the result of that operation.

    This will at least expose a + b and d + e to redundancy elimination
    as binary operations.

    For extra points, you can reuse the old statements to build the
    mergetmps, since you shouldn't run out.

    So why don't we do this?

    Because it's expensive, and rarely will help.  Most trees we are
    reassociating have 3 or less ops.  If they have 2 ops, they already
    will be written into a nice single binary op.  If you have 3 ops, a
    single simple check suffices to tell you whether the first two are of the
    same rank.  If so, you know to order it

    mergetmp = op1 + op2
    newstmt = mergetmp + op3

    instead of
    mergetmp = op2 + op3
    newstmt = mergetmp + op1

    If all three are of the same rank, you can't expose them all in a
    single binary operator anyway, so the above is *still* the best you
    can do.

    Thus, this is what we do.  When we have three ops left, we check to see
    what order to put them in, and call it a day.  As a nod to vector sum
    reduction, we check if any of the ops are really a phi node that is a
    destructive update for the associating op, and keep the destructive
    update together for vector sum reduction recognition.  */
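/* Illustrative sketch only, not part of the pass: the merge-worklist order
   described above, written out at the source level.  The function and
   variable names below are hypothetical and mirror the example ranks
   a (1), b (1), c (1), d (2), e (2) from the comment.  */
#if 0
static int
rank_merge_example (int a, int b, int c, int d, int e)
{
  int mergetmp = a + b;          /* merge the rank-1 leaves first */
  int mergetmp2 = d + e;         /* then the rank-2 leaves */
  int mergetmp3 = mergetmp + c;  /* worklist = {mergetmp2, mergetmp3} */
  return mergetmp2 + mergetmp3;  /* one operation left on the worklist */
}
#endif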
/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;

/* Operator, rank pair.  */
typedef struct operand_entry
{
  unsigned int rank;
  int id;
  tree op;
  unsigned int count;
} *operand_entry_t;

static pool_allocator<operand_entry> operand_entry_pool ("operand entry pool",
							 30);

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;

/* Operand->rank hashtable.  */
static hash_map<tree, long> *operand_rank;

/* Vector of SSA_NAMEs on which after reassociate_bb is done with
   all basic blocks the CFG should be adjusted - basic blocks
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;

/* Forward decls.  */
static long get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

static bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}
/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)
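/* Illustrative sketch only, not part of the pass: a hypothetical plain C
   accumulator loop of the kind PHI_LOOP_BIAS is aimed at.  The GIMPLE phi
   for "sum" in the loop header is a loop-carried phi with a single in-loop
   use, so its rank is biased upward and reassociation adds it last, keeping
   each iteration's partial sum independent of the previous one.  */
#if 0
static int
accumulate_example (const int *a, const int *b, int n)
{
  int sum = 0;                 /* becomes sum_1 = PHI <0, sum_2> in GIMPLE */
  for (int i = 0; i < n; i++)
    sum = sum + a[i] + b[i];   /* biased rank yields (a[i] + b[i]) + sum_1 */
  return sum;
}
#endif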
/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
static long
phi_rank (gimple stmt)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple use_stmt;

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
	{
	  gimple def_stmt = SSA_NAME_DEF_STMT (arg);
	  if (gimple_bb (def_stmt)->loop_father == father)
	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
	}
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}
/* If EXP is an SSA_NAME defined by a PHI statement that represents a
   loop-carried dependence of an innermost loop, return TRUE; else
   return FALSE.  */

static bool
loop_carried_phi (tree exp)
{
  gimple phi_stmt;
  long block_rank;

  if (TREE_CODE (exp) != SSA_NAME
      || SSA_NAME_IS_DEFAULT_DEF (exp))
    return false;

  phi_stmt = SSA_NAME_DEF_STMT (exp);

  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
    return false;

  /* Non-loop-carried phis have block rank.  Loop-carried phis have
     an additional bias added in.  If this phi doesn't have block rank,
     it's biased and should not be propagated.  */
  block_rank = bb_rank[gimple_bb (phi_stmt)->index];

  if (phi_rank (phi_stmt) != block_rank)
    return true;

  return false;
}
/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */

static long
propagate_rank (long rank, tree op)
{
  long op_rank;

  if (loop_carried_phi (op))
    return rank;

  op_rank = get_rank (op);

  return MAX (rank, op_rank);
}
/* Look up the operand rank structure for expression E.  */

static long
find_operand_rank (tree e)
{
  long *slot = operand_rank->get (e);
  return slot ? *slot : -1;
}

/* Insert {E,RANK} into the operand rank hashtable.  */

static void
insert_operand_rank (tree e, long rank)
{
  gcc_assert (rank > 0);
  gcc_assert (!operand_rank->put (e, rank));
}
/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      ssa_op_iter iter;
      gimple stmt;
      long rank;
      tree op;

      if (SSA_NAME_IS_DEFAULT_DEF (e))
	return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
	return phi_rank (stmt);

      if (!is_gimple_assign (stmt))
	return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
	return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
	 exception, remove the bias from loop-carried phis when propagating
	 the rank so that dependent operations are not also biased.  */
      /* Simply walk over all SSA uses - this takes advantage of the
	 fact that non-SSA operands are is_gimple_min_invariant and
	 thus have rank 0.  */
      rank = 0;
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
	rank = propagate_rank (rank, op);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Rank for ");
	  print_generic_expr (dump_file, e, 0);
	  fprintf (dump_file, " is %ld\n", (rank + 1));
	}

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Constants, globals, etc., are rank 0 */
  return 0;
}
/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */

static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    return FLOAT_CONST_TYPE;
  else
    return OTHER_CONST_TYPE;
}
/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */

static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry_t oea = *(const operand_entry_t *)pa;
  const operand_entry_t oeb = *(const operand_entry_t *)pb;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oeb->rank == 0 && oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
	return constant_type (oeb->op) - constant_type (oea->op);
      else
	/* To make sorting result stable, we use unique IDs to determine
	   order.  */
	return oeb->id - oea->id;
    }

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if ((oeb->rank - oea->rank == 0)
      && TREE_CODE (oea->op) == SSA_NAME
      && TREE_CODE (oeb->op) == SSA_NAME)
    {
      /* As SSA_NAME_VERSION is assigned pretty randomly (we reuse
	 versions of removed SSA_NAMEs), prefer, if possible, to sort
	 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.  */
      if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
	  && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
	  && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
	{
	  gimple stmta = SSA_NAME_DEF_STMT (oea->op);
	  gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
	  basic_block bba = gimple_bb (stmta);
	  basic_block bbb = gimple_bb (stmtb);
	  if (bbb != bba)
	    {
	      if (bb_rank[bbb->index] != bb_rank[bba->index])
		return bb_rank[bbb->index] - bb_rank[bba->index];
	    }
	  else
	    {
	      bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
	      bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
	      if (da != db)
		return da ? 1 : -1;
	    }
	}

      if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
	return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
      else
	return oeb->id - oea->id;
    }

  if (oeb->rank != oea->rank)
    return oeb->rank - oea->rank;
  else
    return oeb->id - oea->id;
}
/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
{
  operand_entry_t oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  ops->safe_push (oe);
}

/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
		       HOST_WIDE_INT repeat)
{
  operand_entry_t oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}
/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    return true;

  return false;
}
/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of that operation.  Otherwise, return NULL.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple stmt = SSA_NAME_DEF_STMT (name);

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}
/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
			  vec<operand_entry_t> *ops,
			  bool *all_done,
			  unsigned int i,
			  operand_entry_t curr,
			  operand_entry_t last)
{

  /* If we have two of the same op, and the opcode is & |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
	{
	case MAX_EXPR:
	case MIN_EXPR:
	case BIT_IOR_EXPR:
	case BIT_AND_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op, 0);
	      fprintf (dump_file, " [&|minmax] ");
	      print_generic_expr (dump_file, last->op, 0);
	      fprintf (dump_file, " -> ");
	      print_generic_stmt (dump_file, last->op, 0);
	    }

	  ops->ordered_remove (i);
	  reassociate_stats.ops_eliminated ++;

	  return true;

	case BIT_XOR_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op, 0);
	      fprintf (dump_file, " ^ ");
	      print_generic_expr (dump_file, last->op, 0);
	      fprintf (dump_file, " -> nothing\n");
	    }

	  reassociate_stats.ops_eliminated += 2;

	  if (ops->length () == 2)
	    {
	      ops->truncate (0);
	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
	      *all_done = true;
	    }
	  else
	    {
	      ops->ordered_remove (i-1);
	      ops->ordered_remove (i-1);
	    }

	  return true;

	default:
	  break;
	}
    }
  return false;
}
static vec<tree> plus_negates;
/* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
   expression, look in OPS for a corresponding positive operation to cancel
   it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
			   vec<operand_entry_t> *ops,
			   unsigned int currindex,
			   operand_entry_t curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1 ;
       i++)
    {
      if (oe->op == negateop)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, negateop, 0);
	      fprintf (dump_file, " + -");
	      print_generic_expr (dump_file, oe->op, 0);
	      fprintf (dump_file, " -> 0\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
      else if (oe->op == notop)
	{
	  tree op_type = TREE_TYPE (oe->op);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop, 0);
	      fprintf (dump_file, " + ~");
	      print_generic_expr (dump_file, oe->op, 0);
	      fprintf (dump_file, " -> -1\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
    }

  /* CURR->OP is a negate expr in a plus expr: save it for later
     inspection in repropagate_negates().  */
  if (negateop != NULL_TREE)
    plus_negates.safe_push (curr->op);

  return false;
}
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with -1 or 0, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
		     vec<operand_entry_t> *ops,
		     unsigned int currindex,
		     operand_entry_t curr)
{
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop, 0);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " & ~");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " | ~");
	      print_generic_expr (dump_file, oe->op, 0);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " -> 0\n");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " -> -1\n");
	    }

	  if (opcode == BIT_AND_EXPR)
	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
	  else if (opcode == BIT_IOR_EXPR)
	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));

	  reassociate_stats.ops_eliminated += ops->length () - 1;
	  ops->truncate (0);
	  ops->quick_push (oe);
	  return true;
	}
    }

  return false;
}
/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */
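/* Illustrative sketch only, not part of the pass: the algebraic identities
   handled below, written at the source level.  The helper is hypothetical
   and only demonstrates the algebra on an already-linearized chain.  */
#if 0
static unsigned
constant_identities_example (unsigned x, unsigned y)
{
  unsigned a = (x & y) & 0u;   /* "& 0" collapses the whole AND chain to 0 */
  unsigned b = (x & y) & ~0u;  /* "& -1" is simply dropped from the chain */
  unsigned c = (x | y) | ~0u;  /* "| -1" collapses the whole IOR chain to -1 */
  unsigned d = (x + y) + 0u;   /* "[|^+] 0" is dropped from the chain */
  return a ^ b ^ c ^ d;
}
#endif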
static void
eliminate_using_constants (enum tree_code opcode,
			   vec<operand_entry_t> *ops)
{
  operand_entry_t oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
	{
	case BIT_AND_EXPR:
	  if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & -1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case BIT_IOR_EXPR:
	  if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | -1, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case MULT_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && !HONOR_NANS (type)
		  && !HONOR_SIGNED_ZEROS (type)
		  && real_zerop (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;
		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_onep (oelast->op)
		   || (FLOAT_TYPE_P (type)
		       && !HONOR_SNANS (type)
		       && real_onep (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	case BIT_XOR_EXPR:
	case PLUS_EXPR:
	case MINUS_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
		  && fold_real_zero_addition_p (type, oelast->op,
						opcode == MINUS_EXPR)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found [|^+] 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	default:
	  break;
	}
    }
}
static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
				 bool, bool);

/* Structure for tracking and counting operands.  */
typedef struct oecount_s {
  int cnt;
  int id;
  enum tree_code oecode;
  tree op;
} oecount;


/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;
/* Oecount hashtable helpers.  */

struct oecount_hasher
{
  typedef int value_type;
  typedef int compare_type;
  static inline hashval_t hash (const value_type &);
  static inline bool equal (const value_type &, const compare_type &);
  static bool is_deleted (int &v) { return v == 1; }
  static void mark_deleted (int &e) { e = 1; }
  static bool is_empty (int &v) { return v == 0; }
  static void mark_empty (int &e) { e = 0; }
  static void remove (int &) {}
};
/* Hash function for oecount.  */

inline hashval_t
oecount_hasher::hash (const value_type &p)
{
  const oecount *c = &cvec[p - 42];
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}

/* Comparison function for oecount.  */

inline bool
oecount_hasher::equal (const value_type &p1, const compare_type &p2)
{
  const oecount *c1 = &cvec[p1 - 42];
  const oecount *c2 = &cvec[p2 - 42];
  return (c1->oecode == c2->oecode
	  && c1->op == c2->op);
}

/* Comparison function for qsort sorting oecount elements by count.  */

static int
oecount_cmp (const void *p1, const void *p2)
{
  const oecount *c1 = (const oecount *)p1;
  const oecount *c2 = (const oecount *)p2;
  if (c1->cnt != c2->cnt)
    return c1->cnt - c2->cnt;
  else
    /* If counts are identical, use unique IDs to stabilize qsort.  */
    return c1->id - c2->id;
}
/* Return TRUE iff STMT represents a builtin call that raises OP
   to some exponent.  */

static bool
stmt_is_power_of_op (gimple stmt, tree op)
{
  tree fndecl;

  if (!is_gimple_call (stmt))
    return false;

  fndecl = gimple_call_fndecl (stmt);

  if (!fndecl
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return false;

  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
    {
    CASE_FLT_FN (BUILT_IN_POW):
    CASE_FLT_FN (BUILT_IN_POWI):
      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));

    default:
      return false;
    }
}
/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */

static HOST_WIDE_INT
decrement_power (gimple stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
    {
    CASE_FLT_FN (BUILT_IN_POW):
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, SIGNED);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_FLT_FN (BUILT_IN_POWI):
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      gcc_unreachable ();
    }
}
/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple stmt, tree *def)
{
  tree lhs;
  gimple use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  reassoc_remove_stmt (&gsi);
  release_defs (stmt);
}
/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  gimple stmt = SSA_NAME_DEF_STMT (*def);

  do
    {
      tree name;

      if (opcode == MULT_EXPR
	  && stmt_is_power_of_op (stmt, op))
	{
	  if (decrement_power (stmt) == 1)
	    propagate_op_to_single_use (op, stmt, def);
	  return;
	}

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
	 is ours simply propagate the other operand into the stmts
	 single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
	  && (name == op
	      || gimple_assign_rhs2 (stmt) == op))
	{
	  if (name == op)
	    name = gimple_assign_rhs2 (stmt);
	  propagate_op_to_single_use (name, stmt, def);
	  return;
	}

      /* We might have a multiply of two __builtin_pow* calls, and
	 the operand might be hiding in the rightmost one.  */
      if (opcode == MULT_EXPR
	  && gimple_assign_rhs_code (stmt) == opcode
	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
	  && has_single_use (gimple_assign_rhs2 (stmt)))
	{
	  gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
	  if (stmt_is_power_of_op (stmt2, op))
	    {
	      if (decrement_power (stmt2) == 1)
		propagate_op_to_single_use (op, stmt2, def);
	      return;
	    }
	}

      /* Continue walking the chain.  */
      gcc_assert (name != op
		  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
    }
  while (1);
}
/* Returns true if statement S1 dominates statement S2.  Like
   stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */

static bool
reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     SSA_NAME.  Assume it lives at the beginning of function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 == bb2)
    {
      /* PHIs in the same basic block are assumed to be
	 executed all in parallel, if only one stmt is a PHI,
	 it dominates the other stmt in the same basic block.  */
      if (gimple_code (s1) == GIMPLE_PHI)
	return true;

      if (gimple_code (s2) == GIMPLE_PHI)
	return false;

      gcc_assert (gimple_uid (s1) && gimple_uid (s2));

      if (gimple_uid (s1) < gimple_uid (s2))
	return true;

      if (gimple_uid (s1) > gimple_uid (s2))
	return false;

      gimple_stmt_iterator gsi = gsi_for_stmt (s1);
      unsigned int uid = gimple_uid (s1);
      for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple s = gsi_stmt (gsi);
	  if (gimple_uid (s) != uid)
	    break;
	  if (s == s2)
	    return true;
	}

      return false;
    }

  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}
/* Insert STMT after INSERT_POINT.  */

static void
insert_stmt_after (gimple stmt, gimple insert_point)
{
  gimple_stmt_iterator gsi;
  basic_block bb;

  if (gimple_code (insert_point) == GIMPLE_PHI)
    bb = gimple_bb (insert_point);
  else if (!stmt_ends_bb_p (insert_point))
    {
      gsi = gsi_for_stmt (insert_point);
      gimple_set_uid (stmt, gimple_uid (insert_point));
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
      return;
    }
  else
    /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
       thus if it must end a basic block, it should be a call that can
       throw, or some assignment that can throw.  If it throws, the LHS
       of it will not be initialized though, so only valid places using
       the SSA_NAME should be dominated by the fallthru edge.  */
    bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
  gsi = gsi_after_labels (bb);
  if (gsi_end_p (gsi))
    {
      gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
      gimple_set_uid (stmt,
		      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
    }
  else
    gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}
/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
   the result.  Places the statement after the definition of either
   OP1 or OP2.  Returns the new statement.  */

static gimple
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
{
  gimple op1def = NULL, op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gassign *sum;

  /* Create the addition statement.  */
  op = make_ssa_name (type);
  sum = gimple_build_assign (op, opcode, op1, op2);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
      if (gsi_end_p (gsi))
	{
	  gimple_stmt_iterator gsi2
	    = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	  gimple_set_uid (sum,
			  gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
	}
      else
	gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else
    {
      gimple insert_point;
      if ((!op1def || gimple_nop_p (op1def))
	  || (op2def && !gimple_nop_p (op2def)
	      && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
	insert_point = op2def;
      else
	insert_point = op1def;
      insert_stmt_after (sum, insert_point);
    }

  return sum;
}
/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
	second candidate bitmap noting all multiplication/division chains
	that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
	candidates with one (operand, code) operation removed from their
	multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
	multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
	processing of (operand, code) pairs continues.

  The alternate addition chains built are re-processed by the main
  reassociation algorithm which allows optimizing a * x * y + b * y * x
  to (a + b) * x * y in one invocation of the reassociation pass.  */
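/* Illustrative sketch only, not part of the pass: the source-level shape of
   the un-distribution transform described above; the function name is
   hypothetical.  */
#if 0
static int
undistribute_example (int a, int b, int x, int y)
{
  /* The addition chain  a * x * y + b * y * x  is rewritten as
     (a + b) * x * y, leaving a single multiplication chain that the main
     reassociation pass then processes.  */
  return (a + b) * x * y;
}
#endif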
static bool
undistribute_ops_list (enum tree_code opcode,
		       vec<operand_entry_t> *ops, struct loop *loop)
{
  unsigned int length = ops->length ();
  operand_entry_t oe1;
  unsigned i, j;
  sbitmap candidates, candidates2;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  vec<operand_entry_t> *subops;
  bool changed = false;
  int next_oecount_id = 0;

  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process.  */
  candidates = sbitmap_alloc (length);
  bitmap_clear (candidates);
  nr_candidates = 0;
  FOR_EACH_VEC_ELT (*ops, i, oe1)
    {
      enum tree_code dcode;
      gimple oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
	continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
	continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
	   && dcode != RDIV_EXPR)
	  || !is_reassociable_op (oe1def, dcode, loop))
	continue;

      bitmap_set_bit (candidates, i);
      nr_candidates++;
    }

  if (nr_candidates < 2)
    {
      sbitmap_free (candidates);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
			  (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.  */
  cvec.create (0);

  hash_table<oecount_hasher> ctable (15);

  /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to workaround that limitation.  */
  typedef vec<operand_entry_t> vec_operand_entry_t_heap;
  subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
    {
      gimple oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
      oecode = gimple_assign_rhs_code (oedef);
      linearize_expr_tree (&subops[i], oedef,
			   associative_tree_code (oecode), false);

      FOR_EACH_VEC_ELT (subops[i], j, oe1)
	{
	  oecount c;
	  int *slot;
	  int idx;
	  c.oecode = oecode;
	  c.cnt = 1;
	  c.id = next_oecount_id++;
	  c.op = oe1->op;
	  cvec.safe_push (c);
	  idx = cvec.length () + 41;
	  slot = ctable.find_slot (idx, INSERT);
	  if (!*slot)
	    *slot = idx;
	  else
	    {
	      cvec.pop ();
	      cvec[*slot - 42].cnt++;
	    }
	}
    }

  /* Sort the counting table.  */
  cvec.qsort (oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      FOR_EACH_VEC_ELT (cvec, j, c)
	{
	  fprintf (dump_file, "  %u %s: ", c->cnt,
		   c->oecode == MULT_EXPR
		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
	  print_generic_expr (dump_file, c->op, 0);
	  fprintf (dump_file, "\n");
	}
    }

  /* Process the (operand, code) pairs in order of most occurrence.  */
  candidates2 = sbitmap_alloc (length);
  while (!cvec.is_empty ())
    {
      oecount *c = &cvec.last ();
      if (c->cnt < 2)
	break;

      /* Now collect the operands in the outer chain that contain
	 the common operand in their inner chain.  */
      bitmap_clear (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
	{
	  gimple oedef;
	  enum tree_code oecode;
	  unsigned j;
	  tree op = (*ops)[i]->op;

	  /* If we undistributed in this chain already this may be
	     a constant.  */
	  if (TREE_CODE (op) != SSA_NAME)
	    continue;

	  oedef = SSA_NAME_DEF_STMT (op);
	  oecode = gimple_assign_rhs_code (oedef);
	  if (oecode != c->oecode)
	    continue;

	  FOR_EACH_VEC_ELT (subops[i], j, oe1)
	    {
	      if (oe1->op == c->op)
		{
		  bitmap_set_bit (candidates2, i);
		  ++nr_candidates2;
		  break;
		}
	    }
	}

      if (nr_candidates2 >= 2)
	{
	  operand_entry_t oe1, oe2;
	  gimple prod;
	  int first = bitmap_first_set_bit (candidates2);

	  /* Build the new addition chain.  */
	  oe1 = (*ops)[first];
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Building (");
	      print_generic_expr (dump_file, oe1->op, 0);
	    }
	  zero_one_operation (&oe1->op, c->oecode, c->op);
	  EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
	    {
	      gimple sum;
	      oe2 = (*ops)[i];
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " + ");
		  print_generic_expr (dump_file, oe2->op, 0);
		}
	      zero_one_operation (&oe2->op, c->oecode, c->op);
	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
				       oe1->op, oe2->op, opcode);
	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
	      oe2->rank = 0;
	      oe1->op = gimple_get_lhs (sum);
	    }

	  /* Apply the multiplication/division.  */
	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
				    oe1->op, c->op, c->oecode);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
	      print_generic_expr (dump_file, c->op, 0);
	      fprintf (dump_file, "\n");
	    }

	  /* Record it in the addition chain and disable further
	     undistribution with this op.  */
	  oe1->op = gimple_assign_lhs (prod);
	  oe1->rank = get_rank (oe1->op);
	  subops[first].release ();

	  changed = true;
	}

      cvec.pop ();
    }

  for (i = 0; i < ops->length (); ++i)
    subops[i].release ();
  free (subops);
  cvec.release ();
  sbitmap_free (candidates);
  sbitmap_free (candidates2);

  return changed;
}
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
   expression, examine the other OPS to see if any of them are comparisons
   of the same values, which we may be able to combine or eliminate.
   For example, we can rewrite (a < b) | (a == b) as (a <= b).  */

static bool
eliminate_redundant_comparison (enum tree_code opcode,
				vec<operand_entry_t> *ops,
				unsigned int currindex,
				operand_entry_t curr)
{
  tree op1, op2;
  enum tree_code lcode, rcode;
  gimple def1, def2;
  int i;
  operand_entry_t oe;

  if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
    return false;

  /* Check that CURR is a comparison.  */
  if (TREE_CODE (curr->op) != SSA_NAME)
    return false;
  def1 = SSA_NAME_DEF_STMT (curr->op);
  if (!is_gimple_assign (def1))
    return false;
  lcode = gimple_assign_rhs_code (def1);
  if (TREE_CODE_CLASS (lcode) != tcc_comparison)
    return false;
  op1 = gimple_assign_rhs1 (def1);
  op2 = gimple_assign_rhs2 (def1);

  /* Now look for a similar comparison in the remaining OPS.  */
  for (i = currindex + 1; ops->iterate (i, &oe); i++)
    {
      tree t;

      if (TREE_CODE (oe->op) != SSA_NAME)
	continue;
      def2 = SSA_NAME_DEF_STMT (oe->op);
      if (!is_gimple_assign (def2))
	continue;
      rcode = gimple_assign_rhs_code (def2);
      if (TREE_CODE_CLASS (rcode) != tcc_comparison)
	continue;

      /* If we got here, we have a match.  See if we can combine the
	 two comparisons.  */
      if (opcode == BIT_IOR_EXPR)
	t = maybe_fold_or_comparisons (lcode, op1, op2,
				       rcode, gimple_assign_rhs1 (def2),
				       gimple_assign_rhs2 (def2));
      else
	t = maybe_fold_and_comparisons (lcode, op1, op2,
					rcode, gimple_assign_rhs1 (def2),
					gimple_assign_rhs2 (def2));
      if (!t)
	continue;

      /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
	 always give us a boolean_type_node value back.  If the original
	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
	 we need to convert.  */
      if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
	t = fold_convert (TREE_TYPE (curr->op), t);

      if (TREE_CODE (t) != INTEGER_CST
	  && !operand_equal_p (t, curr->op, 0))
	{
	  enum tree_code subcode;
	  tree newop1, newop2;
	  if (!COMPARISON_CLASS_P (t))
	    continue;
	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
	  STRIP_USELESS_TYPE_CONVERSION (newop1);
	  STRIP_USELESS_TYPE_CONVERSION (newop2);
	  if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
	    continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Equivalence: ");
	  print_generic_expr (dump_file, curr->op, 0);
	  fprintf (dump_file, " %s ", op_symbol_code (opcode));
	  print_generic_expr (dump_file, oe->op, 0);
	  fprintf (dump_file, " -> ");
	  print_generic_expr (dump_file, t, 0);
	  fprintf (dump_file, "\n");
	}

      /* Now we can delete oe, as it has been subsumed by the new combined
	 expression t.  */
      ops->ordered_remove (i);
      reassociate_stats.ops_eliminated ++;

      /* If t is the same as curr->op, we're done.  Otherwise we must
	 replace curr->op with t.  Special case is if we got a constant
	 back, in which case we add it to the end instead of in place of
	 the current entry.  */
      if (TREE_CODE (t) == INTEGER_CST)
	{
	  ops->ordered_remove (currindex);
	  add_to_ops_vec (ops, t);
	}
      else if (!operand_equal_p (t, curr->op, 0))
	{
	  gimple sum;
	  enum tree_code subcode;
	  tree newop1;
	  tree newop2;
	  gcc_assert (COMPARISON_CLASS_P (t));
	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
	  STRIP_USELESS_TYPE_CONVERSION (newop1);
	  STRIP_USELESS_TYPE_CONVERSION (newop2);
	  gcc_checking_assert (is_gimple_val (newop1)
			       && is_gimple_val (newop2));
	  sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
	  curr->op = gimple_get_lhs (sum);
	}
      return true;
    }

  return false;
}
/* Perform various identities and other optimizations on the list of
   operand entries, stored in OPS.  The tree code for the binary
   operation between all the operands is OPCODE.  */

static void
optimize_ops_list (enum tree_code opcode,
		   vec<operand_entry_t> *ops)
{
  unsigned int length = ops->length ();
  unsigned int i;
  operand_entry_t oe;
  operand_entry_t oelast = NULL;
  bool iterate = false;

  if (length == 1)
    return;

  oelast = ops->last ();

  /* If the last two are constants, pop the constants off, merge them
     and try the next two.  */
  if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
    {
      operand_entry_t oelm1 = (*ops)[length - 2];

      if (oelm1->rank == 0
	  && is_gimple_min_invariant (oelm1->op)
	  && useless_type_conversion_p (TREE_TYPE (oelm1->op),
					TREE_TYPE (oelast->op)))
	{
	  tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
				     oelm1->op, oelast->op);

	  if (folded && is_gimple_min_invariant (folded))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Merging constants\n");

	      ops->pop ();
	      ops->pop ();

	      add_to_ops_vec (ops, folded);
	      reassociate_stats.constants_eliminated++;

	      optimize_ops_list (opcode, ops);
	      return;
	    }
	}
    }

  eliminate_using_constants (opcode, ops);
  oelast = NULL;

  for (i = 0; ops->iterate (i, &oe);)
    {
      bool done = false;

      if (eliminate_not_pairs (opcode, ops, i, oe))
	return;
      if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
	  || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
	  || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
	{
	  if (done)
	    return;
	  iterate = true;
	  oelast = NULL;
	  continue;
	}
      oelast = oe;
      i++;
    }

  length = ops->length ();
  oelast = ops->last ();

  if (iterate)
    optimize_ops_list (opcode, ops);
}
/* The following functions are subroutines to optimize_range_tests and allow
   it to try to change a logical combination of comparisons into a range
   test.

   For example, both
	X == 2 || X == 5 || X == 3 || X == 4
   and
	X >= 2 && X <= 5
   are converted to
	(unsigned) (X - 2) <= 3

   For more information see comments above fold_test_range in fold-const.c,
   this implementation is for GIMPLE.  */
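/* Illustrative sketch only, not part of the pass: a runnable check of the
   equivalence quoted above, written as a hypothetical helper.  The unsigned
   subtraction wraps values below 2 to large numbers, so the single compare
   covers the whole set.  */
#if 0
static int
range_test_example (int x)
{
  int orig = (x == 2 || x == 5 || x == 3 || x == 4);
  int opt = ((unsigned) (x - 2) <= 3);
  return orig == opt;  /* holds for every x without signed overflow */
}
#endif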
struct range_entry
{
  tree exp;
  tree low;
  tree high;
  bool in_p;
  bool strict_overflow_p;
  unsigned int idx, next;
};
/* This is similar to make_range in fold-const.c, but on top of
   GIMPLE instead of trees.  If EXP is non-NULL, it should be
   an SSA_NAME and STMT argument is ignored, otherwise STMT
   argument should be a GIMPLE_COND.  */

static void
init_range_entry (struct range_entry *r, tree exp, gimple stmt)
{
  int in_p;
  tree low, high;
  bool is_bool, strict_overflow_p;

  r->exp = NULL_TREE;
  r->in_p = false;
  r->strict_overflow_p = false;
  r->low = NULL_TREE;
  r->high = NULL_TREE;
  if (exp != NULL_TREE
      && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
    return;

  /* Start with simply saying "EXP != 0" and then look at the code of EXP
     and see if we can refine the range.  Some of the cases below may not
     happen, but it doesn't seem worth worrying about this.  We "continue"
     the outer loop when we've changed something; otherwise we "break"
     the switch, which will "break" the while.  */
  low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
  high = low;
  in_p = 0;
  strict_overflow_p = false;
  is_bool = false;
  if (exp == NULL_TREE)
    is_bool = true;
  else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
    {
      if (TYPE_UNSIGNED (TREE_TYPE (exp)))
	is_bool = true;
      else
	return;
    }
  else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
    is_bool = true;

  while (1)
    {
      enum tree_code code;
      tree arg0, arg1, exp_type;
      tree nexp;
      location_t loc;

      if (exp != NULL_TREE)
	{
	  if (TREE_CODE (exp) != SSA_NAME
	      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
	    break;

	  stmt = SSA_NAME_DEF_STMT (exp);
	  if (!is_gimple_assign (stmt))
	    break;

	  code = gimple_assign_rhs_code (stmt);
	  arg0 = gimple_assign_rhs1 (stmt);
	  arg1 = gimple_assign_rhs2 (stmt);
	  exp_type = TREE_TYPE (exp);
	}
      else
	{
	  code = gimple_cond_code (stmt);
	  arg0 = gimple_cond_lhs (stmt);
	  arg1 = gimple_cond_rhs (stmt);
	  exp_type = boolean_type_node;
	}

      if (TREE_CODE (arg0) != SSA_NAME)
	break;
      loc = gimple_location (stmt);
      switch (code)
	{
	case BIT_NOT_EXPR:
	  if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
	      /* Ensure the range is either +[-,0], +[0,0],
		 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
		 -[1,1].  If it is e.g. +[-,-] or -[-,-]
		 or similar expression of unconditional true or
		 false, it should not be negated.  */
	      && ((high && integer_zerop (high))
		  || (low && integer_onep (low))))
	    {
	      in_p = !in_p;
	      exp = arg0;
	      continue;
	    }
	  break;
	case SSA_NAME:
	  exp = arg0;
	  continue;
	CASE_CONVERT:
	  if (is_bool)
	    goto do_default;
	  if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
	    {
	      if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
		is_bool = true;
	      else
		return;
	    }
	  else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
	    is_bool = true;
	  goto do_default;
	case EQ_EXPR:
	case NE_EXPR:
	case LT_EXPR:
	case LE_EXPR:
	case GE_EXPR:
	case GT_EXPR:
	  is_bool = true;
	  /* FALLTHRU */
	default:
	  if (!is_bool)
	    return;
	do_default:
	  nexp = make_range_step (loc, code, arg0, arg1, exp_type,
				  &low, &high, &in_p,
				  &strict_overflow_p);
	  if (nexp != NULL_TREE)
	    {
	      exp = nexp;
	      gcc_assert (TREE_CODE (exp) == SSA_NAME);
	      continue;
	    }
	  break;
	}
      break;
    }
  r->exp = exp;
  r->in_p = in_p;
  r->low = low;
  r->high = high;
  r->strict_overflow_p = strict_overflow_p;
}
/* Comparison function for qsort.  Sort entries
   without SSA_NAME exp first, then with SSA_NAMEs sorted
   by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
   by increasing ->low and if ->low is the same, by increasing
   ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
   maximum.  */

static int
range_entry_cmp (const void *a, const void *b)
{
  const struct range_entry *p = (const struct range_entry *) a;
  const struct range_entry *q = (const struct range_entry *) b;

  if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
    {
      if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
	{
	  /* Group range_entries for the same SSA_NAME together.  */
	  if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
	    return -1;
	  else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
	    return 1;
	  /* If ->low is different, NULL low goes first, then by
	     ascending low.  */
	  if (p->low != NULL_TREE)
	    {
	      if (q->low != NULL_TREE)
		{
		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
					  p->low, q->low);
		  if (tem && integer_onep (tem))
		    return -1;
		  tem = fold_binary (GT_EXPR, boolean_type_node,
				     p->low, q->low);
		  if (tem && integer_onep (tem))
		    return 1;
		}
	      else
		return 1;
	    }
	  else if (q->low != NULL_TREE)
	    return -1;
	  /* If ->high is different, NULL high goes last, before that by
	     ascending high.  */
	  if (p->high != NULL_TREE)
	    {
	      if (q->high != NULL_TREE)
		{
		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
					  p->high, q->high);
		  if (tem && integer_onep (tem))
		    return -1;
		  tem = fold_binary (GT_EXPR, boolean_type_node,
				     p->high, q->high);
		  if (tem && integer_onep (tem))
		    return 1;
		}
	      else
		return -1;
	    }
	  else if (q->high != NULL_TREE)
	    return 1;
	  /* If both ranges are the same, sort below by ascending idx.  */
	}
      else
	return 1;
    }
  else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
    return -1;

  if (p->idx < q->idx)
    return -1;
  else
    {
      gcc_checking_assert (p->idx > q->idx);
      return 1;
    }
}
/* Helper routine of optimize_range_test.
   [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
   RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
   OPCODE and OPS are arguments of optimize_range_tests.  If OTHERRANGE
   is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
   an array of COUNT pointers to other ranges.  Return
   true if the range merge has been successful.
   If OPCODE is ERROR_MARK, this is called from within
   maybe_optimize_range_tests and is performing inter-bb range optimization.
   In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
   oe->rank.  */

static bool
update_range_test (struct range_entry *range, struct range_entry *otherrange,
		   struct range_entry **otherrangep,
		   unsigned int count, enum tree_code opcode,
		   vec<operand_entry_t> *ops, tree exp, gimple_seq seq,
		   bool in_p, tree low, tree high, bool strict_overflow_p)
{
  operand_entry_t oe = (*ops)[range->idx];
  tree op = oe->op;
  gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
    last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
  location_t loc = gimple_location (stmt);
  tree optype = op ? TREE_TYPE (op) : boolean_type_node;
  tree tem = build_range_check (loc, optype, unshare_expr (exp),
				in_p, low, high);
  enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
  gimple_stmt_iterator gsi;
  unsigned int i;

  if (tem == NULL_TREE)
    return false;

  if (strict_overflow_p && issue_strict_overflow_warning (wc))
    warning_at (loc, OPT_Wstrict_overflow,
		"assuming signed overflow does not occur "
		"when simplifying range test");

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct range_entry *r;
      fprintf (dump_file, "Optimizing range tests ");
      print_generic_expr (dump_file, range->exp, 0);
      fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
      print_generic_expr (dump_file, range->low, 0);
      fprintf (dump_file, ", ");
      print_generic_expr (dump_file, range->high, 0);
      fprintf (dump_file, "]");
      for (i = 0; i < count; i++)
	{
	  if (otherrange)
	    r = otherrange + i;
	  else
	    r = otherrangep[i];
	  fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
	  print_generic_expr (dump_file, r->low, 0);
	  fprintf (dump_file, ", ");
	  print_generic_expr (dump_file, r->high, 0);
	  fprintf (dump_file, "]");
	}
      fprintf (dump_file, "\n into ");
      print_generic_expr (dump_file, tem, 0);
      fprintf (dump_file, "\n");
    }

  if (opcode == BIT_IOR_EXPR
      || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
    tem = invert_truthvalue_loc (loc, tem);

  tem = fold_convert_loc (loc, optype, tem);
  gsi = gsi_for_stmt (stmt);
  unsigned int uid = gimple_uid (stmt);
  /* In rare cases range->exp can be equal to lhs of stmt.
     In that case we have to insert after the stmt rather then before
     it.  If stmt is a PHI, insert it at the start of the basic block.  */
  if (op != range->exp)
    {
      gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
				      GSI_SAME_STMT);
      gsi_prev (&gsi);
    }
  else if (gimple_code (stmt) != GIMPLE_PHI)
    {
      gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
				      GSI_CONTINUE_LINKING);
    }
  else
    {
      gsi = gsi_after_labels (gimple_bb (stmt));
      if (!gsi_end_p (gsi))
	uid = gimple_uid (gsi_stmt (gsi));
      else
	{
	  gsi = gsi_start_bb (gimple_bb (stmt));
	  uid = 1;
	  while (!gsi_end_p (gsi))
	    {
	      uid = gimple_uid (gsi_stmt (gsi));
	      gsi_next (&gsi);
	    }
	}
      gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
				      GSI_SAME_STMT);
      if (gsi_end_p (gsi))
	gsi = gsi_last_bb (gimple_bb (stmt));
      else
	gsi_prev (&gsi);
    }
  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
    if (gimple_uid (gsi_stmt (gsi)))
      break;
    else
      gimple_set_uid (gsi_stmt (gsi), uid);

  oe->op = tem;
  range->exp = exp;
  range->low = low;
  range->high = high;
  range->in_p = in_p;
  range->strict_overflow_p = false;

  for (i = 0; i < count; i++)
    {
      if (otherrange)
	range = otherrange + i;
      else
	range = otherrangep[i];
      oe = (*ops)[range->idx];
      /* Now change all the other range test immediate uses, so that
	 those tests will be optimized away.  */
      if (opcode == ERROR_MARK)
	{
	  if (oe->op)
	    oe->op = build_int_cst (TREE_TYPE (oe->op),
				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
	  else
	    oe->op = (oe->rank == BIT_IOR_EXPR
		      ? boolean_false_node : boolean_true_node);
	}
      else
	oe->op = error_mark_node;
      range->exp = NULL_TREE;
    }
  return true;
}
/* Optimize X == CST1 || X == CST2
   if popcount (CST1 ^ CST2) == 1 into
   (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
   Similarly for ranges.  E.g.
   X != 2 && X != 3 && X != 10 && X != 11
   will be transformed by the previous optimization into
   !((X - 2U) <= 1U || (X - 10U) <= 1U)
   and this loop can transform that into
   !(((X & ~8) - 2U) <= 1U).  */
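/* Illustrative sketch only, not part of the pass: the xor trick above in
   plain C, as a hypothetical helper.  5 ^ 7 == 2 has a single bit set, so
   both values compare equal once that bit is masked off.  */
#if 0
static int
xor_merge_example (unsigned x)
{
  /* x == 5 || x == 7  becomes  (x & ~2u) == 5  */
  return (x & ~(5u ^ 7u)) == (5u & ~(5u ^ 7u));
}
#endif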
static bool
optimize_range_tests_xor (enum tree_code opcode, tree type,
			  tree lowi, tree lowj, tree highi, tree highj,
			  vec<operand_entry_t> *ops,
			  struct range_entry *rangei,
			  struct range_entry *rangej)
{
  tree lowxor, highxor, tem, exp;
  /* Check lowi ^ lowj == highi ^ highj and
     popcount (lowi ^ lowj) == 1.  */
  lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
  if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
    return false;
  if (!integer_pow2p (lowxor))
    return false;
  highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
  if (!tree_int_cst_equal (lowxor, highxor))
    return false;

  tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
  exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
  lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
  highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
  if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
			 NULL, rangei->in_p, lowj, highj,
			 rangei->strict_overflow_p
			 || rangej->strict_overflow_p))
    return true;
  return false;
}
/* Optimize X == CST1 || X == CST2
   if popcount (CST2 - CST1) == 1 into
   ((X - CST1) & ~(CST2 - CST1)) == 0.
   Similarly for ranges.  E.g.
   X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
   || X == 75 || X == 45
   will be transformed by the previous optimization into
   (X - 43U) <= 3U || (X - 75U) <= 3U
   and this loop can transform that into
   ((X - 43U) & ~(75U - 43U)) <= 3U.  */
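/* Illustrative sketch only, not part of the pass: the difference trick above
   in plain C, as a hypothetical helper.  The two ranges start at 43 and 75,
   and 75 - 43 == 32 is a power of two, so masking that bit collapses both
   ranges into one compare.  */
#if 0
static int
diff_merge_example (unsigned x)
{
  /* (x - 43U) <= 3U || (x - 75U) <= 3U  becomes
     ((x - 43U) & ~(75U - 43U)) <= 3U  */
  return ((x - 43u) & ~(75u - 43u)) <= 3u;
}
#endif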
2271 optimize_range_tests_diff (enum tree_code opcode, tree type,
2272                            tree lowi, tree lowj, tree highi, tree highj,
2273                            vec<operand_entry_t> *ops,
2274                            struct range_entry *rangei,
2275                            struct range_entry *rangej)
2277   tree tem1, tem2, mask;
2278   /* Check highi - lowi == highj - lowj.  */
2279   tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2280   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2282   tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2283   if (!tree_int_cst_equal (tem1, tem2))
2285   /* Check popcount (lowj - lowi) == 1.  */
2286   tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2287   if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2289   if (!integer_pow2p (tem1))
2292   type = unsigned_type_for (type);
2293   tem1 = fold_convert (type, tem1);
2294   tem2 = fold_convert (type, tem2);
2295   lowi = fold_convert (type, lowi);
2296   mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2297   tem1 = fold_binary (MINUS_EXPR, type,
2298                       fold_convert (type, rangei->exp), lowi);
2299   tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2300   lowj = build_int_cst (type, 0);
2301   if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2302                          NULL, rangei->in_p, lowj, tem2,
2303                          rangei->strict_overflow_p
2304                          || rangej->strict_overflow_p))
2309 /* Helper that performs the checks shared by optimize_range_tests_xor and
2310    optimize_range_tests_diff.
2311    If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2312    Else it calls optimize_range_tests_diff.  */
2315 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2316                         bool optimize_xor, vec<operand_entry_t> *ops,
2317                         struct range_entry *ranges)
2320   bool any_changes = false;
2321   for (i = first; i < length; i++)
2323       tree lowi, highi, lowj, highj, type, tem;
2325       if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2327       type = TREE_TYPE (ranges[i].exp);
2328       if (!INTEGRAL_TYPE_P (type))
2330       lowi = ranges[i].low;
2331       if (lowi == NULL_TREE)
2332         lowi = TYPE_MIN_VALUE (type);
2333       highi = ranges[i].high;
2334       if (highi == NULL_TREE)
2336       for (j = i + 1; j < length && j < i + 64; j++)
2339           if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2341           lowj = ranges[j].low;
2342           if (lowj == NULL_TREE)
2344           highj = ranges[j].high;
2345           if (highj == NULL_TREE)
2346             highj = TYPE_MAX_VALUE (type);
2347           /* Check lowj > highi.  */
2348           tem = fold_binary (GT_EXPR, boolean_type_node,
2350           if (tem == NULL_TREE || !integer_onep (tem))
2353             changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2355                                                 ranges + i, ranges + j);
2357             changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2359                                                  ranges + i, ranges + j);
2370 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2371 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2372 EXP on success, NULL otherwise. */
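/* An illustrative sketch of the mask this helper computes, using plain
   unsigned long long instead of wide_int; the helper name and variables
   are made up for the illustration and are not part of the pass.  A
   range [LOW, HIGH] measured from a common base TOTALLOW selects
   HIGH - LOW + 1 consecutive bits starting at bit LOW - TOTALLOW,
   assuming the whole span fits in one word as the pass checks.  */

static inline unsigned long long
bit_test_mask_example (unsigned long long totallow,
		       unsigned long long low, unsigned long long high)
{
  unsigned long long width = high - low + 1;
  /* WIDTH consecutive low bits set, guarding against a full-word shift.  */
  unsigned long long mask = width >= 64 ? ~0ULL : (1ULL << width) - 1;
  /* Shift the block of ones up to the start of the range.  */
  return mask << (low - totallow);
}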
2375 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2376                        wide_int *mask, tree *totallowp)
2378   tree tem = int_const_binop (MINUS_EXPR, high, low);
2379   if (tem == NULL_TREE
2380       || TREE_CODE (tem) != INTEGER_CST
2381       || TREE_OVERFLOW (tem)
2382       || tree_int_cst_sgn (tem) == -1
2383       || compare_tree_int (tem, prec) != -1)
2386   unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2387   *mask = wi::shifted_mask (0, max, false, prec);
2388   if (TREE_CODE (exp) == BIT_AND_EXPR
2389       && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2391       widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2392       msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2393       if (wi::popcount (msk) == 1
2394           && wi::ltu_p (msk, prec - max))
2396           *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2397           max += msk.to_uhwi ();
2398           exp = TREE_OPERAND (exp, 0);
2399           if (integer_zerop (low)
2400               && TREE_CODE (exp) == PLUS_EXPR
2401               && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2403               tree ret = TREE_OPERAND (exp, 0);
2406                 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2407                                      TYPE_PRECISION (TREE_TYPE (low))));
2408               tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2414               else if (!tree_int_cst_lt (totallow, tbias))
2416                   bias = wi::to_widest (tbias);
2417                   bias -= wi::to_widest (totallow);
2418                   if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2420                       *mask = wi::lshift (*mask, bias);
2428   if (!tree_int_cst_lt (totallow, low))
2430   tem = int_const_binop (MINUS_EXPR, low, totallow);
2431   if (tem == NULL_TREE
2432       || TREE_CODE (tem) != INTEGER_CST
2433       || TREE_OVERFLOW (tem)
2434       || compare_tree_int (tem, prec - max) == 1)
2437   *mask = wi::lshift (*mask, wi::to_widest (tem));
2441 /* Attempt to optimize small range tests using bit test.
2443 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2444 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2445    has been optimized by earlier optimizations into:
2446 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2447 As all the 43 through 82 range is less than 64 numbers,
2448 for 64-bit word targets optimize that into:
2449 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
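/* An illustrative sketch (not code from the pass) of the word-sized bit
   test described above, using the mask constant from the comment.
   Membership of X in {43, 44, 45, 46, 49, 75, 76, 77, 78, 82} becomes a
   single range check plus one AND against a 64-bit mask whose bit N
   stands for the value 43 + N.  */

static inline bool
range_bit_test_example (unsigned long long x)
{
  const unsigned long long mask = 0x8F0000004FULL;
  unsigned long long idx = x - 43;
  /* Guard the shift first; one masked test then replaces ten compares.  */
  return idx < 64 && ((mask >> idx) & 1) != 0;
}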
2452 optimize_range_tests_to_bit_test (enum tree_code opcode
, int first
, int length
,
2453 vec
<operand_entry_t
> *ops
,
2454 struct range_entry
*ranges
)
2457 bool any_changes
= false;
2458 int prec
= GET_MODE_BITSIZE (word_mode
);
2459 auto_vec
<struct range_entry
*, 64> candidates
;
2461 for (i
= first
; i
< length
- 2; i
++)
2463 tree lowi
, highi
, lowj
, highj
, type
;
2465 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2467 type
= TREE_TYPE (ranges
[i
].exp
);
2468 if (!INTEGRAL_TYPE_P (type
))
2470 lowi
= ranges
[i
].low
;
2471 if (lowi
== NULL_TREE
)
2472 lowi
= TYPE_MIN_VALUE (type
);
2473 highi
= ranges
[i
].high
;
2474 if (highi
== NULL_TREE
)
2477 tree exp
= extract_bit_test_mask (ranges
[i
].exp
, prec
, lowi
, lowi
,
2478 highi
, &mask
, &lowi
);
2479 if (exp
== NULL_TREE
)
2481 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2482 candidates
.truncate (0);
2483 int end
= MIN (i
+ 64, length
);
2484 for (j
= i
+ 1; j
< end
; j
++)
2487 if (ranges
[j
].exp
== NULL_TREE
|| ranges
[j
].in_p
)
2489 if (ranges
[j
].exp
== exp
)
2491 else if (TREE_CODE (ranges
[j
].exp
) == BIT_AND_EXPR
)
2493 exp2
= TREE_OPERAND (ranges
[j
].exp
, 0);
2496 else if (TREE_CODE (exp2
) == PLUS_EXPR
)
2498 exp2
= TREE_OPERAND (exp2
, 0);
2508 lowj
= ranges
[j
].low
;
2509 if (lowj
== NULL_TREE
)
2511 highj
= ranges
[j
].high
;
2512 if (highj
== NULL_TREE
)
2513 highj
= TYPE_MAX_VALUE (type
);
2515 exp2
= extract_bit_test_mask (ranges
[j
].exp
, prec
, lowi
, lowj
,
2516 highj
, &mask2
, NULL
);
2520 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2521 candidates
.safe_push (&ranges
[j
]);
2524       /* If we would otherwise need 3 or more comparisons, use a bit test.  */
2525 if (candidates
.length () >= 2)
2527 tree high
= wide_int_to_tree (TREE_TYPE (lowi
),
2528 wi::to_widest (lowi
)
2529 + prec
- 1 - wi::clz (mask
));
2530 operand_entry_t oe
= (*ops
)[ranges
[i
].idx
];
2532 gimple stmt
= op
? SSA_NAME_DEF_STMT (op
)
2533 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2534 location_t loc
= gimple_location (stmt
);
2535 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2537           /* See if it isn't cheaper to pretend the minimum value of the
2538              range is 0, if the maximum value is small enough.
2539              We can then avoid the subtraction of the minimum value, but the
2540              mask constant could perhaps be more expensive.  */
2541 if (compare_tree_int (lowi
, 0) > 0
2542 && compare_tree_int (high
, prec
) < 0)
2545 HOST_WIDE_INT m
= tree_to_uhwi (lowi
);
2546 rtx reg
= gen_raw_REG (word_mode
, 10000);
2547 bool speed_p
= optimize_bb_for_speed_p (gimple_bb (stmt
));
2548 cost_diff
= set_rtx_cost (gen_rtx_PLUS (word_mode
, reg
,
2549 GEN_INT (-m
)), speed_p
);
2550 rtx r
= immed_wide_int_const (mask
, word_mode
);
2551 cost_diff
+= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
2553 r
= immed_wide_int_const (wi::lshift (mask
, m
), word_mode
);
2554 cost_diff
-= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
2558 mask
= wi::lshift (mask
, m
);
2559 lowi
= build_zero_cst (TREE_TYPE (lowi
));
2563 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2565 if (tem
== NULL_TREE
|| is_gimple_val (tem
))
2567 tree etype
= unsigned_type_for (TREE_TYPE (exp
));
2568 exp
= fold_build2_loc (loc
, MINUS_EXPR
, etype
,
2569 fold_convert_loc (loc
, etype
, exp
),
2570 fold_convert_loc (loc
, etype
, lowi
));
2571 exp
= fold_convert_loc (loc
, integer_type_node
, exp
);
2572 tree word_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
2573 exp
= fold_build2_loc (loc
, LSHIFT_EXPR
, word_type
,
2574 build_int_cst (word_type
, 1), exp
);
2575 exp
= fold_build2_loc (loc
, BIT_AND_EXPR
, word_type
, exp
,
2576 wide_int_to_tree (word_type
, mask
));
2577 exp
= fold_build2_loc (loc
, EQ_EXPR
, optype
, exp
,
2578 build_zero_cst (word_type
));
2579 if (is_gimple_val (exp
))
2582           /* The shift might have undefined behavior if TEM is true,
2583              but reassociate_bb isn't prepared to have basic blocks
2584              split when it is running.  So, temporarily emit code
2585              with BIT_IOR_EXPR instead of &&, and fix it up in
2588 tem
= force_gimple_operand (tem
, &seq
, true, NULL_TREE
);
2589 gcc_assert (TREE_CODE (tem
) == SSA_NAME
);
2590 gimple_set_visited (SSA_NAME_DEF_STMT (tem
), true);
2592 exp
= force_gimple_operand (exp
, &seq2
, true, NULL_TREE
);
2593 gimple_seq_add_seq_without_update (&seq
, seq2
);
2594 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2595 gimple_set_visited (SSA_NAME_DEF_STMT (exp
), true);
2596 gimple g
= gimple_build_assign (make_ssa_name (optype
),
2597 BIT_IOR_EXPR
, tem
, exp
);
2598 gimple_set_location (g
, loc
);
2599 gimple_seq_add_stmt_without_update (&seq
, g
);
2600 exp
= gimple_assign_lhs (g
);
2601 tree val
= build_zero_cst (optype
);
2602 if (update_range_test (&ranges
[i
], NULL
, candidates
.address (),
2603 candidates
.length (), opcode
, ops
, exp
,
2604 seq
, false, val
, val
, strict_overflow_p
))
2607 reassoc_branch_fixups
.safe_push (tem
);
2610 gimple_seq_discard (seq
);
2616 /* Optimize range tests, similarly how fold_range_test optimizes
2617 it on trees. The tree code for the binary
2618 operation between all the operands is OPCODE.
2619 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2620 maybe_optimize_range_tests for inter-bb range optimization.
2621 In that case if oe->op is NULL, oe->id is bb->index whose
2622 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2623 the actual opcode. */
2626 optimize_range_tests (enum tree_code opcode
,
2627 vec
<operand_entry_t
> *ops
)
2629 unsigned int length
= ops
->length (), i
, j
, first
;
2631 struct range_entry
*ranges
;
2632 bool any_changes
= false;
2637 ranges
= XNEWVEC (struct range_entry
, length
);
2638 for (i
= 0; i
< length
; i
++)
2642 init_range_entry (ranges
+ i
, oe
->op
,
2644 last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
)));
2645 /* For | invert it now, we will invert it again before emitting
2646 the optimized expression. */
2647 if (opcode
== BIT_IOR_EXPR
2648 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2649 ranges
[i
].in_p
= !ranges
[i
].in_p
;
2652 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
2653 for (i
= 0; i
< length
; i
++)
2654 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
2657 /* Try to merge ranges. */
2658 for (first
= i
; i
< length
; i
++)
2660 tree low
= ranges
[i
].low
;
2661 tree high
= ranges
[i
].high
;
2662 int in_p
= ranges
[i
].in_p
;
2663 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2664 int update_fail_count
= 0;
2666 for (j
= i
+ 1; j
< length
; j
++)
2668 if (ranges
[i
].exp
!= ranges
[j
].exp
)
2670 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
2671 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
2673 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2679 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, NULL
, j
- i
- 1,
2680 opcode
, ops
, ranges
[i
].exp
, NULL
, in_p
,
2681 low
, high
, strict_overflow_p
))
2686 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2687 while update_range_test would fail. */
2688 else if (update_fail_count
== 64)
2691 ++update_fail_count
;
2694 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
2697 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
2698 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
2700 if (lshift_cheap_p (optimize_function_for_speed_p (cfun
)))
2701 any_changes
|= optimize_range_tests_to_bit_test (opcode
, first
, length
,
2704 if (any_changes
&& opcode
!= ERROR_MARK
)
2707 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
2709 if (oe
->op
== error_mark_node
)
2718 XDELETEVEC (ranges
);
2722 /* Return true if STMT is a cast like:
2728 # _345 = PHI <_123(N), 1(...), 1(...)>
2729 where _234 has bool type, _123 has single use and
2730 bb N has a single successor M. This is commonly used in
2731 the last block of a range test. */
2734 final_range_test_p (gimple stmt)
2736   basic_block bb, rhs_bb;
2739   use_operand_p use_p;
2742   if (!gimple_assign_cast_p (stmt))
2744   bb = gimple_bb (stmt);
2745   if (!single_succ_p (bb))
2747   e = single_succ_edge (bb);
2748   if (e->flags & EDGE_COMPLEX)
2751   lhs = gimple_assign_lhs (stmt);
2752   rhs = gimple_assign_rhs1 (stmt);
2753   if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2754       || TREE_CODE (rhs) != SSA_NAME
2755       || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2758   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
2759   if (!single_imm_use (lhs, &use_p, &use_stmt))
2762   if (gimple_code (use_stmt) != GIMPLE_PHI
2763       || gimple_bb (use_stmt) != e->dest)
2766   /* And that the rhs is defined in the same loop.  */
2767   rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2769       || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2775 /* Return true if BB is a suitable basic block for inter-bb range test
2776    optimization.  If BACKWARD is true, BB should be the only predecessor
2777    of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2778    or compared against to find a common basic block to which all the
2779    conditions branch when true resp. false.  If BACKWARD is false,
2780    TEST_BB should be the only predecessor of BB.  */
2783 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
2786 edge_iterator ei
, ei2
;
2790 bool other_edge_seen
= false;
2795 /* Check last stmt first. */
2796 stmt
= last_stmt (bb
);
2798 || (gimple_code (stmt
) != GIMPLE_COND
2799 && (backward
|| !final_range_test_p (stmt
)))
2800 || gimple_visited_p (stmt
)
2801 || stmt_could_throw_p (stmt
)
2804 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
2807 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2808 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2809 to *OTHER_BB (if not set yet, try to find it out). */
2810 if (EDGE_COUNT (bb
->succs
) != 2)
2812 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2814 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2816 if (e
->dest
== test_bb
)
2825 if (*other_bb
== NULL
)
2827 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
2828 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2830 else if (e
->dest
== e2
->dest
)
2831 *other_bb
= e
->dest
;
2832 if (*other_bb
== NULL
)
2835 if (e
->dest
== *other_bb
)
2836 other_edge_seen
= true;
2840 if (*other_bb
== NULL
|| !other_edge_seen
)
2843 else if (single_succ (bb
) != *other_bb
)
2846 /* Now check all PHIs of *OTHER_BB. */
2847 e
= find_edge (bb
, *other_bb
);
2848 e2
= find_edge (test_bb
, *other_bb
);
2849 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2851 gphi
*phi
= gsi
.phi ();
2852 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2853 corresponding to BB and TEST_BB predecessor must be the same. */
2854 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
2855 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
2857 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2858 one of the PHIs should have the lhs of the last stmt in
2859 that block as PHI arg and that PHI should have 0 or 1
2860 corresponding to it in all other range test basic blocks
2864 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
2865 == gimple_assign_lhs (stmt
)
2866 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
2867 || integer_onep (gimple_phi_arg_def (phi
,
2873 gimple test_last
= last_stmt (test_bb
);
2874 if (gimple_code (test_last
) != GIMPLE_COND
2875 && gimple_phi_arg_def (phi
, e2
->dest_idx
)
2876 == gimple_assign_lhs (test_last
)
2877 && (integer_zerop (gimple_phi_arg_def (phi
, e
->dest_idx
))
2878 || integer_onep (gimple_phi_arg_def (phi
, e
->dest_idx
))))
2888 /* Return true if BB doesn't have side-effects that would disallow
2889 range test optimization, all SSA_NAMEs set in the bb are consumed
2890 in the bb and there are no PHIs. */
2893 no_side_effect_bb (basic_block bb)
2895   gimple_stmt_iterator gsi;
2898   if (!gimple_seq_empty_p (phi_nodes (bb)))
2900   last = last_stmt (bb);
2901   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2903       gimple stmt = gsi_stmt (gsi);
2905       imm_use_iterator imm_iter;
2906       use_operand_p use_p;
2908       if (is_gimple_debug (stmt))
2910       if (gimple_has_side_effects (stmt))
2914       if (!is_gimple_assign (stmt))
2916       lhs = gimple_assign_lhs (stmt);
2917       if (TREE_CODE (lhs) != SSA_NAME)
2919       if (gimple_assign_rhs_could_trap_p (stmt))
2921       FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2923           gimple use_stmt = USE_STMT (use_p);
2924           if (is_gimple_debug (use_stmt))
2926           if (gimple_bb (use_stmt) != bb)
2933 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2934 return true and fill in *OPS recursively. */
2937 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2940   gimple stmt = SSA_NAME_DEF_STMT (var);
2944   if (!is_reassociable_op (stmt, code, loop))
2947   rhs[0] = gimple_assign_rhs1 (stmt);
2948   rhs[1] = gimple_assign_rhs2 (stmt);
2949   gimple_set_visited (stmt, true);
2950   for (i = 0; i < 2; i++)
2951     if (TREE_CODE (rhs[i]) == SSA_NAME
2952         && !get_ops (rhs[i], code, ops, loop)
2953         && has_single_use (rhs[i]))
2955         operand_entry_t oe = operand_entry_pool.allocate ();
2961         ops->safe_push (oe);
2966 /* Find the ops that were added by get_ops starting from VAR, see if
2967 they were changed during update_range_test and if yes, create new
2971 update_ops (tree var
, enum tree_code code
, vec
<operand_entry_t
> ops
,
2972 unsigned int *pidx
, struct loop
*loop
)
2974 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2978 if (!is_reassociable_op (stmt
, code
, loop
))
2981 rhs
[0] = gimple_assign_rhs1 (stmt
);
2982 rhs
[1] = gimple_assign_rhs2 (stmt
);
2985 for (i
= 0; i
< 2; i
++)
2986 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
2988 rhs
[2 + i
] = update_ops (rhs
[i
], code
, ops
, pidx
, loop
);
2989 if (rhs
[2 + i
] == NULL_TREE
)
2991 if (has_single_use (rhs
[i
]))
2992 rhs
[2 + i
] = ops
[(*pidx
)++]->op
;
2994 rhs
[2 + i
] = rhs
[i
];
2997 if ((rhs
[2] != rhs
[0] || rhs
[3] != rhs
[1])
2998 && (rhs
[2] != rhs
[1] || rhs
[3] != rhs
[0]))
3000 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3001 var
= make_ssa_name (TREE_TYPE (var
));
3002 gassign
*g
= gimple_build_assign (var
, gimple_assign_rhs_code (stmt
),
3004 gimple_set_uid (g
, gimple_uid (stmt
));
3005 gimple_set_visited (g
, true);
3006 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3011 /* Structure to track the initial value passed to get_ops and
3012 the range in the ops vector for each basic block. */
3014 struct inter_bb_range_test_entry
3017 unsigned int first_idx
, last_idx
;
3020 /* Inter-bb range test optimization. */
3023 maybe_optimize_range_tests (gimple stmt
)
3025 basic_block first_bb
= gimple_bb (stmt
);
3026 basic_block last_bb
= first_bb
;
3027 basic_block other_bb
= NULL
;
3031 auto_vec
<operand_entry_t
> ops
;
3032 auto_vec
<inter_bb_range_test_entry
> bbinfo
;
3033 bool any_changes
= false;
3035 /* Consider only basic blocks that end with GIMPLE_COND or
3036 a cast statement satisfying final_range_test_p. All
3037 but the last bb in the first_bb .. last_bb range
3038 should end with GIMPLE_COND. */
3039 if (gimple_code (stmt
) == GIMPLE_COND
)
3041 if (EDGE_COUNT (first_bb
->succs
) != 2)
3044 else if (final_range_test_p (stmt
))
3045 other_bb
= single_succ (first_bb
);
3049 if (stmt_could_throw_p (stmt
))
3052 /* As relative ordering of post-dominator sons isn't fixed,
3053 maybe_optimize_range_tests can be called first on any
3054 bb in the range we want to optimize. So, start searching
3055 backwards, if first_bb can be set to a predecessor. */
3056 while (single_pred_p (first_bb
))
3058 basic_block pred_bb
= single_pred (first_bb
);
3059 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, true))
3061 if (!no_side_effect_bb (first_bb
))
3065 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3066 Before starting forward search in last_bb successors, find
3067 out the other_bb. */
3068 if (first_bb
== last_bb
)
3071 /* As non-GIMPLE_COND last stmt always terminates the range,
3072 if forward search didn't discover anything, just give up. */
3073 if (gimple_code (stmt
) != GIMPLE_COND
)
3075 /* Look at both successors. Either it ends with a GIMPLE_COND
3076 and satisfies suitable_cond_bb, or ends with a cast and
3077 other_bb is that cast's successor. */
3078 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
3079 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
3080 || e
->dest
== first_bb
)
3082 else if (single_pred_p (e
->dest
))
3084 stmt
= last_stmt (e
->dest
);
3086 && gimple_code (stmt
) == GIMPLE_COND
3087 && EDGE_COUNT (e
->dest
->succs
) == 2)
3089 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
, true))
3095 && final_range_test_p (stmt
)
3096 && find_edge (first_bb
, single_succ (e
->dest
)))
3098 other_bb
= single_succ (e
->dest
);
3099 if (other_bb
== first_bb
)
3103 if (other_bb
== NULL
)
3106 /* Now do the forward search, moving last_bb to successor bbs
3107 that aren't other_bb. */
3108 while (EDGE_COUNT (last_bb
->succs
) == 2)
3110 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
3111 if (e
->dest
!= other_bb
)
3115 if (!single_pred_p (e
->dest
))
3117 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, false))
3119 if (!no_side_effect_bb (e
->dest
))
3123 if (first_bb
== last_bb
)
3125 /* Here basic blocks first_bb through last_bb's predecessor
3126 end with GIMPLE_COND, all of them have one of the edges to
3127 other_bb and another to another block in the range,
3128 all blocks except first_bb don't have side-effects and
3129 last_bb ends with either GIMPLE_COND, or cast satisfying
3130 final_range_test_p. */
3131 for (bb
= last_bb
; ; bb
= single_pred (bb
))
3133 enum tree_code code
;
3135 inter_bb_range_test_entry bb_ent
;
3137 bb_ent
.op
= NULL_TREE
;
3138 bb_ent
.first_idx
= ops
.length ();
3139 bb_ent
.last_idx
= bb_ent
.first_idx
;
3140 e
= find_edge (bb
, other_bb
);
3141 stmt
= last_stmt (bb
);
3142 gimple_set_visited (stmt
, true);
3143 if (gimple_code (stmt
) != GIMPLE_COND
)
3145 use_operand_p use_p
;
3150 lhs
= gimple_assign_lhs (stmt
);
3151 rhs
= gimple_assign_rhs1 (stmt
);
3152 gcc_assert (bb
== last_bb
);
3159 # _345 = PHI <_123(N), 1(...), 1(...)>
3161 or 0 instead of 1. If it is 0, the _234
3162 range test is anded together with all the
3163 other range tests, if it is 1, it is ored with
3165 single_imm_use (lhs
, &use_p
, &phi
);
3166 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
3167 e2
= find_edge (first_bb
, other_bb
);
3169 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
3170 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
3171 code
= BIT_AND_EXPR
;
3174 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
3175 code
= BIT_IOR_EXPR
;
3178 /* If _234 SSA_NAME_DEF_STMT is
3180 (or &, corresponding to 1/0 in the phi arguments,
3181 push into ops the individual range test arguments
3182 of the bitwise or resp. and, recursively. */
3183 if (!get_ops (rhs
, code
, &ops
,
3184 loop_containing_stmt (stmt
))
3185 && has_single_use (rhs
))
3187 /* Otherwise, push the _234 range test itself. */
3188 operand_entry_t oe
= operand_entry_pool
.allocate ();
3198 bb_ent
.last_idx
= ops
.length ();
3200 bbinfo
.safe_push (bb_ent
);
3203 /* Otherwise stmt is GIMPLE_COND. */
3204 code
= gimple_cond_code (stmt
);
3205 lhs
= gimple_cond_lhs (stmt
);
3206 rhs
= gimple_cond_rhs (stmt
);
3207 if (TREE_CODE (lhs
) == SSA_NAME
3208 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3209 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
3210 || rhs
!= boolean_false_node
3211 /* Either push into ops the individual bitwise
3212 or resp. and operands, depending on which
3213 edge is other_bb. */
3214 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
3215 ^ (code
== EQ_EXPR
))
3216 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
3217 loop_containing_stmt (stmt
))))
3219 /* Or push the GIMPLE_COND stmt itself. */
3220 operand_entry_t oe
= operand_entry_pool
.allocate ();
3223 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
3224 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
3225            /* oe->op = NULL signals that there is no SSA_NAME
3226               for the range test, and oe->id instead is the
3227               basic block number, at whose end the GIMPLE_COND
3235 else if (ops
.length () > bb_ent
.first_idx
)
3238 bb_ent
.last_idx
= ops
.length ();
3240 bbinfo
.safe_push (bb_ent
);
3244 if (ops
.length () > 1)
3245 any_changes
= optimize_range_tests (ERROR_MARK
, &ops
);
3249 /* update_ops relies on has_single_use predicates returning the
3250 same values as it did during get_ops earlier. Additionally it
3251 never removes statements, only adds new ones and it should walk
3252 from the single imm use and check the predicate already before
3253 making those changes.
3254      On the other hand, the handling of GIMPLE_COND directly can turn
3255 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3256 it needs to be done in a separate loop afterwards. */
3257 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
3259 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
3260 && bbinfo
[idx
].op
!= NULL_TREE
)
3264 stmt
= last_stmt (bb
);
3265 new_op
= update_ops (bbinfo
[idx
].op
,
3267 ops
[bbinfo
[idx
].first_idx
]->rank
,
3268 ops
, &bbinfo
[idx
].first_idx
,
3269 loop_containing_stmt (stmt
));
3270 if (new_op
== NULL_TREE
)
3272 gcc_assert (bb
== last_bb
);
3273 new_op
= ops
[bbinfo
[idx
].first_idx
++]->op
;
3275 if (bbinfo
[idx
].op
!= new_op
)
3277 imm_use_iterator iter
;
3278 use_operand_p use_p
;
3279 gimple use_stmt
, cast_stmt
= NULL
;
3281 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, bbinfo
[idx
].op
)
3282 if (is_gimple_debug (use_stmt
))
3284 else if (gimple_code (use_stmt
) == GIMPLE_COND
3285 || gimple_code (use_stmt
) == GIMPLE_PHI
)
3286 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
3287 SET_USE (use_p
, new_op
);
3288 else if (gimple_assign_cast_p (use_stmt
))
3289 cast_stmt
= use_stmt
;
3294 gcc_assert (bb
== last_bb
);
3295 tree lhs
= gimple_assign_lhs (cast_stmt
);
3296 tree new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
3297 enum tree_code rhs_code
3298 = gimple_assign_rhs_code (cast_stmt
);
3300 if (is_gimple_min_invariant (new_op
))
3302 new_op
= fold_convert (TREE_TYPE (lhs
), new_op
);
3303 g
= gimple_build_assign (new_lhs
, new_op
);
3306 g
= gimple_build_assign (new_lhs
, rhs_code
, new_op
);
3307 gimple_stmt_iterator gsi
= gsi_for_stmt (cast_stmt
);
3308 gimple_set_uid (g
, gimple_uid (cast_stmt
));
3309 gimple_set_visited (g
, true);
3310 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3311 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
3312 if (is_gimple_debug (use_stmt
))
3314 else if (gimple_code (use_stmt
) == GIMPLE_COND
3315 || gimple_code (use_stmt
) == GIMPLE_PHI
)
3316 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
3317 SET_USE (use_p
, new_lhs
);
3326 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
3328 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
3329 && bbinfo
[idx
].op
== NULL_TREE
3330 && ops
[bbinfo
[idx
].first_idx
]->op
!= NULL_TREE
)
3332 gcond
*cond_stmt
= as_a
<gcond
*> (last_stmt (bb
));
3333 if (integer_zerop (ops
[bbinfo
[idx
].first_idx
]->op
))
3334 gimple_cond_make_false (cond_stmt
);
3335 else if (integer_onep (ops
[bbinfo
[idx
].first_idx
]->op
))
3336 gimple_cond_make_true (cond_stmt
);
3339 gimple_cond_set_code (cond_stmt
, NE_EXPR
);
3340 gimple_cond_set_lhs (cond_stmt
,
3341 ops
[bbinfo
[idx
].first_idx
]->op
);
3342 gimple_cond_set_rhs (cond_stmt
, boolean_false_node
);
3344 update_stmt (cond_stmt
);
3352 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3353    of STMT in its operands.  This is also known as a "destructive
3354    update" operation.  */
3357 is_phi_for_stmt (gimple stmt, tree operand)
3362   use_operand_p arg_p;
3365   if (TREE_CODE (operand) != SSA_NAME)
3368   lhs = gimple_assign_lhs (stmt);
3370   def_stmt = SSA_NAME_DEF_STMT (operand);
3371   def_phi = dyn_cast <gphi *> (def_stmt);
3375   FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3376     if (lhs == USE_FROM_PTR (arg_p))
3381 /* Remove def stmt of VAR if VAR has zero uses and recurse
3382 on rhs1 operand if so. */
3385 remove_visited_stmt_chain (tree var)
3388   gimple_stmt_iterator gsi;
3392   if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3394   stmt = SSA_NAME_DEF_STMT (var);
3395   if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3397       var = gimple_assign_rhs1 (stmt);
3398       gsi = gsi_for_stmt (stmt);
3399       reassoc_remove_stmt (&gsi);
3400       release_defs (stmt);
3407 /* This function checks three consecutive operands in
3408    the passed operands vector OPS starting from OPINDEX and
3409    swaps two operands if it is profitable for the binary operation
3410    consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3412 We pair ops with the same rank if possible.
3414 The alternative we try is to see if STMT is a destructive
3415 update style statement, which is like:
3418 In that case, we want to use the destructive update form to
3419 expose the possible vectorizer sum reduction opportunity.
3420 In that case, the third operand will be the phi node. This
3421 check is not performed if STMT is null.
3423 We could, of course, try to be better as noted above, and do a
3424 lot of work to try to find these opportunities in >3 operand
3425 cases, but it is unlikely to be worth it. */
3428 swap_ops_for_binary_stmt (vec
<operand_entry_t
> ops
,
3429 unsigned int opindex
, gimple stmt
)
3431 operand_entry_t oe1
, oe2
, oe3
;
3434 oe2
= ops
[opindex
+ 1];
3435 oe3
= ops
[opindex
+ 2];
3437 if ((oe1
->rank
== oe2
->rank
3438 && oe2
->rank
!= oe3
->rank
)
3439 || (stmt
&& is_phi_for_stmt (stmt
, oe3
->op
)
3440 && !is_phi_for_stmt (stmt
, oe1
->op
)
3441 && !is_phi_for_stmt (stmt
, oe2
->op
)))
3443 struct operand_entry temp
= *oe3
;
3445 oe3
->rank
= oe1
->rank
;
3447 oe1
->rank
= temp
.rank
;
3449 else if ((oe1
->rank
== oe3
->rank
3450 && oe2
->rank
!= oe3
->rank
)
3451 || (stmt
&& is_phi_for_stmt (stmt
, oe2
->op
)
3452 && !is_phi_for_stmt (stmt
, oe1
->op
)
3453 && !is_phi_for_stmt (stmt
, oe3
->op
)))
3455 struct operand_entry temp
= *oe2
;
3457 oe2
->rank
= oe1
->rank
;
3459 oe1
->rank
= temp
.rank
;
3463 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3464 two definitions, otherwise return STMT. */
3466 static inline gimple
3467 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3469   if (TREE_CODE (rhs1) == SSA_NAME
3470       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3471     stmt = SSA_NAME_DEF_STMT (rhs1);
3472   if (TREE_CODE (rhs2) == SSA_NAME
3473       && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3474     stmt = SSA_NAME_DEF_STMT (rhs2);
3478 /* Recursively rewrite our linearized statements so that the operators
3479 match those in OPS[OPINDEX], putting the computation in rank
3480 order. Return new lhs. */
3483 rewrite_expr_tree (gimple stmt
, unsigned int opindex
,
3484 vec
<operand_entry_t
> ops
, bool changed
)
3486 tree rhs1
= gimple_assign_rhs1 (stmt
);
3487 tree rhs2
= gimple_assign_rhs2 (stmt
);
3488 tree lhs
= gimple_assign_lhs (stmt
);
3491 /* The final recursion case for this function is that you have
3492 exactly two operations left.
3493 If we had exactly one op in the entire list to start with, we
3494 would have never called this function, and the tail recursion
3495 rewrites them one at a time. */
3496 if (opindex
+ 2 == ops
.length ())
3498 operand_entry_t oe1
, oe2
;
3501 oe2
= ops
[opindex
+ 1];
3503 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
3505 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3506 unsigned int uid
= gimple_uid (stmt
);
3508 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3510 fprintf (dump_file
, "Transforming ");
3511 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3514 /* Even when changed is false, reassociation could have e.g. removed
3515 some redundant operations, so unless we are just swapping the
3516 arguments or unless there is no change at all (then we just
3517 return lhs), force creation of a new SSA_NAME. */
3518 if (changed
|| ((rhs1
!= oe2
->op
|| rhs2
!= oe1
->op
) && opindex
))
3520 gimple insert_point
= find_insert_point (stmt
, oe1
->op
, oe2
->op
);
3521 lhs
= make_ssa_name (TREE_TYPE (lhs
));
3523 = gimple_build_assign (lhs
, gimple_assign_rhs_code (stmt
),
3525 gimple_set_uid (stmt
, uid
);
3526 gimple_set_visited (stmt
, true);
3527 if (insert_point
== gsi_stmt (gsi
))
3528 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3530 insert_stmt_after (stmt
, insert_point
);
3534 gcc_checking_assert (find_insert_point (stmt
, oe1
->op
, oe2
->op
)
3536 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
3537 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
3541 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
3542 remove_visited_stmt_chain (rhs1
);
3544 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3546 fprintf (dump_file
, " into ");
3547 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3553 /* If we hit here, we should have 3 or more ops left. */
3554 gcc_assert (opindex
+ 2 < ops
.length ());
3556 /* Rewrite the next operator. */
3559 /* Recurse on the LHS of the binary operator, which is guaranteed to
3560 be the non-leaf side. */
3562 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), opindex
+ 1, ops
,
3563 changed
|| oe
->op
!= rhs2
);
3565 if (oe
->op
!= rhs2
|| new_rhs1
!= rhs1
)
3567 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3569 fprintf (dump_file
, "Transforming ");
3570 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3573 /* If changed is false, this is either opindex == 0
3574 or all outer rhs2's were equal to corresponding oe->op,
3575 and powi_result is NULL.
3576 That means lhs is equivalent before and after reassociation.
3577 Otherwise ensure the old lhs SSA_NAME is not reused and
3578 create a new stmt as well, so that any debug stmts will be
3579 properly adjusted. */
3582 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3583 unsigned int uid
= gimple_uid (stmt
);
3584 gimple insert_point
= find_insert_point (stmt
, new_rhs1
, oe
->op
);
3586 lhs
= make_ssa_name (TREE_TYPE (lhs
));
3587 stmt
= gimple_build_assign (lhs
, gimple_assign_rhs_code (stmt
),
3589 gimple_set_uid (stmt
, uid
);
3590 gimple_set_visited (stmt
, true);
3591 if (insert_point
== gsi_stmt (gsi
))
3592 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3594 insert_stmt_after (stmt
, insert_point
);
3598 gcc_checking_assert (find_insert_point (stmt
, new_rhs1
, oe
->op
)
3600 gimple_assign_set_rhs1 (stmt
, new_rhs1
);
3601 gimple_assign_set_rhs2 (stmt
, oe
->op
);
3605 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3607 fprintf (dump_file
, " into ");
3608 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3614 /* Find out how many cycles we need to compute a chain of statements.
3615    OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is the
3616    maximum number of independent statements we may execute per cycle.  */
3619 get_required_cycles (int ops_num, int cpu_width)
3625   /* While we have more than 2 * cpu_width operands
3626      we may reduce the number of operands by cpu_width
3628   res = ops_num / (2 * cpu_width);
3630   /* The remaining operand count may be halved each cycle
3631      until we have only one operand.  */
3632   rest = (unsigned)(ops_num - res * cpu_width);
3633   elog = exact_log2 (rest);
3637   res += floor_log2 (rest) + 1;
3642 /* Returns an optimal number of registers to use for computation of
3643 given statements. */
3646 get_reassociation_width (int ops_num
, enum tree_code opc
,
3649 int param_width
= PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH
);
3654 if (param_width
> 0)
3655 width
= param_width
;
3657 width
= targetm
.sched
.reassociation_width (opc
, mode
);
3662 /* Get the minimal time required for sequence computation. */
3663 cycles_best
= get_required_cycles (ops_num
, width
);
3665 /* Check if we may use less width and still compute sequence for
3666 the same time. It will allow us to reduce registers usage.
3667 get_required_cycles is monotonically increasing with lower width
3668 so we can perform a binary search for the minimal width that still
3669 results in the optimal cycle count. */
3671 while (width
> width_min
)
3673 int width_mid
= (width
+ width_min
) / 2;
3675 if (get_required_cycles (ops_num
, width_mid
) == cycles_best
)
3677 else if (width_min
< width_mid
)
3678 width_min
= width_mid
;
3686 /* Recursively rewrite our linearized statements so that the operators
3687 match those in OPS[OPINDEX], putting the computation in rank
3688 order and trying to allow operations to be executed in
3692 rewrite_expr_tree_parallel (gassign
*stmt
, int width
,
3693 vec
<operand_entry_t
> ops
)
3695 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
3696 int op_num
= ops
.length ();
3697 int stmt_num
= op_num
- 1;
3698 gimple
*stmts
= XALLOCAVEC (gimple
, stmt_num
);
3699 int op_index
= op_num
- 1;
3701 int ready_stmts_end
= 0;
3703 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
3705 /* We start expression rewriting from the top statements.
3706 So, in this loop we create a full list of statements
3707 we will work with. */
3708 stmts
[stmt_num
- 1] = stmt
;
3709 for (i
= stmt_num
- 2; i
>= 0; i
--)
3710 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
3712 for (i
= 0; i
< stmt_num
; i
++)
3716 /* Determine whether we should use results of
3717 already handled statements or not. */
3718 if (ready_stmts_end
== 0
3719 && (i
- stmt_index
>= width
|| op_index
< 1))
3720 ready_stmts_end
= i
;
3722 /* Now we choose operands for the next statement. Non zero
3723 value in ready_stmts_end means here that we should use
3724 the result of already generated statements as new operand. */
3725 if (ready_stmts_end
> 0)
3727 op1
= gimple_assign_lhs (stmts
[stmt_index
++]);
3728 if (ready_stmts_end
> stmt_index
)
3729 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3730 else if (op_index
>= 0)
3731 op2
= ops
[op_index
--]->op
;
3734 gcc_assert (stmt_index
< i
);
3735 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3738 if (stmt_index
>= ready_stmts_end
)
3739 ready_stmts_end
= 0;
3744 swap_ops_for_binary_stmt (ops
, op_index
- 2, NULL
);
3745 op2
= ops
[op_index
--]->op
;
3746 op1
= ops
[op_index
--]->op
;
3749 /* If we emit the last statement then we should put
3750 operands into the last statement. It will also
3752 if (op_index
< 0 && stmt_index
== i
)
3755 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3757 fprintf (dump_file
, "Transforming ");
3758 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3761 /* We keep original statement only for the last one. All
3762 others are recreated. */
3763 if (i
== stmt_num
- 1)
3765 gimple_assign_set_rhs1 (stmts
[i
], op1
);
3766 gimple_assign_set_rhs2 (stmts
[i
], op2
);
3767 update_stmt (stmts
[i
]);
3770 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
), op1
, op2
, opcode
);
3772 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3774 fprintf (dump_file
, " into ");
3775 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3779 remove_visited_stmt_chain (last_rhs1
);
3782 /* Transform STMT, which is really (A +B) + (C + D) into the left
3783 linear form, ((A+B)+C)+D.
3784 Recurse on D if necessary. */
3787 linearize_expr (gimple stmt
)
3789 gimple_stmt_iterator gsi
;
3790 gimple binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3791 gimple binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3792 gimple oldbinrhs
= binrhs
;
3793 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
3794 gimple newbinrhs
= NULL
;
3795 struct loop
*loop
= loop_containing_stmt (stmt
);
3796 tree lhs
= gimple_assign_lhs (stmt
);
3798 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
3799 && is_reassociable_op (binrhs
, rhscode
, loop
));
3801 gsi
= gsi_for_stmt (stmt
);
3803 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
3804 binrhs
= gimple_build_assign (make_ssa_name (TREE_TYPE (lhs
)),
3805 gimple_assign_rhs_code (binrhs
),
3806 gimple_assign_lhs (binlhs
),
3807 gimple_assign_rhs2 (binrhs
));
3808 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
3809 gsi_insert_before (&gsi
, binrhs
, GSI_SAME_STMT
);
3810 gimple_set_uid (binrhs
, gimple_uid (stmt
));
3812 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
3813 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3815 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3817 fprintf (dump_file
, "Linearized: ");
3818 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3821 reassociate_stats
.linearized
++;
3824 gsi
= gsi_for_stmt (oldbinrhs
);
3825 reassoc_remove_stmt (&gsi
);
3826 release_defs (oldbinrhs
);
3828 gimple_set_visited (stmt
, true);
3829 gimple_set_visited (binlhs
, true);
3830 gimple_set_visited (binrhs
, true);
3832 /* Tail recurse on the new rhs if it still needs reassociation. */
3833 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
3834 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3835 want to change the algorithm while converting to tuples. */
3836 linearize_expr (stmt
);
3839 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3840 it. Otherwise, return NULL. */
3843 get_single_immediate_use (tree lhs)
3845   use_operand_p immuse;
3848   if (TREE_CODE (lhs) == SSA_NAME
3849       && single_imm_use (lhs, &immuse, &immusestmt)
3850       && is_gimple_assign (immusestmt))
3856 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3857 representing the negated value. Insertions of any necessary
3858 instructions go before GSI.
3859 This function is recursive in that, if you hand it "a_5" as the
3860 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3861 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3864 negate_value (tree tonegate
, gimple_stmt_iterator
*gsip
)
3866 gimple negatedefstmt
= NULL
;
3867 tree resultofnegate
;
3868 gimple_stmt_iterator gsi
;
3871 /* If we are trying to negate a name, defined by an add, negate the
3872 add operands instead. */
3873 if (TREE_CODE (tonegate
) == SSA_NAME
)
3874 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
3875 if (TREE_CODE (tonegate
) == SSA_NAME
3876 && is_gimple_assign (negatedefstmt
)
3877 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
3878 && has_single_use (gimple_assign_lhs (negatedefstmt
))
3879 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
3881 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
3882 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
3883 tree lhs
= gimple_assign_lhs (negatedefstmt
);
3886 gsi
= gsi_for_stmt (negatedefstmt
);
3887 rhs1
= negate_value (rhs1
, &gsi
);
3889 gsi
= gsi_for_stmt (negatedefstmt
);
3890 rhs2
= negate_value (rhs2
, &gsi
);
3892 gsi
= gsi_for_stmt (negatedefstmt
);
3893 lhs
= make_ssa_name (TREE_TYPE (lhs
));
3894 gimple_set_visited (negatedefstmt
, true);
3895 g
= gimple_build_assign (lhs
, PLUS_EXPR
, rhs1
, rhs2
);
3896 gimple_set_uid (g
, gimple_uid (negatedefstmt
));
3897 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3901 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
3902 resultofnegate
= force_gimple_operand_gsi (gsip
, tonegate
, true,
3903 NULL_TREE
, true, GSI_SAME_STMT
);
3905 uid
= gimple_uid (gsi_stmt (gsi
));
3906 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
3908 gimple stmt
= gsi_stmt (gsi
);
3909 if (gimple_uid (stmt
) != 0)
3911 gimple_set_uid (stmt
, uid
);
3913 return resultofnegate
;
3916 /* Return true if we should break up the subtract in STMT into an add
3917    with negate.  This is true when the subtract operands are really
3918 adds, or the subtract itself is used in an add expression. In
3919 either case, breaking up the subtract into an add with negate
3920 exposes the adds to reassociation. */
3923 should_break_up_subtract (gimple stmt)
3925   tree lhs = gimple_assign_lhs (stmt);
3926   tree binlhs = gimple_assign_rhs1 (stmt);
3927   tree binrhs = gimple_assign_rhs2 (stmt);
3929   struct loop *loop = loop_containing_stmt (stmt);
3931   if (TREE_CODE (binlhs) == SSA_NAME
3932       && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3935   if (TREE_CODE (binrhs) == SSA_NAME
3936       && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3939   if (TREE_CODE (lhs) == SSA_NAME
3940       && (immusestmt = get_single_immediate_use (lhs))
3941       && is_gimple_assign (immusestmt)
3942       && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3943           || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3948 /* Transform STMT from A - B into A + -B. */
3951 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3953   tree rhs1 = gimple_assign_rhs1 (stmt);
3954   tree rhs2 = gimple_assign_rhs2 (stmt);
3956   if (dump_file && (dump_flags & TDF_DETAILS))
3958       fprintf (dump_file, "Breaking up subtract ");
3959       print_gimple_stmt (dump_file, stmt, 0, 0);
3962   rhs2 = negate_value (rhs2, gsip);
3963   gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3967 /* Determine whether STMT is a builtin call that raises an SSA name
3968 to an integer power and has only one use. If so, and this is early
3969 reassociation and unsafe math optimizations are permitted, place
3970 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3971 If any of these conditions does not hold, return FALSE. */
3974 acceptable_pow_call (gimple stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
3977 REAL_VALUE_TYPE c
, cint
;
3979 if (!first_pass_instance
3980 || !flag_unsafe_math_optimizations
3981 || !is_gimple_call (stmt
)
3982 || !has_single_use (gimple_call_lhs (stmt
)))
3985 fndecl
= gimple_call_fndecl (stmt
);
3988 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
3991 switch (DECL_FUNCTION_CODE (fndecl
))
3993 CASE_FLT_FN (BUILT_IN_POW
):
3994 if (flag_errno_math
)
3997 *base
= gimple_call_arg (stmt
, 0);
3998 arg1
= gimple_call_arg (stmt
, 1);
4000 if (TREE_CODE (arg1
) != REAL_CST
)
4003 c
= TREE_REAL_CST (arg1
);
4005 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
4008 *exponent
= real_to_integer (&c
);
4009 real_from_integer (&cint
, VOIDmode
, *exponent
, SIGNED
);
4010 if (!real_identical (&c
, &cint
))
4015 CASE_FLT_FN (BUILT_IN_POWI
):
4016 *base
= gimple_call_arg (stmt
, 0);
4017 arg1
= gimple_call_arg (stmt
, 1);
4019 if (!tree_fits_shwi_p (arg1
))
4022 *exponent
= tree_to_shwi (arg1
);
4029 /* Expanding negative exponents is generally unproductive, so we don't
4030 complicate matters with those. Exponents of zero and one should
4031 have been handled by expression folding. */
4032 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
4038 /* Recursively linearize a binary expression that is the RHS of STMT.
4039 Place the operands of the expression tree in the vector named OPS. */
4042 linearize_expr_tree (vec
<operand_entry_t
> *ops
, gimple stmt
,
4043 bool is_associative
, bool set_visited
)
4045 tree binlhs
= gimple_assign_rhs1 (stmt
);
4046 tree binrhs
= gimple_assign_rhs2 (stmt
);
4047 gimple binlhsdef
= NULL
, binrhsdef
= NULL
;
4048 bool binlhsisreassoc
= false;
4049 bool binrhsisreassoc
= false;
4050 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
4051 struct loop
*loop
= loop_containing_stmt (stmt
);
4052 tree base
= NULL_TREE
;
4053 HOST_WIDE_INT exponent
= 0;
4056 gimple_set_visited (stmt
, true);
4058 if (TREE_CODE (binlhs
) == SSA_NAME
)
4060 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
4061 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
4062 && !stmt_could_throw_p (binlhsdef
));
4065 if (TREE_CODE (binrhs
) == SSA_NAME
)
4067 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
4068 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
4069 && !stmt_could_throw_p (binrhsdef
));
4072 /* If the LHS is not reassociable, but the RHS is, we need to swap
4073 them. If neither is reassociable, there is nothing we can do, so
4074 just put them in the ops vector. If the LHS is reassociable,
4075 linearize it. If both are reassociable, then linearize the RHS
4078 if (!binlhsisreassoc
)
4080 /* If this is not a associative operation like division, give up. */
4081 if (!is_associative
)
4083 add_to_ops_vec (ops
, binrhs
);
4087 if (!binrhsisreassoc
)
4089 if (rhscode
== MULT_EXPR
4090 && TREE_CODE (binrhs
) == SSA_NAME
4091 && acceptable_pow_call (binrhsdef
, &base
, &exponent
))
4093 add_repeat_to_ops_vec (ops
, base
, exponent
);
4094 gimple_set_visited (binrhsdef
, true);
4097 add_to_ops_vec (ops
, binrhs
);
4099 if (rhscode
== MULT_EXPR
4100 && TREE_CODE (binlhs
) == SSA_NAME
4101 && acceptable_pow_call (binlhsdef
, &base
, &exponent
))
4103 add_repeat_to_ops_vec (ops
, base
, exponent
);
4104 gimple_set_visited (binlhsdef
, true);
4107 add_to_ops_vec (ops
, binlhs
);
4112 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4114 fprintf (dump_file
, "swapping operands of ");
4115 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4118 swap_ssa_operands (stmt
,
4119 gimple_assign_rhs1_ptr (stmt
),
4120 gimple_assign_rhs2_ptr (stmt
));
4123 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4125 fprintf (dump_file
, " is now ");
4126 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4129 /* We want to make it so the lhs is always the reassociative op,
4131 std::swap (binlhs
, binrhs
);
4133 else if (binrhsisreassoc
)
4135 linearize_expr (stmt
);
4136 binlhs
= gimple_assign_rhs1 (stmt
);
4137 binrhs
= gimple_assign_rhs2 (stmt
);
4140 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
4141 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
4143 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
4144 is_associative
, set_visited
);
4146 if (rhscode
== MULT_EXPR
4147 && TREE_CODE (binrhs
) == SSA_NAME
4148 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs
), &base
, &exponent
))
4150 add_repeat_to_ops_vec (ops
, base
, exponent
);
4151 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs
), true);
4154 add_to_ops_vec (ops
, binrhs
);
4157 /* Repropagate the negates back into subtracts, since no other pass
4158 currently does it. */
4161 repropagate_negates (void)
4166 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
4168 gimple user
= get_single_immediate_use (negate
);
4170 if (!user
|| !is_gimple_assign (user
))
4173 /* The negate operand can be either operand of a PLUS_EXPR
4174 (it can be the LHS if the RHS is a constant for example).
4176 Force the negate operand to the RHS of the PLUS_EXPR, then
4177 transform the PLUS_EXPR into a MINUS_EXPR. */
4178 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
4180 /* If the negated operand appears on the LHS of the
4181 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4182 to force the negated operand to the RHS of the PLUS_EXPR. */
4183 if (gimple_assign_rhs1 (user
) == negate
)
4185 swap_ssa_operands (user
,
4186 gimple_assign_rhs1_ptr (user
),
4187 gimple_assign_rhs2_ptr (user
));
4190 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4191 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4192 if (gimple_assign_rhs2 (user
) == negate
)
4194 tree rhs1
= gimple_assign_rhs1 (user
);
4195 tree rhs2
= get_unary_op (negate
, NEGATE_EXPR
);
4196 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
4197 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
, rhs2
);
4201 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
4203 if (gimple_assign_rhs1 (user
) == negate
)
4208 which we transform into
4211 This pushes down the negate which we possibly can merge
4212 into some other operation, hence insert it into the
4213 plus_negates vector. */
4214 gimple feed
= SSA_NAME_DEF_STMT (negate
);
4215 tree a
= gimple_assign_rhs1 (feed
);
4216 tree b
= gimple_assign_rhs2 (user
);
4217 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
);
4218 gimple_stmt_iterator gsi2
= gsi_for_stmt (user
);
4219 tree x
= make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed
)));
4220 gimple g
= gimple_build_assign (x
, PLUS_EXPR
, a
, b
);
4221 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
4222 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, x
);
4223 user
= gsi_stmt (gsi2
);
4225 reassoc_remove_stmt (&gsi
);
4226 release_defs (feed
);
4227 plus_negates
.safe_push (gimple_assign_lhs (user
));
4231 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4232 rid of one operation. */
4233 gimple feed
= SSA_NAME_DEF_STMT (negate
);
4234 tree a
= gimple_assign_rhs1 (feed
);
4235 tree rhs1
= gimple_assign_rhs1 (user
);
4236 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
4237 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, a
);
4238 update_stmt (gsi_stmt (gsi
));
4244 /* Returns true if OP is of a type for which we can do reassociation.
4245    That is, integral or non-saturating fixed-point types, and
4246    floating-point types when associative-math is enabled.
4249 can_reassociate_p (tree op)
4251   tree type = TREE_TYPE (op);
4252   if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4253       || NON_SAT_FIXED_POINT_TYPE_P (type)
4254       || (flag_associative_math && FLOAT_TYPE_P (type)))
4259 /* Break up subtract operations in block BB.
4261 We do this top down because we don't know whether the subtract is
4262 part of a possible chain of reassociation except at the top.
4271 we want to break up k = t - q, but we won't until we've transformed q
4272 = b - r, which won't be broken up until we transform b = c - d.
4274 En passant, clear the GIMPLE visited flag on every statement
4275 and set UIDs within each basic block. */
4278 break_up_subtract_bb (basic_block bb
)
4280 gimple_stmt_iterator gsi
;
4282 unsigned int uid
= 1;
4284 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4286 gimple stmt
= gsi_stmt (gsi
);
4287 gimple_set_visited (stmt
, false);
4288 gimple_set_uid (stmt
, uid
++);
4290 if (!is_gimple_assign (stmt
)
4291 || !can_reassociate_p (gimple_assign_lhs (stmt
)))
4294 /* Look for simple gimple subtract operations. */
4295 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
4297 if (!can_reassociate_p (gimple_assign_rhs1 (stmt
))
4298 || !can_reassociate_p (gimple_assign_rhs2 (stmt
)))
4301 /* Check for a subtract used only in an addition. If this
4302 is the case, transform it into add of a negate for better
4303 reassociation. IE transform C = A-B into C = A + -B if C
4304 is only used in an addition. */
4305 if (should_break_up_subtract (stmt
))
4306 break_up_subtract (stmt
, &gsi
);
4308 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
4309 && can_reassociate_p (gimple_assign_rhs1 (stmt
)))
4310 plus_negates
.safe_push (gimple_assign_lhs (stmt
));
4312 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
4314 son
= next_dom_son (CDI_DOMINATORS
, son
))
4315 break_up_subtract_bb (son
);
4318 /* Used for repeated factor analysis. */
4319 struct repeat_factor_d
4321 /* An SSA name that occurs in a multiply chain. */
4324 /* Cached rank of the factor. */
4327 /* Number of occurrences of the factor in the chain. */
4328 HOST_WIDE_INT count
;
4330 /* An SSA name representing the product of this factor and
4331 all factors appearing later in the repeated factor vector. */
4335 typedef struct repeat_factor_d repeat_factor
, *repeat_factor_t
;
4336 typedef const struct repeat_factor_d
*const_repeat_factor_t
;
4339 static vec
<repeat_factor
> repeat_factor_vec
;
4341 /* Used for sorting the repeat factor vector. Sort primarily by
4342 ascending occurrence count, secondarily by descending rank. */
4345 compare_repeat_factors (const void *x1, const void *x2)
4347   const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4348   const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4350   if (rf1->count != rf2->count)
4351     return rf1->count - rf2->count;
4353   return rf2->rank - rf1->rank;
/* Look for repeated operands in OPS in the multiply tree rooted at
   STMT.  Replace them with an optimal sequence of multiplies and powi
   builtin calls, and remove the used operands from OPS.  Return an
   SSA name representing the value of the replacement sequence.  */

static tree
attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
{
  unsigned i, j, vec_len;
  int ii;
  operand_entry_t oe;
  repeat_factor_t rf1, rf2;
  repeat_factor rfnew;
  tree result = NULL_TREE;
  tree target_ssa, iter_result;
  tree type = TREE_TYPE (gimple_get_lhs (stmt));
  tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gimple mul_stmt, pow_stmt;

  /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
     target.  */
  if (!powi_fndecl)
    return NULL_TREE;

  /* Allocate the repeated factor vector.  */
  repeat_factor_vec.create (10);

  /* Scan the OPS vector for all SSA names in the product and build
     up a vector of occurrence counts for each factor.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
	{
	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	    {
	      if (rf1->factor == oe->op)
		{
		  rf1->count += oe->count;
		  break;
		}
	    }

	  if (j >= repeat_factor_vec.length ())
	    {
	      rfnew.factor = oe->op;
	      rfnew.rank = oe->rank;
	      rfnew.count = oe->count;
	      rfnew.repr = NULL_TREE;
	      repeat_factor_vec.safe_push (rfnew);
	    }
	}
    }

  /* Sort the repeated factor vector by (a) increasing occurrence count,
     and (b) decreasing rank.  */
  repeat_factor_vec.qsort (compare_repeat_factors);
  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_pow opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions:

	t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = t1 * t3
	result = t4 * z

     If we instead ignored the cached y * z and first multiplied by
     the __builtin_pow opportunity z * z, we would get the inferior:

	t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = z * z
	t5 = t3 * t4
	result = t5 * y  */

  vec_len = repeat_factor_vec.length ();
  /* Repeatedly look for opportunities to create a builtin_powi call.  */
  while (true)
    {
      HOST_WIDE_INT power;

      /* First look for the largest cached product of factors from
	 preceding iterations.  If found, create a builtin_powi for
	 it if the minimum occurrence count for its factors is at
	 least 2, or just use this cached product as our next
	 multiplicand if the minimum occurrence count is 1.  */
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	{
	  if (rf1->repr && rf1->count > 0)
	    break;
	}

      if (j < vec_len)
	{
	  power = rf1->count;

	  if (power == 1)
	    {
	      iter_result = rf1->repr;

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor_t rf;
		  fputs ("Multiplying by cached product ", dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = &repeat_factor_vec[elt];
		      print_generic_expr (dump_file, rf->factor, 0);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fputs ("\n", dump_file);
		}
	    }
	  else
	    {
	      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
					    build_int_cst (integer_type_node,
							   power));
	      gimple_call_set_lhs (pow_stmt, iter_result);
	      gimple_set_location (pow_stmt, gimple_location (stmt));
	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor_t rf;
		  fputs ("Building __builtin_pow call for cached product (",
			 dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = &repeat_factor_vec[elt];
		      print_generic_expr (dump_file, rf->factor, 0);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n",
			   power);
		}
	    }
	}
      else
	{
	  /* Otherwise, find the first factor in the repeated factor
	     vector whose occurrence count is at least 2.  If no such
	     factor exists, there are no builtin_powi opportunities
	     remaining.  */
	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	    {
	      if (rf1->count >= 2)
		break;
	    }

	  if (j >= vec_len)
	    break;

	  power = rf1->count;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      unsigned elt;
	      repeat_factor_t rf;
	      fputs ("Building __builtin_pow call for (", dump_file);
	      for (elt = j; elt < vec_len; elt++)
		{
		  rf = &repeat_factor_vec[elt];
		  print_generic_expr (dump_file, rf->factor, 0);
		  if (elt < vec_len - 1)
		    fputs (" * ", dump_file);
		}
	      fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n", power);
	    }

	  reassociate_stats.pows_created++;

	  /* Visit each element of the vector in reverse order (so that
	     high-occurrence elements are visited first, and within the
	     same occurrence count, lower-ranked elements are visited
	     first).  Form a linear product of all elements in this order
	     whose occurrence count is at least that of element J.
	     Record the SSA name representing the product of each element
	     with all subsequent elements in the vector.  */
	  if (j == vec_len - 1)
	    rf1->repr = rf1->factor;
	  else
	    {
	      for (ii = vec_len - 2; ii >= (int) j; ii--)
		{
		  tree op1, op2;

		  rf1 = &repeat_factor_vec[ii];
		  rf2 = &repeat_factor_vec[ii + 1];

		  /* Init the last factor's representative to be itself.  */
		  if (!rf2->repr)
		    {
		      rf2->repr = rf2->factor;
		      op2 = rf2->factor;
		    }
		  else
		    op2 = rf2->repr;

		  op1 = rf1->factor;
		  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
		  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
						  op1, op2);
		  gimple_set_location (mul_stmt, gimple_location (stmt));
		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
		  rf1->repr = target_ssa;

		  /* Don't reprocess the multiply we just introduced.  */
		  gimple_set_visited (mul_stmt, true);
		}
	    }

	  /* Form a call to __builtin_powi for the maximum product
	     just formed, raised to the power obtained earlier.  */
	  rf1 = &repeat_factor_vec[j];
	  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
	  pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
					build_int_cst (integer_type_node,
						       power));
	  gimple_call_set_lhs (pow_stmt, iter_result);
	  gimple_set_location (pow_stmt, gimple_location (stmt));
	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
	}

      /* If we previously formed at least one other builtin_powi call,
	 form the product of this one and those others.  */
      if (result)
	{
	  tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
	  mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
					  result, iter_result);
	  gimple_set_location (mul_stmt, gimple_location (stmt));
	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
	  gimple_set_visited (mul_stmt, true);
	  result = new_result;
	}
      else
	result = iter_result;

      /* Decrement the occurrence count of each element in the product
	 by the count found above, and remove this many copies of each
	 factor from OPS.  */
      for (i = j; i < vec_len; i++)
	{
	  unsigned k = power;
	  unsigned n;

	  rf1 = &repeat_factor_vec[i];
	  rf1->count -= power;

	  FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
	    {
	      if (oe->op == rf1->factor)
		{
		  if (oe->count <= k)
		    {
		      ops->ordered_remove (n);
		      k -= oe->count;

		      if (k == 0)
			break;
		    }
		  else
		    {
		      oe->count -= k;
		      break;
		    }
		}
	    }
	}
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}
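
/* Illustrative example (names are made up): for the linearized product
   x = a * a * a * a * b the factor vector holds {a: 4, b: 1}, so the loop
   above emits

     reassocpow_1 = __builtin_powi (a, 4);

   removes the four copies of A from OPS, and returns reassocpow_1; the
   remaining single factor B is multiplied in later by the normal rewrite
   path in reassociate_bb.  */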
/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */

static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
{
  tree rhs1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}
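
/* Illustrative use (names are made up): if OPS became empty because
   attempt_builtin_powi consumed every operand of x = a * a, the caller
   passes the __builtin_powi (a, 2) temporary as NEW_RHS and the root
   statement simply becomes a copy, x = reassocpow_1.  */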
/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
			    tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}
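
/* Illustrative use (names are made up): when attempt_builtin_powi has
   consumed all but one operand of x = a * a * b, OPS is left holding only
   B and powi_result is the __builtin_powi (a, 2) temporary, so
   reassociate_bb calls this routine to rewrite the root statement as
   x = b * reassocpow_1.  */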
/* Reassociate expressions in basic block BB and its post-dominator as
   children.  */

static void
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple stmt = last_stmt (bb);

  if (stmt && !gimple_visited_p (stmt))
    maybe_optimize_range_tests (stmt);

  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
	  && !stmt_could_throw_p (stmt))
	{
	  tree lhs, rhs1, rhs2;
	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

	  /* If this is not a gimple binary expression, there is
	     nothing for us to do with it.  */
	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
	    continue;

	  /* If this was part of an already processed statement,
	     we don't need to touch it again.  */
	  if (gimple_visited_p (stmt))
	    {
	      /* This statement might have become dead because of previous
		 reassociations.  */
	      if (has_zero_uses (gimple_get_lhs (stmt)))
		{
		  reassoc_remove_stmt (&gsi);
		  release_defs (stmt);
		  /* We might end up removing the last stmt above which
		     places the iterator to the end of the sequence.
		     Reset it to the last stmt in this case which might
		     be the end of the sequence as well if we removed
		     the last statement of the sequence.  In which case
		     we need to bail out.  */
		  if (gsi_end_p (gsi))
		    {
		      gsi = gsi_last_bb (bb);
		      if (gsi_end_p (gsi))
			break;
		    }
		}
	      continue;
	    }

	  lhs = gimple_assign_lhs (stmt);
	  rhs1 = gimple_assign_rhs1 (stmt);
	  rhs2 = gimple_assign_rhs2 (stmt);

	  /* For non-bit or min/max operations we can't associate
	     all types.  Verify that here.  */
	  if (rhs_code != BIT_IOR_EXPR
	      && rhs_code != BIT_AND_EXPR
	      && rhs_code != BIT_XOR_EXPR
	      && rhs_code != MIN_EXPR
	      && rhs_code != MAX_EXPR
	      && (!can_reassociate_p (lhs)
		  || !can_reassociate_p (rhs1)
		  || !can_reassociate_p (rhs2)))
	    continue;

	  if (associative_tree_code (rhs_code))
	    {
	      auto_vec<operand_entry_t> ops;
	      tree powi_result = NULL_TREE;

	      /* There may be no immediate uses left by the time we
		 get here because we may have eliminated them all.  */
	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
		continue;

	      gimple_set_visited (stmt, true);
	      linearize_expr_tree (&ops, stmt, true, true);
	      ops.qsort (sort_by_operand_rank);
	      optimize_ops_list (rhs_code, &ops);
	      if (undistribute_ops_list (rhs_code, &ops,
					 loop_containing_stmt (stmt)))
		{
		  ops.qsort (sort_by_operand_rank);
		  optimize_ops_list (rhs_code, &ops);
		}

	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
		optimize_range_tests (rhs_code, &ops);

	      if (first_pass_instance
		  && rhs_code == MULT_EXPR
		  && flag_unsafe_math_optimizations)
		powi_result = attempt_builtin_powi (stmt, &ops);

	      /* If the operand vector is now empty, all operands were
		 consumed by the __builtin_powi optimization.  */
	      if (ops.length () == 0)
		transform_stmt_to_copy (&gsi, stmt, powi_result);
	      else if (ops.length () == 1)
		{
		  tree last_op = ops.last ()->op;

		  if (powi_result)
		    transform_stmt_to_multiply (&gsi, stmt, last_op,
						powi_result);
		  else
		    transform_stmt_to_copy (&gsi, stmt, last_op);
		}
	      else
		{
		  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
		  int ops_num = ops.length ();
		  int width = get_reassociation_width (ops_num, rhs_code, mode);
		  tree new_lhs = lhs;

		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file,
			     "Width = %d was chosen for reassociation\n", width);

		  if (width > 1
		      && ops.length () > 3)
		    rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
						width, ops);
		  else
		    {
		      /* When there are three operands left, we want
			 to make sure the ones that get the double
			 binary op are chosen wisely.  */
		      int len = ops.length ();
		      if (len >= 3)
			swap_ops_for_binary_stmt (ops, len - 3, stmt);

		      new_lhs = rewrite_expr_tree (stmt, 0, ops,
						   powi_result != NULL);
		    }

		  /* If we combined some repeated factors into a
		     __builtin_powi call, multiply that result by the
		     reassociated operands.  */
		  if (powi_result)
		    {
		      gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
		      tree type = TREE_TYPE (lhs);
		      tree target_ssa = make_temp_ssa_name (type, NULL,
							    "reassocpow");
		      gimple_set_lhs (lhs_stmt, target_ssa);
		      update_stmt (lhs_stmt);
		      if (lhs != new_lhs)
			target_ssa = new_lhs;
		      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
						      powi_result, target_ssa);
		      gimple_set_location (mul_stmt, gimple_location (stmt));
		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
		    }
		}
	    }
	}
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    reassociate_bb (son);
}
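
/* Illustrative note on the width handling above (not tied to any particular
   target): for a chain like x = a + b + c + d with a reassociation width
   of 2, rewrite_expr_tree_parallel emits roughly the independent sums
   t1 = a + b and t2 = c + d followed by x = t1 + t2, instead of the fully
   serial ((a + b) + c) + d produced by rewrite_expr_tree.  */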
/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...; // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...; // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI<1(l1), OTHERVAR(l2)>;  */

static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (var);
      gimple use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      gcc_assert (ok
		  && is_gimple_assign (use_stmt)
		  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
		  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple g = gimple_build_cond (NE_EXPR, var,
				    build_zero_cst (TREE_TYPE (var)),
				    NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = REG_BR_PROB_BASE / 2;
      etrue->count = cond_bb->count / 2;
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      efalse->count -= etrue->count;
      then_bb->count -= etrue->count;

      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
	othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
	othervar = gimple_assign_rhs1 (use_stmt);
      else
	gcc_unreachable ();
      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}
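
/* For orientation (restating the transform above in terms of the code):
   COND_BB keeps the range-comparison statement and receives the new
   condition, THEN_BB is the split-off block holding the bit-test sequence
   (the <l2> block in the comment), and MERGE_BB receives the PHI that
   replaces the BIT_IOR_EXPR statement (the <l3> block).  */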
void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
void debug_ops_vector (vec<operand_entry_t> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
{
  operand_entry_t oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op, 0);
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry_t> ops)
{
  dump_ops_vector (stderr, ops);
}

static void
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}
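
/* Note on the traversal order above (explanatory only): break_up_subtract_bb
   recurses over CDI_DOMINATORS children starting at the entry block, so
   subtracts are broken up top down, while reassociate_bb recurses over
   CDI_POST_DOMINATORS children starting at the exit block, so expression
   trees are rewritten from the bottom of the function upwards.  */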
/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, long>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
	insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}
/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
			    reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
			    reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
			    reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
			    reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
			    reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
			    reassociate_stats.pows_created);

  delete operand_rank;
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}
/* Gate and execute functions for Reassociation.  */

static unsigned int
execute_reassoc (void)
{
  init_reassoc ();

  do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return 0;
}

namespace {
const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};
class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
  virtual unsigned int execute (function *) { return execute_reassoc (); }

}; // class pass_reassoc

} // anon namespace

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}