1 /* Reassociation for trees.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
24 #include "hash-table.h"
31 #include "double-int.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
41 #include "hard-reg-set.h"
44 #include "dominance.h"
47 #include "basic-block.h"
48 #include "gimple-pretty-print.h"
49 #include "tree-inline.h"
51 #include "tree-ssa-alias.h"
52 #include "internal-fn.h"
53 #include "gimple-fold.h"
55 #include "gimple-expr.h"
58 #include "gimple-iterator.h"
59 #include "gimplify-me.h"
60 #include "gimple-ssa.h"
62 #include "tree-phinodes.h"
63 #include "ssa-iterators.h"
64 #include "stringpool.h"
65 #include "tree-ssanames.h"
66 #include "tree-ssa-loop-niter.h"
67 #include "tree-ssa-loop.h"
71 #include "tree-iterator.h"
72 #include "tree-pass.h"
73 #include "alloc-pool.h"
74 #include "langhooks.h"
79 #include "diagnostic-core.h"
82 #include "insn-codes.h"
85 /* This is a simple global reassociation pass. It is, in part, based
86 on the LLVM pass of the same name (They do some things more/less
87 than we do, in different orders, etc).
89 It consists of five steps:
91 1. Breaking up subtract operations into addition + negate, where
92 it would promote the reassociation of adds.
94 2. Left linearization of the expression trees, so that (A+B)+(C+D)
95 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
96 During linearization, we place the operands of the binary
97 expressions into a vector of operand_entry_t
99 3. Optimization of the operand lists, eliminating things like a +
-a, a & a, etc.
102 3a. Combine repeated factors with the same occurrence counts
103 into a __builtin_powi call that will later be optimized into
104 an optimal number of multiplies.
106 4. Rewrite the expression trees we linearized and optimized so
107 they are in proper rank order.
109 5. Repropagate negates, as nothing else will clean it up ATM.
111 A bit of theory on #4, since nobody seems to write anything down
112 about why it makes sense to do it the way they do it:
114 We could do this much nicer theoretically, but don't (for reasons
115 explained after how to do it theoretically nice :P).
117 In order to promote the most redundancy elimination, you want
118 binary expressions whose operands are the same rank (or
119 preferably, the same value) exposed to the redundancy eliminator,
120 for possible elimination.
122 So the way to do this if we really cared, is to build the new op
123 tree from the leaves to the roots, merging as you go, and putting the
124 new op on the end of the worklist, until you are left with one
125 thing on the worklist.
127 IE if you have to rewrite the following set of operands (listed with
128 rank in parentheses), with opcode PLUS_EXPR:
130 a (1), b (1), c (1), d (2), e (2)
133 We start with our merge worklist empty, and the ops list with all of
136 You want to first merge all leaves of the same rank, as much as
139 So first build a binary op of
141 mergetmp = a + b, and put "mergetmp" on the merge worklist.
143 Because there is no three operand form of PLUS_EXPR, c is not going to
144 be exposed to redundancy elimination as a rank 1 operand.
146 So you might as well throw it on the merge worklist (you could also
147 consider it to now be a rank two operand, and merge it with d and e,
148 but in this case, you then have evicted e from a binary op. So at
149 least in this situation, you can't win.)
151 Then build a binary op of d + e
154 and put mergetmp2 on the merge worklist.
156 so merge worklist = {mergetmp, c, mergetmp2}
158 Continue building binary ops of these operations until you have only
159 one operation left on the worklist.
164 mergetmp3 = mergetmp + c
166 worklist = {mergetmp2, mergetmp3}
168 mergetmp4 = mergetmp2 + mergetmp3
170 worklist = {mergetmp4}
172 because we have one operation left, we can now just set the original
173 statement equal to the result of that operation.
175 This will at least expose a + b and d + e to redundancy elimination
176 as binary operations.
178 For extra points, you can reuse the old statements to build the
179 mergetmps, since you shouldn't run out.
181 So why don't we do this?
183 Because it's expensive, and rarely will help. Most trees we are
184 reassociating have 3 or less ops. If they have 2 ops, they already
185 will be written into a nice single binary op. If you have 3 ops, a
186 single simple check suffices to tell you whether the first two are of the
187 same rank. If so, you know to order it
190 newstmt = mergetmp + op3
194 newstmt = mergetmp + op1
196 If all three are of the same rank, you can't expose them all in a
197 single binary operator anyway, so the above is *still* the best you
can do.
200 Thus, this is what we do. When we have three ops left, we check to see
201 what order to put them in, and call it a day. As a nod to vector sum
202 reduction, we check if any of the ops are really a phi node that is a
203 destructive update for the associating op, and keep the destructive
204 update together for vector sum reduction recognition. */
211 int constants_eliminated
;
214 int pows_encountered
;
218 /* Operator, rank pair. */
219 typedef struct operand_entry
227 static alloc_pool operand_entry_pool
;
229 /* This is used to assign a unique ID to each struct operand_entry
230 so that qsort results are identical on different hosts. */
231 static int next_operand_entry_id
;
233 /* Starting rank number for a given basic block, so that we can rank
234 operations using unmovable instructions in that BB based on the bb
236 static long *bb_rank
;
238 /* Operand->rank hashtable. */
239 static hash_map
<tree
, long> *operand_rank
;
241 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
242 all basic blocks the CFG should be adjusted - basic blocks
243 split right after that SSA_NAME's definition statement and before
244 the only use, which must be a bit ior. */
245 static vec
<tree
> reassoc_branch_fixups
;
248 static long get_rank (tree
);
249 static bool reassoc_stmt_dominates_stmt_p (gimple
, gimple
);
251 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
252 possibly added by gsi_remove. */
255 reassoc_remove_stmt (gimple_stmt_iterator
*gsi
)
257 gimple stmt
= gsi_stmt (*gsi
);
259 if (!MAY_HAVE_DEBUG_STMTS
|| gimple_code (stmt
) == GIMPLE_PHI
)
260 return gsi_remove (gsi
, true);
262 gimple_stmt_iterator prev
= *gsi
;
264 unsigned uid
= gimple_uid (stmt
);
265 basic_block bb
= gimple_bb (stmt
);
266 bool ret
= gsi_remove (gsi
, true);
267 if (!gsi_end_p (prev
))
270 prev
= gsi_start_bb (bb
);
271 gimple end_stmt
= gsi_stmt (*gsi
);
272 while ((stmt
= gsi_stmt (prev
)) != end_stmt
)
274 gcc_assert (stmt
&& is_gimple_debug (stmt
) && gimple_uid (stmt
) == 0);
275 gimple_set_uid (stmt
, uid
);
281 /* Bias amount for loop-carried phis. We want this to be larger than
282 the depth of any reassociation tree we can see, but not larger than
283 the rank difference between two blocks. */
284 #define PHI_LOOP_BIAS (1 << 15)
286 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
287 an innermost loop, and the phi has only a single use which is inside
288 the loop, then the rank is the block rank of the loop latch plus an
289 extra bias for the loop-carried dependence. This causes expressions
290 calculated into an accumulator variable to be independent for each
291 iteration of the loop. If STMT is some other phi, the rank is the
292 block rank of its containing block. */
294 phi_rank (gimple stmt
)
296 basic_block bb
= gimple_bb (stmt
);
297 struct loop
*father
= bb
->loop_father
;
303 /* We only care about real loops (those with a latch). */
305 return bb_rank
[bb
->index
];
307 /* Interesting phis must be in headers of innermost loops. */
308 if (bb
!= father
->header
310 return bb_rank
[bb
->index
];
312 /* Ignore virtual SSA_NAMEs. */
313 res
= gimple_phi_result (stmt
);
314 if (virtual_operand_p (res
))
315 return bb_rank
[bb
->index
];
317 /* The phi definition must have a single use, and that use must be
318 within the loop. Otherwise this isn't an accumulator pattern. */
319 if (!single_imm_use (res
, &use
, &use_stmt
)
320 || gimple_bb (use_stmt
)->loop_father
!= father
)
321 return bb_rank
[bb
->index
];
323 /* Look for phi arguments from within the loop. If found, bias this phi. */
324 for (i
= 0; i
< gimple_phi_num_args (stmt
); i
++)
326 tree arg
= gimple_phi_arg_def (stmt
, i
);
327 if (TREE_CODE (arg
) == SSA_NAME
328 && !SSA_NAME_IS_DEFAULT_DEF (arg
))
330 gimple def_stmt
= SSA_NAME_DEF_STMT (arg
);
331 if (gimple_bb (def_stmt
)->loop_father
== father
)
332 return bb_rank
[father
->latch
->index
] + PHI_LOOP_BIAS
;
336 /* Must be an uninteresting phi. */
337 return bb_rank
[bb
->index
];
340 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
341 loop-carried dependence of an innermost loop, return TRUE; else
344 loop_carried_phi (tree exp
)
349 if (TREE_CODE (exp
) != SSA_NAME
350 || SSA_NAME_IS_DEFAULT_DEF (exp
))
353 phi_stmt
= SSA_NAME_DEF_STMT (exp
);
355 if (gimple_code (SSA_NAME_DEF_STMT (exp
)) != GIMPLE_PHI
)
358 /* Non-loop-carried phis have block rank. Loop-carried phis have
359 an additional bias added in. If this phi doesn't have block rank,
360 it's biased and should not be propagated. */
361 block_rank
= bb_rank
[gimple_bb (phi_stmt
)->index
];
363 if (phi_rank (phi_stmt
) != block_rank
)
369 /* Return the maximum of RANK and the rank that should be propagated
370 from expression OP. For most operands, this is just the rank of OP.
371 For loop-carried phis, the value is zero to avoid undoing the bias
372 in favor of the phi. */
374 propagate_rank (long rank
, tree op
)
378 if (loop_carried_phi (op
))
381 op_rank
= get_rank (op
);
383 return MAX (rank
, op_rank
);
386 /* Look up the operand rank structure for expression E. */
389 find_operand_rank (tree e
)
391 long *slot
= operand_rank
->get (e
);
392 return slot
? *slot
: -1;
395 /* Insert {E,RANK} into the operand rank hashtable. */
398 insert_operand_rank (tree e
, long rank
)
400 gcc_assert (rank
> 0);
401 gcc_assert (!operand_rank
->put (e
, rank
));
404 /* Given an expression E, return the rank of the expression. */
409 /* Constants have rank 0. */
410 if (is_gimple_min_invariant (e
))
413 /* SSA_NAME's have the rank of the expression they are the result
415 For globals and uninitialized values, the rank is 0.
416 For function arguments, use the pre-setup rank.
417 For PHI nodes, stores, asm statements, etc, we use the rank of
419 For simple operations, the rank is the maximum rank of any of
420 its operands, or the bb_rank, whichever is less.
421 I make no claims that this is optimal, however, it gives good
424 /* We make an exception to the normal ranking system to break
425 dependences of accumulator variables in loops. Suppose we
426 have a simple one-block loop containing:
433 As shown, each iteration of the calculation into x is fully
434 dependent upon the iteration before it. We would prefer to
435 see this in the form:
442 If the loop is unrolled, the calculations of b and c from
443 different iterations can be interleaved.
445 To obtain this result during reassociation, we bias the rank
446 of the phi definition x_1 upward, when it is recognized as an
447 accumulator pattern. The artificial rank causes it to be
448 added last, providing the desired independence. */
450 if (TREE_CODE (e
) == SSA_NAME
)
457 if (SSA_NAME_IS_DEFAULT_DEF (e
))
458 return find_operand_rank (e
);
460 stmt
= SSA_NAME_DEF_STMT (e
);
461 if (gimple_code (stmt
) == GIMPLE_PHI
)
462 return phi_rank (stmt
);
464 if (!is_gimple_assign (stmt
)
465 || gimple_vdef (stmt
))
466 return bb_rank
[gimple_bb (stmt
)->index
];
468 /* If we already have a rank for this expression, use that. */
469 rank
= find_operand_rank (e
);
473 /* Otherwise, find the maximum rank for the operands. As an
474 exception, remove the bias from loop-carried phis when propagating
475 the rank so that dependent operations are not also biased. */
477 if (gimple_assign_single_p (stmt
))
479 tree rhs
= gimple_assign_rhs1 (stmt
);
480 n
= TREE_OPERAND_LENGTH (rhs
);
482 rank
= propagate_rank (rank
, rhs
);
485 for (i
= 0; i
< n
; i
++)
487 op
= TREE_OPERAND (rhs
, i
);
490 rank
= propagate_rank (rank
, op
);
496 n
= gimple_num_ops (stmt
);
497 for (i
= 1; i
< n
; i
++)
499 op
= gimple_op (stmt
, i
);
501 rank
= propagate_rank (rank
, op
);
505 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
507 fprintf (dump_file
, "Rank for ");
508 print_generic_expr (dump_file
, e
, 0);
509 fprintf (dump_file
, " is %ld\n", (rank
+ 1));
512 /* Note the rank in the hashtable so we don't recompute it. */
513 insert_operand_rank (e
, (rank
+ 1));
517 /* Globals, etc, are rank 0 */
522 /* We want integer ones to end up last no matter what, since they are
523 the ones we can do the most with. */
524 #define INTEGER_CONST_TYPE 1 << 3
525 #define FLOAT_CONST_TYPE 1 << 2
526 #define OTHER_CONST_TYPE 1 << 1
528 /* Classify an invariant tree into integer, float, or other, so that
529 we can sort them to be near other constants of the same type. */
531 constant_type (tree t
)
533 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
534 return INTEGER_CONST_TYPE
;
535 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
536 return FLOAT_CONST_TYPE
;
538 return OTHER_CONST_TYPE
;
541 /* qsort comparison function to sort operand entries PA and PB by rank
542 so that the sorted array is ordered by rank in decreasing order. */
544 sort_by_operand_rank (const void *pa
, const void *pb
)
546 const operand_entry_t oea
= *(const operand_entry_t
*)pa
;
547 const operand_entry_t oeb
= *(const operand_entry_t
*)pb
;
549 /* It's nicer for optimize_expression if constants that are likely
550 to fold when added/multiplied//whatever are put next to each
551 other. Since all constants have rank 0, order them by type. */
552 if (oeb
->rank
== 0 && oea
->rank
== 0)
554 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
555 return constant_type (oeb
->op
) - constant_type (oea
->op
);
557 /* To make sorting result stable, we use unique IDs to determine
559 return oeb
->id
- oea
->id
;
562 /* Lastly, make sure the versions that are the same go next to each
564 if ((oeb
->rank
- oea
->rank
== 0)
565 && TREE_CODE (oea
->op
) == SSA_NAME
566 && TREE_CODE (oeb
->op
) == SSA_NAME
)
568 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
569 versions of removed SSA_NAMEs, so if possible, prefer to sort
570 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
572 if (!SSA_NAME_IS_DEFAULT_DEF (oea
->op
)
573 && !SSA_NAME_IS_DEFAULT_DEF (oeb
->op
)
574 && SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
576 gimple stmta
= SSA_NAME_DEF_STMT (oea
->op
);
577 gimple stmtb
= SSA_NAME_DEF_STMT (oeb
->op
);
578 basic_block bba
= gimple_bb (stmta
);
579 basic_block bbb
= gimple_bb (stmtb
);
582 if (bb_rank
[bbb
->index
] != bb_rank
[bba
->index
])
583 return bb_rank
[bbb
->index
] - bb_rank
[bba
->index
];
587 bool da
= reassoc_stmt_dominates_stmt_p (stmta
, stmtb
);
588 bool db
= reassoc_stmt_dominates_stmt_p (stmtb
, stmta
);
594 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
595 return SSA_NAME_VERSION (oeb
->op
) - SSA_NAME_VERSION (oea
->op
);
597 return oeb
->id
- oea
->id
;
600 if (oeb
->rank
!= oea
->rank
)
601 return oeb
->rank
- oea
->rank
;
603 return oeb
->id
- oea
->id
;
606 /* Add an operand entry to *OPS for the tree operand OP. */
609 add_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
)
611 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
614 oe
->rank
= get_rank (op
);
615 oe
->id
= next_operand_entry_id
++;
620 /* Add an operand entry to *OPS for the tree operand OP with repeat
624 add_repeat_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
,
625 HOST_WIDE_INT repeat
)
627 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
630 oe
->rank
= get_rank (op
);
631 oe
->id
= next_operand_entry_id
++;
635 reassociate_stats
.pows_encountered
++;
638 /* Return true if STMT is reassociable operation containing a binary
639 operation with tree code CODE, and is inside LOOP. */
642 is_reassociable_op (gimple stmt
, enum tree_code code
, struct loop
*loop
)
644 basic_block bb
= gimple_bb (stmt
);
646 if (gimple_bb (stmt
) == NULL
)
649 if (!flow_bb_inside_loop_p (loop
, bb
))
652 if (is_gimple_assign (stmt
)
653 && gimple_assign_rhs_code (stmt
) == code
654 && has_single_use (gimple_assign_lhs (stmt
)))
661 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
662 operand of the negate operation. Otherwise, return NULL. */
665 get_unary_op (tree name
, enum tree_code opcode
)
667 gimple stmt
= SSA_NAME_DEF_STMT (name
);
669 if (!is_gimple_assign (stmt
))
672 if (gimple_assign_rhs_code (stmt
) == opcode
)
673 return gimple_assign_rhs1 (stmt
);
677 /* If CURR and LAST are a pair of ops that OPCODE allows us to
678 eliminate through equivalences, do so, remove them from OPS, and
679 return true. Otherwise, return false. */
682 eliminate_duplicate_pair (enum tree_code opcode
,
683 vec
<operand_entry_t
> *ops
,
686 operand_entry_t curr
,
687 operand_entry_t last
)
690 /* If we have two of the same op, and the opcode is & |, min, or max,
691 we can eliminate one of them.
692 If we have two of the same op, and the opcode is ^, we can
693 eliminate both of them. */
695 if (last
&& last
->op
== curr
->op
)
703 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
705 fprintf (dump_file
, "Equivalence: ");
706 print_generic_expr (dump_file
, curr
->op
, 0);
707 fprintf (dump_file
, " [&|minmax] ");
708 print_generic_expr (dump_file
, last
->op
, 0);
709 fprintf (dump_file
, " -> ");
710 print_generic_stmt (dump_file
, last
->op
, 0);
713 ops
->ordered_remove (i
);
714 reassociate_stats
.ops_eliminated
++;
719 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
721 fprintf (dump_file
, "Equivalence: ");
722 print_generic_expr (dump_file
, curr
->op
, 0);
723 fprintf (dump_file
, " ^ ");
724 print_generic_expr (dump_file
, last
->op
, 0);
725 fprintf (dump_file
, " -> nothing\n");
728 reassociate_stats
.ops_eliminated
+= 2;
730 if (ops
->length () == 2)
733 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
738 ops
->ordered_remove (i
-1);
739 ops
->ordered_remove (i
-1);
751 static vec
<tree
> plus_negates
;
753 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
754 expression, look in OPS for a corresponding positive operation to cancel
755 it out. If we find one, remove the other from OPS, replace
756 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
760 eliminate_plus_minus_pair (enum tree_code opcode
,
761 vec
<operand_entry_t
> *ops
,
762 unsigned int currindex
,
763 operand_entry_t curr
)
770 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
773 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
774 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
775 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
778 /* Any non-negated version will have a rank that is one less than
779 the current rank. So once we hit those ranks, if we don't find
782 for (i
= currindex
+ 1;
783 ops
->iterate (i
, &oe
)
784 && oe
->rank
>= curr
->rank
- 1 ;
787 if (oe
->op
== negateop
)
790 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
792 fprintf (dump_file
, "Equivalence: ");
793 print_generic_expr (dump_file
, negateop
, 0);
794 fprintf (dump_file
, " + -");
795 print_generic_expr (dump_file
, oe
->op
, 0);
796 fprintf (dump_file
, " -> 0\n");
799 ops
->ordered_remove (i
);
800 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
801 ops
->ordered_remove (currindex
);
802 reassociate_stats
.ops_eliminated
++;
806 else if (oe
->op
== notop
)
808 tree op_type
= TREE_TYPE (oe
->op
);
810 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
812 fprintf (dump_file
, "Equivalence: ");
813 print_generic_expr (dump_file
, notop
, 0);
814 fprintf (dump_file
, " + ~");
815 print_generic_expr (dump_file
, oe
->op
, 0);
816 fprintf (dump_file
, " -> -1\n");
819 ops
->ordered_remove (i
);
820 add_to_ops_vec (ops
, build_int_cst_type (op_type
, -1));
821 ops
->ordered_remove (currindex
);
822 reassociate_stats
.ops_eliminated
++;
828 /* CURR->OP is a negate expr in a plus expr: save it for later
829 inspection in repropagate_negates(). */
830 if (negateop
!= NULL_TREE
)
831 plus_negates
.safe_push (curr
->op
);
836 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
837 bitwise not expression, look in OPS for a corresponding operand to
838 cancel it out. If we find one, remove the other from OPS, replace
839 OPS[CURRINDEX] with 0, and return true. Otherwise, return
843 eliminate_not_pairs (enum tree_code opcode
,
844 vec
<operand_entry_t
> *ops
,
845 unsigned int currindex
,
846 operand_entry_t curr
)
852 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
853 || TREE_CODE (curr
->op
) != SSA_NAME
)
856 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
857 if (notop
== NULL_TREE
)
860 /* Any non-not version will have a rank that is one less than
861 the current rank. So once we hit those ranks, if we don't find
864 for (i
= currindex
+ 1;
865 ops
->iterate (i
, &oe
)
866 && oe
->rank
>= curr
->rank
- 1;
871 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
873 fprintf (dump_file
, "Equivalence: ");
874 print_generic_expr (dump_file
, notop
, 0);
875 if (opcode
== BIT_AND_EXPR
)
876 fprintf (dump_file
, " & ~");
877 else if (opcode
== BIT_IOR_EXPR
)
878 fprintf (dump_file
, " | ~");
879 print_generic_expr (dump_file
, oe
->op
, 0);
880 if (opcode
== BIT_AND_EXPR
)
881 fprintf (dump_file
, " -> 0\n");
882 else if (opcode
== BIT_IOR_EXPR
)
883 fprintf (dump_file
, " -> -1\n");
886 if (opcode
== BIT_AND_EXPR
)
887 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
888 else if (opcode
== BIT_IOR_EXPR
)
889 oe
->op
= build_all_ones_cst (TREE_TYPE (oe
->op
));
891 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
893 ops
->quick_push (oe
);
901 /* Use constant value that may be present in OPS to try to eliminate
902 operands. Note that this function is only really used when we've
903 eliminated ops for other reasons, or merged constants. Across
904 single statements, fold already does all of this, plus more. There
905 is little point in duplicating logic, so I've only included the
906 identities that I could ever construct testcases to trigger. */
909 eliminate_using_constants (enum tree_code opcode
,
910 vec
<operand_entry_t
> *ops
)
912 operand_entry_t oelast
= ops
->last ();
913 tree type
= TREE_TYPE (oelast
->op
);
915 if (oelast
->rank
== 0
916 && (INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
921 if (integer_zerop (oelast
->op
))
923 if (ops
->length () != 1)
925 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
926 fprintf (dump_file
, "Found & 0, removing all other ops\n");
928 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
931 ops
->quick_push (oelast
);
935 else if (integer_all_onesp (oelast
->op
))
937 if (ops
->length () != 1)
939 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
940 fprintf (dump_file
, "Found & -1, removing\n");
942 reassociate_stats
.ops_eliminated
++;
947 if (integer_all_onesp (oelast
->op
))
949 if (ops
->length () != 1)
951 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
952 fprintf (dump_file
, "Found | -1, removing all other ops\n");
954 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
957 ops
->quick_push (oelast
);
961 else if (integer_zerop (oelast
->op
))
963 if (ops
->length () != 1)
965 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
966 fprintf (dump_file
, "Found | 0, removing\n");
968 reassociate_stats
.ops_eliminated
++;
973 if (integer_zerop (oelast
->op
)
974 || (FLOAT_TYPE_P (type
)
975 && !HONOR_NANS (type
)
976 && !HONOR_SIGNED_ZEROS (type
)
977 && real_zerop (oelast
->op
)))
979 if (ops
->length () != 1)
981 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
982 fprintf (dump_file
, "Found * 0, removing all other ops\n");
984 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
986 ops
->quick_push (oelast
);
990 else if (integer_onep (oelast
->op
)
991 || (FLOAT_TYPE_P (type
)
992 && !HONOR_SNANS (type
)
993 && real_onep (oelast
->op
)))
995 if (ops
->length () != 1)
997 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
998 fprintf (dump_file
, "Found * 1, removing\n");
1000 reassociate_stats
.ops_eliminated
++;
1008 if (integer_zerop (oelast
->op
)
1009 || (FLOAT_TYPE_P (type
)
1010 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
1011 && fold_real_zero_addition_p (type
, oelast
->op
,
1012 opcode
== MINUS_EXPR
)))
1014 if (ops
->length () != 1)
1016 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1017 fprintf (dump_file
, "Found [|^+] 0, removing\n");
1019 reassociate_stats
.ops_eliminated
++;
1031 static void linearize_expr_tree (vec
<operand_entry_t
> *, gimple
,
1034 /* Structure for tracking and counting operands. */
1035 typedef struct oecount_s
{
1038 enum tree_code oecode
;
1043 /* The heap for the oecount hashtable and the sorted list of operands. */
1044 static vec
<oecount
> cvec
;
1047 /* Oecount hashtable helpers. */
1049 struct oecount_hasher
1051 typedef int value_type
;
1052 typedef int compare_type
;
1053 typedef int store_values_directly
;
1054 static inline hashval_t
hash (const value_type
&);
1055 static inline bool equal (const value_type
&, const compare_type
&);
1056 static bool is_deleted (int &v
) { return v
== 1; }
1057 static void mark_deleted (int &e
) { e
= 1; }
1058 static bool is_empty (int &v
) { return v
== 0; }
1059 static void mark_empty (int &e
) { e
= 0; }
1060 static void remove (int &) {}
1063 /* Hash function for oecount. */
1066 oecount_hasher::hash (const value_type
&p
)
1068 const oecount
*c
= &cvec
[p
- 42];
1069 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
1072 /* Comparison function for oecount. */
1075 oecount_hasher::equal (const value_type
&p1
, const compare_type
&p2
)
1077 const oecount
*c1
= &cvec
[p1
- 42];
1078 const oecount
*c2
= &cvec
[p2
- 42];
1079 return (c1
->oecode
== c2
->oecode
1080 && c1
->op
== c2
->op
);
1083 /* Comparison function for qsort sorting oecount elements by count. */
1086 oecount_cmp (const void *p1
, const void *p2
)
1088 const oecount
*c1
= (const oecount
*)p1
;
1089 const oecount
*c2
= (const oecount
*)p2
;
1090 if (c1
->cnt
!= c2
->cnt
)
1091 return c1
->cnt
- c2
->cnt
;
1093 /* If counts are identical, use unique IDs to stabilize qsort. */
1094 return c1
->id
- c2
->id
;
1097 /* Return TRUE iff STMT represents a builtin call that raises OP
1098 to some exponent. */
1101 stmt_is_power_of_op (gimple stmt
, tree op
)
1105 if (!is_gimple_call (stmt
))
1108 fndecl
= gimple_call_fndecl (stmt
);
1111 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
1114 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1116 CASE_FLT_FN (BUILT_IN_POW
):
1117 CASE_FLT_FN (BUILT_IN_POWI
):
1118 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1125 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1126 in place and return the result. Assumes that stmt_is_power_of_op
1127 was previously called for STMT and returned TRUE. */
1129 static HOST_WIDE_INT
1130 decrement_power (gimple stmt
)
1132 REAL_VALUE_TYPE c
, cint
;
1133 HOST_WIDE_INT power
;
1136 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1138 CASE_FLT_FN (BUILT_IN_POW
):
1139 arg1
= gimple_call_arg (stmt
, 1);
1140 c
= TREE_REAL_CST (arg1
);
1141 power
= real_to_integer (&c
) - 1;
1142 real_from_integer (&cint
, VOIDmode
, power
, SIGNED
);
1143 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1146 CASE_FLT_FN (BUILT_IN_POWI
):
1147 arg1
= gimple_call_arg (stmt
, 1);
1148 power
= TREE_INT_CST_LOW (arg1
) - 1;
1149 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1157 /* Find the single immediate use of STMT's LHS, and replace it
1158 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1159 replace *DEF with OP as well. */
1162 propagate_op_to_single_use (tree op
, gimple stmt
, tree
*def
)
1167 gimple_stmt_iterator gsi
;
1169 if (is_gimple_call (stmt
))
1170 lhs
= gimple_call_lhs (stmt
);
1172 lhs
= gimple_assign_lhs (stmt
);
1174 gcc_assert (has_single_use (lhs
));
1175 single_imm_use (lhs
, &use
, &use_stmt
);
1179 if (TREE_CODE (op
) != SSA_NAME
)
1180 update_stmt (use_stmt
);
1181 gsi
= gsi_for_stmt (stmt
);
1182 unlink_stmt_vdef (stmt
);
1183 reassoc_remove_stmt (&gsi
);
1184 release_defs (stmt
);
1187 /* Walks the linear chain with result *DEF searching for an operation
1188 with operand OP and code OPCODE removing that from the chain. *DEF
1189 is updated if there is only one operand but no operation left. */
1192 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1194 gimple stmt
= SSA_NAME_DEF_STMT (*def
);
1200 if (opcode
== MULT_EXPR
1201 && stmt_is_power_of_op (stmt
, op
))
1203 if (decrement_power (stmt
) == 1)
1204 propagate_op_to_single_use (op
, stmt
, def
);
1208 name
= gimple_assign_rhs1 (stmt
);
1210 /* If this is the operation we look for and one of the operands
1211 is ours simply propagate the other operand into the stmts
1213 if (gimple_assign_rhs_code (stmt
) == opcode
1215 || gimple_assign_rhs2 (stmt
) == op
))
1218 name
= gimple_assign_rhs2 (stmt
);
1219 propagate_op_to_single_use (name
, stmt
, def
);
1223 /* We might have a multiply of two __builtin_pow* calls, and
1224 the operand might be hiding in the rightmost one. */
1225 if (opcode
== MULT_EXPR
1226 && gimple_assign_rhs_code (stmt
) == opcode
1227 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1228 && has_single_use (gimple_assign_rhs2 (stmt
)))
1230 gimple stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1231 if (stmt_is_power_of_op (stmt2
, op
))
1233 if (decrement_power (stmt2
) == 1)
1234 propagate_op_to_single_use (op
, stmt2
, def
);
1239 /* Continue walking the chain. */
1240 gcc_assert (name
!= op
1241 && TREE_CODE (name
) == SSA_NAME
);
1242 stmt
= SSA_NAME_DEF_STMT (name
);
1247 /* Returns true if statement S1 dominates statement S2. Like
1248 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1251 reassoc_stmt_dominates_stmt_p (gimple s1
, gimple s2
)
1253 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1255 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1256 SSA_NAME. Assume it lives at the beginning of function and
1257 thus dominates everything. */
1258 if (!bb1
|| s1
== s2
)
1261 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1267 /* PHIs in the same basic block are assumed to be
1268 executed all in parallel, if only one stmt is a PHI,
1269 it dominates the other stmt in the same basic block. */
1270 if (gimple_code (s1
) == GIMPLE_PHI
)
1273 if (gimple_code (s2
) == GIMPLE_PHI
)
1276 gcc_assert (gimple_uid (s1
) && gimple_uid (s2
));
1278 if (gimple_uid (s1
) < gimple_uid (s2
))
1281 if (gimple_uid (s1
) > gimple_uid (s2
))
1284 gimple_stmt_iterator gsi
= gsi_for_stmt (s1
);
1285 unsigned int uid
= gimple_uid (s1
);
1286 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1288 gimple s
= gsi_stmt (gsi
);
1289 if (gimple_uid (s
) != uid
)
1298 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
1301 /* Insert STMT after INSERT_POINT. */
/* NOTE(review): lossy extraction — lines are split and some originals
   elided; code reproduced verbatim. Also assigns STMT a UID copied from
   its insertion neighbor so reassoc_stmt_dominates_stmt_p keeps working. */
1304 insert_stmt_after (gimple stmt
, gimple insert_point
)
1306 gimple_stmt_iterator gsi
;
/* For a PHI insert-point we insert at the start of its block instead of
   directly after the PHI. */
1309 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1310 bb
= gimple_bb (insert_point
);
1311 else if (!stmt_ends_bb_p (insert_point
))
1313 gsi
= gsi_for_stmt (insert_point
);
1314 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1315 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1319 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1320 thus if it must end a basic block, it should be a call that can
1321 throw, or some assignment that can throw. If it throws, the LHS
1322 of it will not be initialized though, so only valid places using
1323 the SSA_NAME should be dominated by the fallthru edge. */
1324 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
1325 gsi
= gsi_after_labels (bb
);
1326 if (gsi_end_p (gsi
))
/* Empty destination block: take the UID from its last stmt, or 1 if the
   block has none at all. */
1328 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1329 gimple_set_uid (stmt
,
1330 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1333 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1334 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1337 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1338 the result. Places the statement after the definition of either
1339 OP1 or OP2. Returns the new statement. */
/* NOTE(review): lossy extraction — code reproduced verbatim; several
   original lines (return, braces) are elided in this view. */
1342 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
1344 gimple op1def
= NULL
, op2def
= NULL
;
1345 gimple_stmt_iterator gsi
;
1349 /* Create the addition statement. */
1350 op
= make_ssa_name (type
)
1351 sum
= gimple_build_assign (op
, opcode
, op1
, op2
);
1353 /* Find an insertion place and insert. */
1354 if (TREE_CODE (op1
) == SSA_NAME
)
1355 op1def
= SSA_NAME_DEF_STMT (op1
);
1356 if (TREE_CODE (op2
) == SSA_NAME
)
1357 op2def
= SSA_NAME_DEF_STMT (op2
);
/* Neither operand has a real defining stmt (constants or default defs):
   place the new stmt at the start of the function's first real block. */
1358 if ((!op1def
|| gimple_nop_p (op1def
))
1359 && (!op2def
|| gimple_nop_p (op2def
)))
1361 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1362 if (gsi_end_p (gsi
))
1364 gimple_stmt_iterator gsi2
1365 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1366 gimple_set_uid (sum
,
1367 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1370 gimple_set_uid (sum
, gimple_uid (gsi_stmt (gsi
)));
1371 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
/* Otherwise insert after whichever defining stmt comes later, so both
   operands dominate the new statement. */
1375 gimple insert_point
;
1376 if ((!op1def
|| gimple_nop_p (op1def
))
1377 || (op2def
&& !gimple_nop_p (op2def
)
1378 && reassoc_stmt_dominates_stmt_p (op1def
, op2def
)))
1379 insert_point
= op2def
;
1381 insert_point
= op1def
;
1382 insert_stmt_after (sum
, insert_point
);
1389 /* Perform un-distribution of divisions and multiplications.
1390 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1391 to (A + B) / X for real X.
1393 The algorithm is organized as follows.
1395 - First we walk the addition chain *OPS looking for summands that
1396 are defined by a multiplication or a real division. This results
1397 in the candidates bitmap with relevant indices into *OPS.
1399 - Second we build the chains of multiplications or divisions for
1400 these candidates, counting the number of occurrences of (operand, code)
1401 pairs in all of the candidates chains.
1403 - Third we sort the (operand, code) pairs by number of occurrence and
1404 process them starting with the pair with the most uses.
1406 * For each such pair we walk the candidates again to build a
1407 second candidate bitmap noting all multiplication/division chains
1408 that have at least one occurrence of (operand, code).
1410 * We build an alternate addition chain only covering these
1411 candidates with one (operand, code) operation removed from their
1412 multiplication/division chain.
1414 * The first candidate gets replaced by the alternate addition chain
1415 multiplied/divided by the operand.
1417 * All candidate chains get disabled for further processing and
1418 processing of (operand, code) pairs continues.
1420 The alternate addition chains built are re-processed by the main
1421 reassociation algorithm which allows optimizing a * x * y + b * y * x
1422 to (a + b ) * x * y in one invocation of the reassociation pass. */
/* NOTE(review): lossy extraction — many original lines are elided and the
   remaining ones are split; code below is reproduced verbatim. */
1425 undistribute_ops_list (enum tree_code opcode
,
1426 vec
<operand_entry_t
> *ops
, struct loop
*loop
)
1428 unsigned int length
= ops
->length ();
1429 operand_entry_t oe1
;
1431 sbitmap candidates
, candidates2
;
1432 unsigned nr_candidates
, nr_candidates2
;
1433 sbitmap_iterator sbi0
;
1434 vec
<operand_entry_t
> *subops
;
1435 bool changed
= false;
/* next_oecount_id provides stable tie-break ids for hash-table entries. */
1436 int next_oecount_id
= 0;
1439 || opcode
!= PLUS_EXPR
)
1442 /* Build a list of candidates to process. */
1443 candidates
= sbitmap_alloc (length
);
1444 bitmap_clear (candidates
);
1446 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1448 enum tree_code dcode
;
1451 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1453 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1454 if (!is_gimple_assign (oe1def
))
/* Only summands defined by a reassociable MULT_EXPR or RDIV_EXPR chain
   are candidates for un-distribution. */
1456 dcode
= gimple_assign_rhs_code (oe1def
);
1457 if ((dcode
!= MULT_EXPR
1458 && dcode
!= RDIV_EXPR
)
1459 || !is_reassociable_op (oe1def
, dcode
, loop
))
1462 bitmap_set_bit (candidates
, i
);
1466 if (nr_candidates
< 2)
1468 sbitmap_free (candidates
);
1472 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1474 fprintf (dump_file
, "searching for un-distribute opportunities ");
1475 print_generic_expr (dump_file
,
1476 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, 0);
1477 fprintf (dump_file
, " %d\n", nr_candidates
);
1480 /* Build linearized sub-operand lists and the counting table. */
1483 hash_table
<oecount_hasher
> ctable (15);
1485 /* ??? Macro arguments cannot have multi-argument template types in
1486 them. This typedef is needed to workaround that limitation. */
1487 typedef vec
<operand_entry_t
> vec_operand_entry_t_heap
;
1488 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1489 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1492 enum tree_code oecode
;
1495 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1496 oecode
= gimple_assign_rhs_code (oedef
);
1497 linearize_expr_tree (&subops
[i
], oedef
,
1498 associative_tree_code (oecode
), false);
1500 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1507 c
.id
= next_oecount_id
++;
/* The +41/-42 offsets keep hash-table slot values away from the table's
   reserved empty/deleted markers (idx stored is cvec index + 42). */
1510 idx
= cvec
.length () + 41;
1511 slot
= ctable
.find_slot (idx
, INSERT
);
1519 cvec
[*slot
- 42].cnt
++;
1524 /* Sort the counting table. */
1525 cvec
.qsort (oecount_cmp
);
1527 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1530 fprintf (dump_file
, "Candidates:\n");
1531 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1533 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1534 c
->oecode
== MULT_EXPR
1535 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1536 print_generic_expr (dump_file
, c
->op
, 0);
1537 fprintf (dump_file
, "\n");
1541 /* Process the (operand, code) pairs in order of most occurrence. */
1542 candidates2
= sbitmap_alloc (length
);
1543 while (!cvec
.is_empty ())
1545 oecount
*c
= &cvec
.last ();
1549 /* Now collect the operands in the outer chain that contain
1550 the common operand in their inner chain. */
1551 bitmap_clear (candidates2
);
1553 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1556 enum tree_code oecode
;
1558 tree op
= (*ops
)[i
]->op
;
1560 /* If we undistributed in this chain already this may be
1562 if (TREE_CODE (op
) != SSA_NAME
)
1565 oedef
= SSA_NAME_DEF_STMT (op
);
1566 oecode
= gimple_assign_rhs_code (oedef
);
1567 if (oecode
!= c
->oecode
)
1570 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1572 if (oe1
->op
== c
->op
)
1574 bitmap_set_bit (candidates2
, i
);
1581 if (nr_candidates2
>= 2)
1583 operand_entry_t oe1
, oe2
;
1585 int first
= bitmap_first_set_bit (candidates2
);
1587 /* Build the new addition chain. */
1588 oe1
= (*ops
)[first
];
1589 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1591 fprintf (dump_file
, "Building (");
1592 print_generic_expr (dump_file
, oe1
->op
, 0);
/* zero_one_operation strips one occurrence of (c->op, c->oecode) from
   the candidate's multiplication/division chain. */
1594 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1595 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1599 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1601 fprintf (dump_file
, " + ");
1602 print_generic_expr (dump_file
, oe2
->op
, 0);
1604 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1605 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1606 oe1
->op
, oe2
->op
, opcode
);
/* Merged entries are neutralized to zero in the addition chain. */
1607 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1609 oe1
->op
= gimple_get_lhs (sum
);
1612 /* Apply the multiplication/division. */
1613 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1614 oe1
->op
, c
->op
, c
->oecode
);
1615 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1617 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1618 print_generic_expr (dump_file
, c
->op
, 0);
1619 fprintf (dump_file
, "\n");
1622 /* Record it in the addition chain and disable further
1623 undistribution with this op. */
1624 oe1
->op
= gimple_assign_lhs (prod
);
1625 oe1
->rank
= get_rank (oe1
->op
);
1626 subops
[first
].release ();
1634 for (i
= 0; i
< ops
->length (); ++i
)
1635 subops
[i
].release ();
1638 sbitmap_free (candidates
);
1639 sbitmap_free (candidates2
);
1644 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1645 expression, examine the other OPS to see if any of them are comparisons
1646 of the same values, which we may be able to combine or eliminate.
1647 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
/* NOTE(review): lossy extraction — several original lines are elided;
   code reproduced verbatim. */
1650 eliminate_redundant_comparison (enum tree_code opcode
,
1651 vec
<operand_entry_t
> *ops
,
1652 unsigned int currindex
,
1653 operand_entry_t curr
)
1656 enum tree_code lcode
, rcode
;
/* Only applies when combining with | or &. */
1661 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
1664 /* Check that CURR is a comparison. */
1665 if (TREE_CODE (curr
->op
) != SSA_NAME
)
1667 def1
= SSA_NAME_DEF_STMT (curr
->op
);
1668 if (!is_gimple_assign (def1
))
1670 lcode
= gimple_assign_rhs_code (def1
);
1671 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
1673 op1
= gimple_assign_rhs1 (def1
);
1674 op2
= gimple_assign_rhs2 (def1
);
1676 /* Now look for a similar comparison in the remaining OPS. */
1677 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
1681 if (TREE_CODE (oe
->op
) != SSA_NAME
)
1683 def2
= SSA_NAME_DEF_STMT (oe
->op
);
1684 if (!is_gimple_assign (def2
))
1686 rcode
= gimple_assign_rhs_code (def2
);
1687 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
1690 /* If we got here, we have a match. See if we can combine the
1692 if (opcode
== BIT_IOR_EXPR
)
1693 t
= maybe_fold_or_comparisons (lcode
, op1
, op2
,
1694 rcode
, gimple_assign_rhs1 (def2
),
1695 gimple_assign_rhs2 (def2
));
1697 t
= maybe_fold_and_comparisons (lcode
, op1
, op2
,
1698 rcode
, gimple_assign_rhs1 (def2
),
1699 gimple_assign_rhs2 (def2
));
1703 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1704 always give us a boolean_type_node value back. If the original
1705 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1706 we need to convert. */
1707 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
1708 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
/* The folded result must be either a constant or a gimple-valid
   comparison before we commit to the replacement. */
1710 if (TREE_CODE (t
) != INTEGER_CST
1711 && !operand_equal_p (t
, curr
->op
, 0))
1713 enum tree_code subcode
;
1714 tree newop1
, newop2
;
1715 if (!COMPARISON_CLASS_P (t
))
1717 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1718 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1719 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1720 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
1724 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1726 fprintf (dump_file
, "Equivalence: ");
1727 print_generic_expr (dump_file
, curr
->op
, 0);
1728 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
1729 print_generic_expr (dump_file
, oe
->op
, 0);
1730 fprintf (dump_file
, " -> ");
1731 print_generic_expr (dump_file
, t
, 0);
1732 fprintf (dump_file
, "\n");
1735 /* Now we can delete oe, as it has been subsumed by the new combined
1737 ops
->ordered_remove (i
);
1738 reassociate_stats
.ops_eliminated
++;
1740 /* If t is the same as curr->op, we're done. Otherwise we must
1741 replace curr->op with t. Special case is if we got a constant
1742 back, in which case we add it to the end instead of in place of
1743 the current entry. */
1744 if (TREE_CODE (t
) == INTEGER_CST
)
1746 ops
->ordered_remove (currindex
);
1747 add_to_ops_vec (ops
, t
);
1749 else if (!operand_equal_p (t
, curr
->op
, 0))
1752 enum tree_code subcode
;
1755 gcc_assert (COMPARISON_CLASS_P (t
));
1756 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1757 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1758 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1759 gcc_checking_assert (is_gimple_val (newop1
)
1760 && is_gimple_val (newop2
));
/* Materialize the combined comparison as a new stmt and replace CURR. */
1761 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
1762 curr
->op
= gimple_get_lhs (sum
);
1770 /* Perform various identities and other optimizations on the list of
1771 operand entries, stored in OPS. The tree code for the binary
1772 operation between all the operands is OPCODE. */
/* NOTE(review): lossy extraction — elided lines and split statements;
   code reproduced verbatim. */
1775 optimize_ops_list (enum tree_code opcode
,
1776 vec
<operand_entry_t
> *ops
)
1778 unsigned int length
= ops
->length ();
1781 operand_entry_t oelast
= NULL
;
1782 bool iterate
= false;
1787 oelast
= ops
->last ();
1789 /* If the last two are constants, pop the constants off, merge them
1790 and try the next two. */
1791 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
1793 operand_entry_t oelm1
= (*ops
)[length
- 2];
1795 if (oelm1
->rank
== 0
1796 && is_gimple_min_invariant (oelm1
->op
)
1797 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
1798 TREE_TYPE (oelast
->op
)))
1800 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
1801 oelm1
->op
, oelast
->op
);
1803 if (folded
&& is_gimple_min_invariant (folded
))
1805 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1806 fprintf (dump_file
, "Merging constants\n");
/* Fold succeeded: push the merged constant and recurse to try merging
   further constants at the tail. */
1811 add_to_ops_vec (ops
, folded
);
1812 reassociate_stats
.constants_eliminated
++;
1814 optimize_ops_list (opcode
, ops
);
1820 eliminate_using_constants (opcode
, ops
);
1823 for (i
= 0; ops
->iterate (i
, &oe
);)
1827 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
1829 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
1830 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
1831 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
/* Re-read length/last entry and re-run the whole list optimization when
   any elimination changed the vector. */
1843 length
= ops
->length ();
1844 oelast
= ops
->last ();
1847 optimize_ops_list (opcode
, ops
);
1850 /* The following functions are subroutines to optimize_range_tests and allow
1851 it to try to change a logical combination of comparisons into a range
1855 X == 2 || X == 5 || X == 3 || X == 4
1859 (unsigned) (X - 2) <= 3
1861 For more information see comments above fold_test_range in fold-const.c,
1862 this implementation is for GIMPLE. */
1870 bool strict_overflow_p
;
1871 unsigned int idx
, next
;
1874 /* This is similar to make_range in fold-const.c, but on top of
1875 GIMPLE instead of trees. If EXP is non-NULL, it should be
1876 an SSA_NAME and STMT argument is ignored, otherwise STMT
1877 argument should be a GIMPLE_COND. */
/* NOTE(review): lossy extraction — the switch cases and loop structure of
   this function are heavily elided here; code reproduced verbatim. */
1880 init_range_entry (struct range_entry
*r
, tree exp
, gimple stmt
)
1884 bool is_bool
, strict_overflow_p
;
1888 r
->strict_overflow_p
= false;
1890 r
->high
= NULL_TREE
;
/* Reject a non-SSA or non-integral EXP up front. */
1891 if (exp
!= NULL_TREE
1892 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
1895 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1896 and see if we can refine the range. Some of the cases below may not
1897 happen, but it doesn't seem worth worrying about this. We "continue"
1898 the outer loop when we've changed something; otherwise we "break"
1899 the switch, which will "break" the while. */
1900 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
1903 strict_overflow_p
= false;
1905 if (exp
== NULL_TREE
)
1907 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
1909 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
1914 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
1919 enum tree_code code
;
1920 tree arg0
, arg1
, exp_type
;
/* Walk the defining stmt of EXP (or the GIMPLE_COND) and pull out the
   comparison code and operands to refine the range from. */
1924 if (exp
!= NULL_TREE
)
1926 if (TREE_CODE (exp
) != SSA_NAME
1927 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
))
1930 stmt
= SSA_NAME_DEF_STMT (exp
);
1931 if (!is_gimple_assign (stmt
))
1934 code
= gimple_assign_rhs_code (stmt
);
1935 arg0
= gimple_assign_rhs1 (stmt
);
1936 arg1
= gimple_assign_rhs2 (stmt
);
1937 exp_type
= TREE_TYPE (exp
);
1941 code
= gimple_cond_code (stmt
);
1942 arg0
= gimple_cond_lhs (stmt
);
1943 arg1
= gimple_cond_rhs (stmt
);
1944 exp_type
= boolean_type_node
;
1947 if (TREE_CODE (arg0
) != SSA_NAME
)
1949 loc
= gimple_location (stmt
);
1953 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
1954 /* Ensure the range is either +[-,0], +[0,0],
1955 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1956 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1957 or similar expression of unconditional true or
1958 false, it should not be negated. */
1959 && ((high
&& integer_zerop (high
))
1960 || (low
&& integer_onep (low
))))
1973 if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
1975 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
1980 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
/* Delegate one refinement step to fold-const's make_range_step. */
1995 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
1997 &strict_overflow_p
);
1998 if (nexp
!= NULL_TREE
)
2001 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2014 r
->strict_overflow_p
= strict_overflow_p
;
2018 /* Comparison function for qsort. Sort entries
2019 without SSA_NAME exp first, then with SSA_NAMEs sorted
2020 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2021 by increasing ->low and if ->low is the same, by increasing
2022 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
/* NOTE(review): lossy extraction — the return statements of the compare
   branches are elided in this view; code reproduced verbatim. */
2026 range_entry_cmp (const void *a
, const void *b
)
2028 const struct range_entry
*p
= (const struct range_entry
*) a
;
2029 const struct range_entry
*q
= (const struct range_entry
*) b
;
2031 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
2033 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2035 /* Group range_entries for the same SSA_NAME together. */
2036 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
2038 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
2040 /* If ->low is different, NULL low goes first, then by
2042 if (p
->low
!= NULL_TREE
)
2044 if (q
->low
!= NULL_TREE
)
/* fold_binary on LT/GT yields a constant when both bounds are constants;
   integer_onep means the comparison is definitely true. */
2046 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2048 if (tem
&& integer_onep (tem
))
2050 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2052 if (tem
&& integer_onep (tem
))
2058 else if (q
->low
!= NULL_TREE
)
2060 /* If ->high is different, NULL high goes last, before that by
2062 if (p
->high
!= NULL_TREE
)
2064 if (q
->high
!= NULL_TREE
)
2066 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2068 if (tem
&& integer_onep (tem
))
2070 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2072 if (tem
&& integer_onep (tem
))
2078 else if (q
->high
!= NULL_TREE
)
2080 /* If both ranges are the same, sort below by ascending idx. */
2085 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
/* Final tie-break: original index keeps the sort stable. */
2088 if (p
->idx
< q
->idx
)
2092 gcc_checking_assert (p
->idx
> q
->idx
);
2097 /* Helper routine of optimize_range_test.
2098 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2099 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2100 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2101 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2102 an array of COUNT pointers to other ranges. Return
2103 true if the range merge has been successful.
2104 If OPCODE is ERROR_MARK, this is called from within
2105 maybe_optimize_range_tests and is performing inter-bb range optimization.
2106 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
/* NOTE(review): lossy extraction — several lines (including part of this
   comment and some control flow) are elided; code reproduced verbatim. */
2110 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2111 struct range_entry
**otherrangep
,
2112 unsigned int count
, enum tree_code opcode
,
2113 vec
<operand_entry_t
> *ops
, tree exp
, gimple_seq seq
,
2114 bool in_p
, tree low
, tree high
, bool strict_overflow_p
)
2116 operand_entry_t oe
= (*ops
)[range
->idx
];
/* For the inter-bb (ERROR_MARK) case oe->op is NULL and oe->id holds a
   basic-block index; anchor at that block's last stmt instead. */
2118 gimple stmt
= op
? SSA_NAME_DEF_STMT (op
) :
2119 last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2120 location_t loc
= gimple_location (stmt
);
2121 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2122 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2124 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2125 gimple_stmt_iterator gsi
;
2128 if (tem
== NULL_TREE
)
2131 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2132 warning_at (loc
, OPT_Wstrict_overflow
,
2133 "assuming signed overflow does not occur "
2134 "when simplifying range test");
2136 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2138 struct range_entry
*r
;
2139 fprintf (dump_file
, "Optimizing range tests ");
2140 print_generic_expr (dump_file
, range
->exp
, 0);
2141 fprintf (dump_file
, " %c[", range
->in_p
? '+' : '-');
2142 print_generic_expr (dump_file
, range
->low
, 0);
2143 fprintf (dump_file
, ", ");
2144 print_generic_expr (dump_file
, range
->high
, 0);
2145 fprintf (dump_file
, "]");
2146 for (i
= 0; i
< count
; i
++)
2152 fprintf (dump_file
, " and %c[", r
->in_p
? '+' : '-');
2153 print_generic_expr (dump_file
, r
->low
, 0);
2154 fprintf (dump_file
, ", ");
2155 print_generic_expr (dump_file
, r
->high
, 0);
2156 fprintf (dump_file
, "]");
2158 fprintf (dump_file
, "\n into ");
2159 print_generic_expr (dump_file
, tem
, 0);
2160 fprintf (dump_file
, "\n");
/* For |-chains the test is inverted so the combined check keeps the same
   polarity as the original operands. */
2163 if (opcode
== BIT_IOR_EXPR
2164 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2165 tem
= invert_truthvalue_loc (loc
, tem
);
2167 tem
= fold_convert_loc (loc
, optype
, tem
);
2168 gsi
= gsi_for_stmt (stmt
);
2169 /* In rare cases range->exp can be equal to lhs of stmt.
2170 In that case we have to insert after the stmt rather then before
2172 if (op
== range
->exp
)
2174 gsi_insert_seq_after (&gsi
, seq
, GSI_CONTINUE_LINKING
);
2175 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, false,
2176 GSI_CONTINUE_LINKING
);
2180 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2181 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
/* Newly forced stmts have UID 0; copy STMT's UID onto them walking
   backwards until an already-numbered stmt is reached. */
2185 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2186 if (gimple_uid (gsi_stmt (gsi
)))
2189 gimple_set_uid (gsi_stmt (gsi
), gimple_uid (stmt
));
2196 range
->strict_overflow_p
= false;
2198 for (i
= 0; i
< count
; i
++)
2201 range
= otherrange
+ i
;
2203 range
= otherrangep
[i
];
2204 oe
= (*ops
)[range
->idx
];
2205 /* Now change all the other range test immediate uses, so that
2206 those tests will be optimized away. */
2207 if (opcode
== ERROR_MARK
)
/* Replace merged tests with the neutral element of their own op:
   0 for |, 1 for &. */
2210 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
2211 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
2213 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
2214 ? boolean_false_node
: boolean_true_node
);
2217 oe
->op
= error_mark_node
;
2218 range
->exp
= NULL_TREE
;
2223 /* Optimize X == CST1 || X == CST2
2224 if popcount (CST1 ^ CST2) == 1 into
2225 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2226 Similarly for ranges. E.g.
2227 X != 2 && X != 3 && X != 10 && X != 11
2228 will be transformed by the previous optimization into
2229 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2230 and this loop can transform that into
2231 !(((X & ~8) - 2U) <= 1U). */
/* NOTE(review): lossy extraction — early-return lines are elided;
   code reproduced verbatim. */
2234 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
2235 tree lowi
, tree lowj
, tree highi
, tree highj
,
2236 vec
<operand_entry_t
> *ops
,
2237 struct range_entry
*rangei
,
2238 struct range_entry
*rangej
)
2240 tree lowxor
, highxor
, tem
, exp
;
2241 /* Check lowi ^ lowj == highi ^ highj and
2242 popcount (lowi ^ lowj) == 1. */
2243 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
2244 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
2246 if (!integer_pow2p (lowxor
))
2248 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
2249 if (!tree_int_cst_equal (lowxor
, highxor
))
/* Mask away the single differing bit in EXP and both bounds, then hand
   the merged range to update_range_test. */
2252 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
2253 exp
= fold_build2 (BIT_AND_EXPR
, type
, rangei
->exp
, tem
);
2254 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
2255 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
2256 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, exp
,
2257 NULL
, rangei
->in_p
, lowj
, highj
,
2258 rangei
->strict_overflow_p
2259 || rangej
->strict_overflow_p
))
2264 /* Optimize X == CST1 || X == CST2
2265 if popcount (CST2 - CST1) == 1 into
2266 ((X - CST1) & ~(CST2 - CST1)) == 0.
2267 Similarly for ranges. E.g.
2268 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2269 || X == 75 || X == 45
2270 will be transformed by the previous optimization into
2271 (X - 43U) <= 3U || (X - 75U) <= 3U
2272 and this loop can transform that into
2273 ((X - 43U) & ~(75U - 43U)) <= 3U. */
/* NOTE(review): lossy extraction — early-return lines are elided;
   code reproduced verbatim. */
2275 optimize_range_tests_diff (enum tree_code opcode
, tree type
,
2276 tree lowi
, tree lowj
, tree highi
, tree highj
,
2277 vec
<operand_entry_t
> *ops
,
2278 struct range_entry
*rangei
,
2279 struct range_entry
*rangej
)
2281 tree tem1
, tem2
, mask
;
2282 /* Check highi - lowi == highj - lowj. */
2283 tem1
= fold_binary (MINUS_EXPR
, type
, highi
, lowi
);
2284 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2286 tem2
= fold_binary (MINUS_EXPR
, type
, highj
, lowj
);
2287 if (!tree_int_cst_equal (tem1
, tem2
))
2289 /* Check popcount (lowj - lowi) == 1. */
2290 tem1
= fold_binary (MINUS_EXPR
, type
, lowj
, lowi
);
2291 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2293 if (!integer_pow2p (tem1
))
/* Do the subtraction in the unsigned type so wraparound is well-defined. */
2296 type
= unsigned_type_for (type
);
2297 tem1
= fold_convert (type
, tem1
);
2298 tem2
= fold_convert (type
, tem2
);
2299 lowi
= fold_convert (type
, lowi
);
2300 mask
= fold_build1 (BIT_NOT_EXPR
, type
, tem1
);
2301 tem1
= fold_binary (MINUS_EXPR
, type
,
2302 fold_convert (type
, rangei
->exp
), lowi
);
2303 tem1
= fold_build2 (BIT_AND_EXPR
, type
, tem1
, mask
);
2304 lowj
= build_int_cst (type
, 0);
2305 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, tem1
,
2306 NULL
, rangei
->in_p
, lowj
, tem2
,
2307 rangei
->strict_overflow_p
2308 || rangej
->strict_overflow_p
))
2313 /* It does some common checks for function optimize_range_tests_xor and
2314 optimize_range_tests_diff.
2315 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2316 Else it calls optimize_range_tests_diff. */
/* NOTE(review): lossy extraction — continue/return lines and the final
   accounting code are elided; code reproduced verbatim. */
2319 optimize_range_tests_1 (enum tree_code opcode
, int first
, int length
,
2320 bool optimize_xor
, vec
<operand_entry_t
> *ops
,
2321 struct range_entry
*ranges
)
2324 bool any_changes
= false;
2325 for (i
= first
; i
< length
; i
++)
2327 tree lowi
, highi
, lowj
, highj
, type
, tem
;
/* Only consider integral, not-yet-merged, "not in range" entries. */
2329 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2331 type
= TREE_TYPE (ranges
[i
].exp
);
2332 if (!INTEGRAL_TYPE_P (type
))
2334 lowi
= ranges
[i
].low
;
2335 if (lowi
== NULL_TREE
)
2336 lowi
= TYPE_MIN_VALUE (type
);
2337 highi
= ranges
[i
].high
;
2338 if (highi
== NULL_TREE
)
/* Pair each candidate with up to 63 following entries on the same exp. */
2340 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
2343 if (ranges
[i
].exp
!= ranges
[j
].exp
|| ranges
[j
].in_p
)
2345 lowj
= ranges
[j
].low
;
2346 if (lowj
== NULL_TREE
)
2348 highj
= ranges
[j
].high
;
2349 if (highj
== NULL_TREE
)
2350 highj
= TYPE_MAX_VALUE (type
);
2351 /* Check lowj > highi. */
2352 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2354 if (tem
== NULL_TREE
|| !integer_onep (tem
))
2357 changes
= optimize_range_tests_xor (opcode
, type
, lowi
, lowj
,
2359 ranges
+ i
, ranges
+ j
);
2361 changes
= optimize_range_tests_diff (opcode
, type
, lowi
, lowj
,
2363 ranges
+ i
, ranges
+ j
);
2374 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2375 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2376 EXP on success, NULL otherwise. */
/* NOTE(review): lossy extraction — return statements and several
   surrounding lines are elided; code reproduced verbatim. */
2379 extract_bit_test_mask (tree exp
, int prec
, tree totallow
, tree low
, tree high
,
2380 wide_int
*mask
, tree
*totallowp
)
/* The range width (high - low) must be a non-negative constant smaller
   than the word precision for a bit test to be feasible. */
2382 tree tem
= int_const_binop (MINUS_EXPR
, high
, low
);
2383 if (tem
== NULL_TREE
2384 || TREE_CODE (tem
) != INTEGER_CST
2385 || TREE_OVERFLOW (tem
)
2386 || tree_int_cst_sgn (tem
) == -1
2387 || compare_tree_int (tem
, prec
) != -1)
2390 unsigned HOST_WIDE_INT max
= tree_to_uhwi (tem
) + 1;
2391 *mask
= wi::shifted_mask (0, max
, false, prec
);
/* Look through EXP = X & CST: a single zero-bit in CST splits the range
   into two mask segments that can still be covered by one bit test. */
2392 if (TREE_CODE (exp
) == BIT_AND_EXPR
2393 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
2395 widest_int msk
= wi::to_widest (TREE_OPERAND (exp
, 1));
2396 msk
= wi::zext (~msk
, TYPE_PRECISION (TREE_TYPE (exp
)));
2397 if (wi::popcount (msk
) == 1
2398 && wi::ltu_p (msk
, prec
- max
))
2400 *mask
|= wi::shifted_mask (msk
.to_uhwi (), max
, false, prec
);
2401 max
+= msk
.to_uhwi ();
2402 exp
= TREE_OPERAND (exp
, 0);
/* Look through EXP = X + CST with LOW == 0, folding the (negated)
   constant into the low bound / mask bias instead. */
2403 if (integer_zerop (low
)
2404 && TREE_CODE (exp
) == PLUS_EXPR
2405 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
2408 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp
, 1)),
2409 TYPE_PRECISION (TREE_TYPE (low
))));
2410 tree tbias
= wide_int_to_tree (TREE_TYPE (low
), bias
);
2414 exp
= TREE_OPERAND (exp
, 0);
2418 else if (!tree_int_cst_lt (totallow
, tbias
))
2420 bias
-= wi::to_widest (totallow
);
2421 if (wi::ges_p (bias
, 0) && wi::lts_p (bias
, prec
- max
))
2423 *mask
= wi::lshift (*mask
, bias
);
2424 exp
= TREE_OPERAND (exp
, 0);
/* Finally shift the mask so it is relative to TOTALLOW rather than LOW. */
2433 if (!tree_int_cst_lt (totallow
, low
))
2435 tem
= int_const_binop (MINUS_EXPR
, low
, totallow
);
2436 if (tem
== NULL_TREE
2437 || TREE_CODE (tem
) != INTEGER_CST
2438 || TREE_OVERFLOW (tem
)
2439 || compare_tree_int (tem
, prec
- max
) == 1)
2442 *mask
= wi::lshift (*mask
, wi::to_widest (tem
));
2446 /* Attempt to optimize small range tests using bit test.
2448 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2449 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2450 has been by earlier optimizations optimized into:
2451 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2452 As all the 43 through 82 range is less than 64 numbers,
2453 for 64-bit word targets optimize that into:
2454 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
2457 optimize_range_tests_to_bit_test (enum tree_code opcode
, int first
, int length
,
2458 vec
<operand_entry_t
> *ops
,
2459 struct range_entry
*ranges
)
2462 bool any_changes
= false;
2463 int prec
= GET_MODE_BITSIZE (word_mode
);
2464 auto_vec
<struct range_entry
*, 64> candidates
;
2466 for (i
= first
; i
< length
- 2; i
++)
2468 tree lowi
, highi
, lowj
, highj
, type
;
2470 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2472 type
= TREE_TYPE (ranges
[i
].exp
);
2473 if (!INTEGRAL_TYPE_P (type
))
2475 lowi
= ranges
[i
].low
;
2476 if (lowi
== NULL_TREE
)
2477 lowi
= TYPE_MIN_VALUE (type
);
2478 highi
= ranges
[i
].high
;
2479 if (highi
== NULL_TREE
)
2482 tree exp
= extract_bit_test_mask (ranges
[i
].exp
, prec
, lowi
, lowi
,
2483 highi
, &mask
, &lowi
);
2484 if (exp
== NULL_TREE
)
2486 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2487 candidates
.truncate (0);
2488 int end
= MIN (i
+ 64, length
);
2489 for (j
= i
+ 1; j
< end
; j
++)
2492 if (ranges
[j
].exp
== NULL_TREE
|| ranges
[j
].in_p
)
2494 if (ranges
[j
].exp
== exp
)
2496 else if (TREE_CODE (ranges
[j
].exp
) == BIT_AND_EXPR
)
2498 exp2
= TREE_OPERAND (ranges
[j
].exp
, 0);
2501 else if (TREE_CODE (exp2
) == PLUS_EXPR
)
2503 exp2
= TREE_OPERAND (exp2
, 0);
2513 lowj
= ranges
[j
].low
;
2514 if (lowj
== NULL_TREE
)
2516 highj
= ranges
[j
].high
;
2517 if (highj
== NULL_TREE
)
2518 highj
= TYPE_MAX_VALUE (type
);
2520 exp2
= extract_bit_test_mask (ranges
[j
].exp
, prec
, lowi
, lowj
,
2521 highj
, &mask2
, NULL
);
2525 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2526 candidates
.safe_push (&ranges
[j
]);
2529 /* If we need otherwise 3 or more comparisons, use a bit test. */
2530 if (candidates
.length () >= 2)
2532 tree high
= wide_int_to_tree (TREE_TYPE (lowi
),
2533 wi::to_widest (lowi
)
2534 + prec
- 1 - wi::clz (mask
));
2535 operand_entry_t oe
= (*ops
)[ranges
[i
].idx
];
2537 gimple stmt
= op
? SSA_NAME_DEF_STMT (op
)
2538 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2539 location_t loc
= gimple_location (stmt
);
2540 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2542 /* See if it isn't cheaper to pretend the minimum value of the
2543 range is 0, if maximum value is small enough.
2544 We can avoid then subtraction of the minimum value, but the
2545 mask constant could be perhaps more expensive. */
2546 if (compare_tree_int (lowi
, 0) > 0
2547 && compare_tree_int (high
, prec
) < 0)
2550 HOST_WIDE_INT m
= tree_to_uhwi (lowi
);
2551 rtx reg
= gen_raw_REG (word_mode
, 10000);
2552 bool speed_p
= optimize_bb_for_speed_p (gimple_bb (stmt
));
2553 cost_diff
= set_rtx_cost (gen_rtx_PLUS (word_mode
, reg
,
2554 GEN_INT (-m
)), speed_p
);
2555 rtx r
= immed_wide_int_const (mask
, word_mode
);
2556 cost_diff
+= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
2558 r
= immed_wide_int_const (wi::lshift (mask
, m
), word_mode
);
2559 cost_diff
-= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
2563 mask
= wi::lshift (mask
, m
);
2564 lowi
= build_zero_cst (TREE_TYPE (lowi
));
2568 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2570 if (tem
== NULL_TREE
|| is_gimple_val (tem
))
2572 tree etype
= unsigned_type_for (TREE_TYPE (exp
));
2573 exp
= fold_build2_loc (loc
, MINUS_EXPR
, etype
,
2574 fold_convert_loc (loc
, etype
, exp
),
2575 fold_convert_loc (loc
, etype
, lowi
));
2576 exp
= fold_convert_loc (loc
, integer_type_node
, exp
);
2577 tree word_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
2578 exp
= fold_build2_loc (loc
, LSHIFT_EXPR
, word_type
,
2579 build_int_cst (word_type
, 1), exp
);
2580 exp
= fold_build2_loc (loc
, BIT_AND_EXPR
, word_type
, exp
,
2581 wide_int_to_tree (word_type
, mask
));
2582 exp
= fold_build2_loc (loc
, EQ_EXPR
, optype
, exp
,
2583 build_zero_cst (word_type
));
2584 if (is_gimple_val (exp
))
2587 /* The shift might have undefined behavior if TEM is true,
2588 but reassociate_bb isn't prepared to have basic blocks
2589 split when it is running. So, temporarily emit a code
2590 with BIT_IOR_EXPR instead of &&, and fix it up in
2593 tem
= force_gimple_operand (tem
, &seq
, true, NULL_TREE
);
2594 gcc_assert (TREE_CODE (tem
) == SSA_NAME
);
2595 gimple_set_visited (SSA_NAME_DEF_STMT (tem
), true);
2597 exp
= force_gimple_operand (exp
, &seq2
, true, NULL_TREE
);
2598 gimple_seq_add_seq_without_update (&seq
, seq2
);
2599 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2600 gimple_set_visited (SSA_NAME_DEF_STMT (exp
), true);
2601 gimple g
= gimple_build_assign (make_ssa_name (optype
),
2602 BIT_IOR_EXPR
, tem
, exp
);
2603 gimple_set_location (g
, loc
);
2604 gimple_seq_add_stmt_without_update (&seq
, g
);
2605 exp
= gimple_assign_lhs (g
);
2606 tree val
= build_zero_cst (optype
);
2607 if (update_range_test (&ranges
[i
], NULL
, candidates
.address (),
2608 candidates
.length (), opcode
, ops
, exp
,
2609 seq
, false, val
, val
, strict_overflow_p
))
2612 reassoc_branch_fixups
.safe_push (tem
);
2615 gimple_seq_discard (seq
);
2621 /* Optimize range tests, similarly how fold_range_test optimizes
2622 it on trees. The tree code for the binary
2623 operation between all the operands is OPCODE.
2624 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2625 maybe_optimize_range_tests for inter-bb range optimization.
2626 In that case if oe->op is NULL, oe->id is bb->index whose
2627 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2628 the actual opcode. */
2631 optimize_range_tests (enum tree_code opcode
,
2632 vec
<operand_entry_t
> *ops
)
2634 unsigned int length
= ops
->length (), i
, j
, first
;
2636 struct range_entry
*ranges
;
2637 bool any_changes
= false;
2642 ranges
= XNEWVEC (struct range_entry
, length
);
2643 for (i
= 0; i
< length
; i
++)
2647 init_range_entry (ranges
+ i
, oe
->op
,
2649 last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
)));
2650 /* For | invert it now, we will invert it again before emitting
2651 the optimized expression. */
2652 if (opcode
== BIT_IOR_EXPR
2653 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2654 ranges
[i
].in_p
= !ranges
[i
].in_p
;
2657 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
2658 for (i
= 0; i
< length
; i
++)
2659 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
2662 /* Try to merge ranges. */
2663 for (first
= i
; i
< length
; i
++)
2665 tree low
= ranges
[i
].low
;
2666 tree high
= ranges
[i
].high
;
2667 int in_p
= ranges
[i
].in_p
;
2668 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2669 int update_fail_count
= 0;
2671 for (j
= i
+ 1; j
< length
; j
++)
2673 if (ranges
[i
].exp
!= ranges
[j
].exp
)
2675 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
2676 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
2678 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2684 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, NULL
, j
- i
- 1,
2685 opcode
, ops
, ranges
[i
].exp
, NULL
, in_p
,
2686 low
, high
, strict_overflow_p
))
2691 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2692 while update_range_test would fail. */
2693 else if (update_fail_count
== 64)
2696 ++update_fail_count
;
2699 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
2702 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
2703 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
2705 if (lshift_cheap_p (optimize_function_for_speed_p (cfun
)))
2706 any_changes
|= optimize_range_tests_to_bit_test (opcode
, first
, length
,
2709 if (any_changes
&& opcode
!= ERROR_MARK
)
2712 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
2714 if (oe
->op
== error_mark_node
)
2723 XDELETEVEC (ranges
);
2727 /* Return true if STMT is a cast like:
2733 # _345 = PHI <_123(N), 1(...), 1(...)>
2734 where _234 has bool type, _123 has single use and
2735 bb N has a single successor M. This is commonly used in
2736 the last block of a range test. */
2739 final_range_test_p (gimple stmt
)
2741 basic_block bb
, rhs_bb
;
2744 use_operand_p use_p
;
2747 if (!gimple_assign_cast_p (stmt
))
2749 bb
= gimple_bb (stmt
);
2750 if (!single_succ_p (bb
))
2752 e
= single_succ_edge (bb
);
2753 if (e
->flags
& EDGE_COMPLEX
)
2756 lhs
= gimple_assign_lhs (stmt
);
2757 rhs
= gimple_assign_rhs1 (stmt
);
2758 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2759 || TREE_CODE (rhs
) != SSA_NAME
2760 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
)
2763 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2764 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
2767 if (gimple_code (use_stmt
) != GIMPLE_PHI
2768 || gimple_bb (use_stmt
) != e
->dest
)
2771 /* And that the rhs is defined in the same loop. */
2772 rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
));
2774 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
2780 /* Return true if BB is suitable basic block for inter-bb range test
2781 optimization. If BACKWARD is true, BB should be the only predecessor
2782 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2783 or compared with to find a common basic block to which all conditions
2784 branch to if true resp. false. If BACKWARD is false, TEST_BB should
2785 be the only predecessor of BB. */
2788 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
2791 edge_iterator ei
, ei2
;
2795 bool other_edge_seen
= false;
2800 /* Check last stmt first. */
2801 stmt
= last_stmt (bb
);
2803 || (gimple_code (stmt
) != GIMPLE_COND
2804 && (backward
|| !final_range_test_p (stmt
)))
2805 || gimple_visited_p (stmt
)
2806 || stmt_could_throw_p (stmt
)
2809 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
2812 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2813 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2814 to *OTHER_BB (if not set yet, try to find it out). */
2815 if (EDGE_COUNT (bb
->succs
) != 2)
2817 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2819 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2821 if (e
->dest
== test_bb
)
2830 if (*other_bb
== NULL
)
2832 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
2833 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2835 else if (e
->dest
== e2
->dest
)
2836 *other_bb
= e
->dest
;
2837 if (*other_bb
== NULL
)
2840 if (e
->dest
== *other_bb
)
2841 other_edge_seen
= true;
2845 if (*other_bb
== NULL
|| !other_edge_seen
)
2848 else if (single_succ (bb
) != *other_bb
)
2851 /* Now check all PHIs of *OTHER_BB. */
2852 e
= find_edge (bb
, *other_bb
);
2853 e2
= find_edge (test_bb
, *other_bb
);
2854 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2856 gphi
*phi
= gsi
.phi ();
2857 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2858 corresponding to BB and TEST_BB predecessor must be the same. */
2859 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
2860 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
2862 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2863 one of the PHIs should have the lhs of the last stmt in
2864 that block as PHI arg and that PHI should have 0 or 1
2865 corresponding to it in all other range test basic blocks
2869 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
2870 == gimple_assign_lhs (stmt
)
2871 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
2872 || integer_onep (gimple_phi_arg_def (phi
,
2878 gimple test_last
= last_stmt (test_bb
);
2879 if (gimple_code (test_last
) != GIMPLE_COND
2880 && gimple_phi_arg_def (phi
, e2
->dest_idx
)
2881 == gimple_assign_lhs (test_last
)
2882 && (integer_zerop (gimple_phi_arg_def (phi
, e
->dest_idx
))
2883 || integer_onep (gimple_phi_arg_def (phi
, e
->dest_idx
))))
2893 /* Return true if BB doesn't have side-effects that would disallow
2894 range test optimization, all SSA_NAMEs set in the bb are consumed
2895 in the bb and there are no PHIs. */
2898 no_side_effect_bb (basic_block bb
)
2900 gimple_stmt_iterator gsi
;
2903 if (!gimple_seq_empty_p (phi_nodes (bb
)))
2905 last
= last_stmt (bb
);
2906 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2908 gimple stmt
= gsi_stmt (gsi
);
2910 imm_use_iterator imm_iter
;
2911 use_operand_p use_p
;
2913 if (is_gimple_debug (stmt
))
2915 if (gimple_has_side_effects (stmt
))
2919 if (!is_gimple_assign (stmt
))
2921 lhs
= gimple_assign_lhs (stmt
);
2922 if (TREE_CODE (lhs
) != SSA_NAME
)
2924 if (gimple_assign_rhs_could_trap_p (stmt
))
2926 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
2928 gimple use_stmt
= USE_STMT (use_p
);
2929 if (is_gimple_debug (use_stmt
))
2931 if (gimple_bb (use_stmt
) != bb
)
2938 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2939 return true and fill in *OPS recursively. */
2942 get_ops (tree var
, enum tree_code code
, vec
<operand_entry_t
> *ops
,
2945 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2949 if (!is_reassociable_op (stmt
, code
, loop
))
2952 rhs
[0] = gimple_assign_rhs1 (stmt
);
2953 rhs
[1] = gimple_assign_rhs2 (stmt
);
2954 gimple_set_visited (stmt
, true);
2955 for (i
= 0; i
< 2; i
++)
2956 if (TREE_CODE (rhs
[i
]) == SSA_NAME
2957 && !get_ops (rhs
[i
], code
, ops
, loop
)
2958 && has_single_use (rhs
[i
]))
2960 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
2966 ops
->safe_push (oe
);
2971 /* Find the ops that were added by get_ops starting from VAR, see if
2972 they were changed during update_range_test and if yes, create new
2976 update_ops (tree var
, enum tree_code code
, vec
<operand_entry_t
> ops
,
2977 unsigned int *pidx
, struct loop
*loop
)
2979 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2983 if (!is_reassociable_op (stmt
, code
, loop
))
2986 rhs
[0] = gimple_assign_rhs1 (stmt
);
2987 rhs
[1] = gimple_assign_rhs2 (stmt
);
2990 for (i
= 0; i
< 2; i
++)
2991 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
2993 rhs
[2 + i
] = update_ops (rhs
[i
], code
, ops
, pidx
, loop
);
2994 if (rhs
[2 + i
] == NULL_TREE
)
2996 if (has_single_use (rhs
[i
]))
2997 rhs
[2 + i
] = ops
[(*pidx
)++]->op
;
2999 rhs
[2 + i
] = rhs
[i
];
3002 if ((rhs
[2] != rhs
[0] || rhs
[3] != rhs
[1])
3003 && (rhs
[2] != rhs
[1] || rhs
[3] != rhs
[0]))
3005 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3006 var
= make_ssa_name (TREE_TYPE (var
));
3007 gassign
*g
= gimple_build_assign (var
, gimple_assign_rhs_code (stmt
),
3009 gimple_set_uid (g
, gimple_uid (stmt
));
3010 gimple_set_visited (g
, true);
3011 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3016 /* Structure to track the initial value passed to get_ops and
3017 the range in the ops vector for each basic block. */
3019 struct inter_bb_range_test_entry
3022 unsigned int first_idx
, last_idx
;
3025 /* Inter-bb range test optimization. */
3028 maybe_optimize_range_tests (gimple stmt
)
3030 basic_block first_bb
= gimple_bb (stmt
);
3031 basic_block last_bb
= first_bb
;
3032 basic_block other_bb
= NULL
;
3036 auto_vec
<operand_entry_t
> ops
;
3037 auto_vec
<inter_bb_range_test_entry
> bbinfo
;
3038 bool any_changes
= false;
3040 /* Consider only basic blocks that end with GIMPLE_COND or
3041 a cast statement satisfying final_range_test_p. All
3042 but the last bb in the first_bb .. last_bb range
3043 should end with GIMPLE_COND. */
3044 if (gimple_code (stmt
) == GIMPLE_COND
)
3046 if (EDGE_COUNT (first_bb
->succs
) != 2)
3049 else if (final_range_test_p (stmt
))
3050 other_bb
= single_succ (first_bb
);
3054 if (stmt_could_throw_p (stmt
))
3057 /* As relative ordering of post-dominator sons isn't fixed,
3058 maybe_optimize_range_tests can be called first on any
3059 bb in the range we want to optimize. So, start searching
3060 backwards, if first_bb can be set to a predecessor. */
3061 while (single_pred_p (first_bb
))
3063 basic_block pred_bb
= single_pred (first_bb
);
3064 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, true))
3066 if (!no_side_effect_bb (first_bb
))
3070 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3071 Before starting forward search in last_bb successors, find
3072 out the other_bb. */
3073 if (first_bb
== last_bb
)
3076 /* As non-GIMPLE_COND last stmt always terminates the range,
3077 if forward search didn't discover anything, just give up. */
3078 if (gimple_code (stmt
) != GIMPLE_COND
)
3080 /* Look at both successors. Either it ends with a GIMPLE_COND
3081 and satisfies suitable_cond_bb, or ends with a cast and
3082 other_bb is that cast's successor. */
3083 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
3084 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
3085 || e
->dest
== first_bb
)
3087 else if (single_pred_p (e
->dest
))
3089 stmt
= last_stmt (e
->dest
);
3091 && gimple_code (stmt
) == GIMPLE_COND
3092 && EDGE_COUNT (e
->dest
->succs
) == 2)
3094 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
, true))
3100 && final_range_test_p (stmt
)
3101 && find_edge (first_bb
, single_succ (e
->dest
)))
3103 other_bb
= single_succ (e
->dest
);
3104 if (other_bb
== first_bb
)
3108 if (other_bb
== NULL
)
3111 /* Now do the forward search, moving last_bb to successor bbs
3112 that aren't other_bb. */
3113 while (EDGE_COUNT (last_bb
->succs
) == 2)
3115 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
3116 if (e
->dest
!= other_bb
)
3120 if (!single_pred_p (e
->dest
))
3122 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, false))
3124 if (!no_side_effect_bb (e
->dest
))
3128 if (first_bb
== last_bb
)
3130 /* Here basic blocks first_bb through last_bb's predecessor
3131 end with GIMPLE_COND, all of them have one of the edges to
3132 other_bb and another to another block in the range,
3133 all blocks except first_bb don't have side-effects and
3134 last_bb ends with either GIMPLE_COND, or cast satisfying
3135 final_range_test_p. */
3136 for (bb
= last_bb
; ; bb
= single_pred (bb
))
3138 enum tree_code code
;
3140 inter_bb_range_test_entry bb_ent
;
3142 bb_ent
.op
= NULL_TREE
;
3143 bb_ent
.first_idx
= ops
.length ();
3144 bb_ent
.last_idx
= bb_ent
.first_idx
;
3145 e
= find_edge (bb
, other_bb
);
3146 stmt
= last_stmt (bb
);
3147 gimple_set_visited (stmt
, true);
3148 if (gimple_code (stmt
) != GIMPLE_COND
)
3150 use_operand_p use_p
;
3155 lhs
= gimple_assign_lhs (stmt
);
3156 rhs
= gimple_assign_rhs1 (stmt
);
3157 gcc_assert (bb
== last_bb
);
3164 # _345 = PHI <_123(N), 1(...), 1(...)>
3166 or 0 instead of 1. If it is 0, the _234
3167 range test is anded together with all the
3168 other range tests, if it is 1, it is ored with
3170 single_imm_use (lhs
, &use_p
, &phi
);
3171 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
3172 e2
= find_edge (first_bb
, other_bb
);
3174 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
3175 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
3176 code
= BIT_AND_EXPR
;
3179 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
3180 code
= BIT_IOR_EXPR
;
3183 /* If _234 SSA_NAME_DEF_STMT is
3185 (or &, corresponding to 1/0 in the phi arguments,
3186 push into ops the individual range test arguments
3187 of the bitwise or resp. and, recursively. */
3188 if (!get_ops (rhs
, code
, &ops
,
3189 loop_containing_stmt (stmt
))
3190 && has_single_use (rhs
))
3192 /* Otherwise, push the _234 range test itself. */
3194 = (operand_entry_t
) pool_alloc (operand_entry_pool
);
3204 bb_ent
.last_idx
= ops
.length ();
3206 bbinfo
.safe_push (bb_ent
);
3209 /* Otherwise stmt is GIMPLE_COND. */
3210 code
= gimple_cond_code (stmt
);
3211 lhs
= gimple_cond_lhs (stmt
);
3212 rhs
= gimple_cond_rhs (stmt
);
3213 if (TREE_CODE (lhs
) == SSA_NAME
3214 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3215 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
3216 || rhs
!= boolean_false_node
3217 /* Either push into ops the individual bitwise
3218 or resp. and operands, depending on which
3219 edge is other_bb. */
3220 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
3221 ^ (code
== EQ_EXPR
))
3222 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
3223 loop_containing_stmt (stmt
))))
3225 /* Or push the GIMPLE_COND stmt itself. */
3227 = (operand_entry_t
) pool_alloc (operand_entry_pool
);
3230 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
3231 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
3232 /* oe->op = NULL signs that there is no SSA_NAME
3233 for the range test, and oe->id instead is the
3234 basic block number, at which's end the GIMPLE_COND
3242 else if (ops
.length () > bb_ent
.first_idx
)
3245 bb_ent
.last_idx
= ops
.length ();
3247 bbinfo
.safe_push (bb_ent
);
3251 if (ops
.length () > 1)
3252 any_changes
= optimize_range_tests (ERROR_MARK
, &ops
);
3256 /* update_ops relies on has_single_use predicates returning the
3257 same values as it did during get_ops earlier. Additionally it
3258 never removes statements, only adds new ones and it should walk
3259 from the single imm use and check the predicate already before
3260 making those changes.
3261 On the other side, the handling of GIMPLE_COND directly can turn
3262 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3263 it needs to be done in a separate loop afterwards. */
3264 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
3266 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
3267 && bbinfo
[idx
].op
!= NULL_TREE
)
3271 stmt
= last_stmt (bb
);
3272 new_op
= update_ops (bbinfo
[idx
].op
,
3274 ops
[bbinfo
[idx
].first_idx
]->rank
,
3275 ops
, &bbinfo
[idx
].first_idx
,
3276 loop_containing_stmt (stmt
));
3277 if (new_op
== NULL_TREE
)
3279 gcc_assert (bb
== last_bb
);
3280 new_op
= ops
[bbinfo
[idx
].first_idx
++]->op
;
3282 if (bbinfo
[idx
].op
!= new_op
)
3284 imm_use_iterator iter
;
3285 use_operand_p use_p
;
3286 gimple use_stmt
, cast_stmt
= NULL
;
3288 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, bbinfo
[idx
].op
)
3289 if (is_gimple_debug (use_stmt
))
3291 else if (gimple_code (use_stmt
) == GIMPLE_COND
3292 || gimple_code (use_stmt
) == GIMPLE_PHI
)
3293 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
3294 SET_USE (use_p
, new_op
);
3295 else if (gimple_assign_cast_p (use_stmt
))
3296 cast_stmt
= use_stmt
;
3301 gcc_assert (bb
== last_bb
);
3302 tree lhs
= gimple_assign_lhs (cast_stmt
);
3303 tree new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
3304 enum tree_code rhs_code
3305 = gimple_assign_rhs_code (cast_stmt
);
3307 if (is_gimple_min_invariant (new_op
))
3309 new_op
= fold_convert (TREE_TYPE (lhs
), new_op
);
3310 g
= gimple_build_assign (new_lhs
, new_op
);
3313 g
= gimple_build_assign (new_lhs
, rhs_code
, new_op
);
3314 gimple_stmt_iterator gsi
= gsi_for_stmt (cast_stmt
);
3315 gimple_set_uid (g
, gimple_uid (cast_stmt
));
3316 gimple_set_visited (g
, true);
3317 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3318 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
3319 if (is_gimple_debug (use_stmt
))
3321 else if (gimple_code (use_stmt
) == GIMPLE_COND
3322 || gimple_code (use_stmt
) == GIMPLE_PHI
)
3323 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
3324 SET_USE (use_p
, new_lhs
);
3333 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
3335 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
3336 && bbinfo
[idx
].op
== NULL_TREE
3337 && ops
[bbinfo
[idx
].first_idx
]->op
!= NULL_TREE
)
3339 gcond
*cond_stmt
= as_a
<gcond
*> (last_stmt (bb
));
3340 if (integer_zerop (ops
[bbinfo
[idx
].first_idx
]->op
))
3341 gimple_cond_make_false (cond_stmt
);
3342 else if (integer_onep (ops
[bbinfo
[idx
].first_idx
]->op
))
3343 gimple_cond_make_true (cond_stmt
);
3346 gimple_cond_set_code (cond_stmt
, NE_EXPR
);
3347 gimple_cond_set_lhs (cond_stmt
,
3348 ops
[bbinfo
[idx
].first_idx
]->op
);
3349 gimple_cond_set_rhs (cond_stmt
, boolean_false_node
);
3351 update_stmt (cond_stmt
);
3359 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3360 of STMT in it's operands. This is also known as a "destructive
3361 update" operation. */
3364 is_phi_for_stmt (gimple stmt
, tree operand
)
3369 use_operand_p arg_p
;
3372 if (TREE_CODE (operand
) != SSA_NAME
)
3375 lhs
= gimple_assign_lhs (stmt
);
3377 def_stmt
= SSA_NAME_DEF_STMT (operand
);
3378 def_phi
= dyn_cast
<gphi
*> (def_stmt
);
3382 FOR_EACH_PHI_ARG (arg_p
, def_phi
, i
, SSA_OP_USE
)
3383 if (lhs
== USE_FROM_PTR (arg_p
))
3388 /* Remove def stmt of VAR if VAR has zero uses and recurse
3389 on rhs1 operand if so. */
3392 remove_visited_stmt_chain (tree var
)
3395 gimple_stmt_iterator gsi
;
3399 if (TREE_CODE (var
) != SSA_NAME
|| !has_zero_uses (var
))
3401 stmt
= SSA_NAME_DEF_STMT (var
);
3402 if (is_gimple_assign (stmt
) && gimple_visited_p (stmt
))
3404 var
= gimple_assign_rhs1 (stmt
);
3405 gsi
= gsi_for_stmt (stmt
);
3406 reassoc_remove_stmt (&gsi
);
3407 release_defs (stmt
);
3414 /* This function checks three consequtive operands in
3415 passed operands vector OPS starting from OPINDEX and
3416 swaps two operands if it is profitable for binary operation
3417 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
3419 We pair ops with the same rank if possible.
3421 The alternative we try is to see if STMT is a destructive
3422 update style statement, which is like:
3425 In that case, we want to use the destructive update form to
3426 expose the possible vectorizer sum reduction opportunity.
3427 In that case, the third operand will be the phi node. This
3428 check is not performed if STMT is null.
3430 We could, of course, try to be better as noted above, and do a
3431 lot of work to try to find these opportunities in >3 operand
3432 cases, but it is unlikely to be worth it. */
3435 swap_ops_for_binary_stmt (vec
<operand_entry_t
> ops
,
3436 unsigned int opindex
, gimple stmt
)
3438 operand_entry_t oe1
, oe2
, oe3
;
3441 oe2
= ops
[opindex
+ 1];
3442 oe3
= ops
[opindex
+ 2];
3444 if ((oe1
->rank
== oe2
->rank
3445 && oe2
->rank
!= oe3
->rank
)
3446 || (stmt
&& is_phi_for_stmt (stmt
, oe3
->op
)
3447 && !is_phi_for_stmt (stmt
, oe1
->op
)
3448 && !is_phi_for_stmt (stmt
, oe2
->op
)))
3450 struct operand_entry temp
= *oe3
;
3452 oe3
->rank
= oe1
->rank
;
3454 oe1
->rank
= temp
.rank
;
3456 else if ((oe1
->rank
== oe3
->rank
3457 && oe2
->rank
!= oe3
->rank
)
3458 || (stmt
&& is_phi_for_stmt (stmt
, oe2
->op
)
3459 && !is_phi_for_stmt (stmt
, oe1
->op
)
3460 && !is_phi_for_stmt (stmt
, oe3
->op
)))
3462 struct operand_entry temp
= *oe2
;
3464 oe2
->rank
= oe1
->rank
;
3466 oe1
->rank
= temp
.rank
;
3470 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3471 two definitions, otherwise return STMT. */
3473 static inline gimple
3474 find_insert_point (gimple stmt
, tree rhs1
, tree rhs2
)
3476 if (TREE_CODE (rhs1
) == SSA_NAME
3477 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs1
)))
3478 stmt
= SSA_NAME_DEF_STMT (rhs1
);
3479 if (TREE_CODE (rhs2
) == SSA_NAME
3480 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs2
)))
3481 stmt
= SSA_NAME_DEF_STMT (rhs2
);
3485 /* Recursively rewrite our linearized statements so that the operators
3486 match those in OPS[OPINDEX], putting the computation in rank
3487 order. Return new lhs. */
3490 rewrite_expr_tree (gimple stmt
, unsigned int opindex
,
3491 vec
<operand_entry_t
> ops
, bool changed
)
3493 tree rhs1
= gimple_assign_rhs1 (stmt
);
3494 tree rhs2
= gimple_assign_rhs2 (stmt
);
3495 tree lhs
= gimple_assign_lhs (stmt
);
3498 /* The final recursion case for this function is that you have
3499 exactly two operations left.
3500 If we had one exactly one op in the entire list to start with, we
3501 would have never called this function, and the tail recursion
3502 rewrites them one at a time. */
3503 if (opindex
+ 2 == ops
.length ())
3505 operand_entry_t oe1
, oe2
;
3508 oe2
= ops
[opindex
+ 1];
3510 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
3512 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3513 unsigned int uid
= gimple_uid (stmt
);
3515 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3517 fprintf (dump_file
, "Transforming ");
3518 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3523 gimple insert_point
= find_insert_point (stmt
, oe1
->op
, oe2
->op
);
3524 lhs
= make_ssa_name (TREE_TYPE (lhs
));
3526 = gimple_build_assign (lhs
, gimple_assign_rhs_code (stmt
),
3528 gimple_set_uid (stmt
, uid
);
3529 gimple_set_visited (stmt
, true);
3530 if (insert_point
== gsi_stmt (gsi
))
3531 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3533 insert_stmt_after (stmt
, insert_point
);
3537 gcc_checking_assert (find_insert_point (stmt
, oe1
->op
, oe2
->op
)
3539 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
3540 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
3544 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
3545 remove_visited_stmt_chain (rhs1
);
3547 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3549 fprintf (dump_file
, " into ");
3550 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3556 /* If we hit here, we should have 3 or more ops left. */
3557 gcc_assert (opindex
+ 2 < ops
.length ());
3559 /* Rewrite the next operator. */
3562 /* Recurse on the LHS of the binary operator, which is guaranteed to
3563 be the non-leaf side. */
3565 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), opindex
+ 1, ops
,
3566 changed
|| oe
->op
!= rhs2
);
3568 if (oe
->op
!= rhs2
|| new_rhs1
!= rhs1
)
3570 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3572 fprintf (dump_file
, "Transforming ");
3573 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3576 /* If changed is false, this is either opindex == 0
3577 or all outer rhs2's were equal to corresponding oe->op,
3578 and powi_result is NULL.
3579 That means lhs is equivalent before and after reassociation.
3580 Otherwise ensure the old lhs SSA_NAME is not reused and
3581 create a new stmt as well, so that any debug stmts will be
3582 properly adjusted. */
3585 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3586 unsigned int uid
= gimple_uid (stmt
);
3587 gimple insert_point
= find_insert_point (stmt
, new_rhs1
, oe
->op
);
3589 lhs
= make_ssa_name (TREE_TYPE (lhs
));
3590 stmt
= gimple_build_assign (lhs
, gimple_assign_rhs_code (stmt
),
3592 gimple_set_uid (stmt
, uid
);
3593 gimple_set_visited (stmt
, true);
3594 if (insert_point
== gsi_stmt (gsi
))
3595 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3597 insert_stmt_after (stmt
, insert_point
);
3601 gcc_checking_assert (find_insert_point (stmt
, new_rhs1
, oe
->op
)
3603 gimple_assign_set_rhs1 (stmt
, new_rhs1
);
3604 gimple_assign_set_rhs2 (stmt
, oe
->op
);
3608 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3610 fprintf (dump_file
, " into ");
3611 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3617 /* Find out how many cycles we need to compute statements chain.
3618 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
3619 maximum number of independent statements we may execute per cycle. */
3622 get_required_cycles (int ops_num
, int cpu_width
)
3628 /* While we have more than 2 * cpu_width operands
3629 we may reduce number of operands by cpu_width
3631 res
= ops_num
/ (2 * cpu_width
);
3633 /* Remained operands count may be reduced twice per cycle
3634 until we have only one operand. */
3635 rest
= (unsigned)(ops_num
- res
* cpu_width
);
3636 elog
= exact_log2 (rest
);
3640 res
+= floor_log2 (rest
) + 1;
3645 /* Returns an optimal number of registers to use for computation of
3646 given statements. */
3649 get_reassociation_width (int ops_num
, enum tree_code opc
,
3652 int param_width
= PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH
);
3657 if (param_width
> 0)
3658 width
= param_width
;
3660 width
= targetm
.sched
.reassociation_width (opc
, mode
);
3665 /* Get the minimal time required for sequence computation. */
3666 cycles_best
= get_required_cycles (ops_num
, width
);
3668 /* Check if we may use less width and still compute sequence for
3669 the same time. It will allow us to reduce registers usage.
3670 get_required_cycles is monotonically increasing with lower width
3671 so we can perform a binary search for the minimal width that still
3672 results in the optimal cycle count. */
3674 while (width
> width_min
)
3676 int width_mid
= (width
+ width_min
) / 2;
3678 if (get_required_cycles (ops_num
, width_mid
) == cycles_best
)
3680 else if (width_min
< width_mid
)
3681 width_min
= width_mid
;
3689 /* Recursively rewrite our linearized statements so that the operators
3690 match those in OPS[OPINDEX], putting the computation in rank
3691 order and trying to allow operations to be executed in
3695 rewrite_expr_tree_parallel (gassign
*stmt
, int width
,
3696 vec
<operand_entry_t
> ops
)
3698 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
3699 int op_num
= ops
.length ();
3700 int stmt_num
= op_num
- 1;
3701 gimple
*stmts
= XALLOCAVEC (gimple
, stmt_num
);
3702 int op_index
= op_num
- 1;
3704 int ready_stmts_end
= 0;
3706 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
3708 /* We start expression rewriting from the top statements.
3709 So, in this loop we create a full list of statements
3710 we will work with. */
3711 stmts
[stmt_num
- 1] = stmt
;
3712 for (i
= stmt_num
- 2; i
>= 0; i
--)
3713 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
3715 for (i
= 0; i
< stmt_num
; i
++)
3719 /* Determine whether we should use results of
3720 already handled statements or not. */
3721 if (ready_stmts_end
== 0
3722 && (i
- stmt_index
>= width
|| op_index
< 1))
3723 ready_stmts_end
= i
;
3725 /* Now we choose operands for the next statement. Non zero
3726 value in ready_stmts_end means here that we should use
3727 the result of already generated statements as new operand. */
3728 if (ready_stmts_end
> 0)
3730 op1
= gimple_assign_lhs (stmts
[stmt_index
++]);
3731 if (ready_stmts_end
> stmt_index
)
3732 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3733 else if (op_index
>= 0)
3734 op2
= ops
[op_index
--]->op
;
3737 gcc_assert (stmt_index
< i
);
3738 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3741 if (stmt_index
>= ready_stmts_end
)
3742 ready_stmts_end
= 0;
3747 swap_ops_for_binary_stmt (ops
, op_index
- 2, NULL
);
3748 op2
= ops
[op_index
--]->op
;
3749 op1
= ops
[op_index
--]->op
;
3752 /* If we emit the last statement then we should put
3753 operands into the last statement. It will also
3755 if (op_index
< 0 && stmt_index
== i
)
3758 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3760 fprintf (dump_file
, "Transforming ");
3761 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3764 /* We keep original statement only for the last one. All
3765 others are recreated. */
3766 if (i
== stmt_num
- 1)
3768 gimple_assign_set_rhs1 (stmts
[i
], op1
);
3769 gimple_assign_set_rhs2 (stmts
[i
], op2
);
3770 update_stmt (stmts
[i
]);
3773 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
), op1
, op2
, opcode
);
3775 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3777 fprintf (dump_file
, " into ");
3778 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3782 remove_visited_stmt_chain (last_rhs1
);
3785 /* Transform STMT, which is really (A +B) + (C + D) into the left
3786 linear form, ((A+B)+C)+D.
3787 Recurse on D if necessary. */
3790 linearize_expr (gimple stmt
)
3792 gimple_stmt_iterator gsi
;
3793 gimple binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3794 gimple binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3795 gimple oldbinrhs
= binrhs
;
3796 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
3797 gimple newbinrhs
= NULL
;
3798 struct loop
*loop
= loop_containing_stmt (stmt
);
3799 tree lhs
= gimple_assign_lhs (stmt
);
3801 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
3802 && is_reassociable_op (binrhs
, rhscode
, loop
));
3804 gsi
= gsi_for_stmt (stmt
);
3806 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
3807 binrhs
= gimple_build_assign (make_ssa_name (TREE_TYPE (lhs
)),
3808 gimple_assign_rhs_code (binrhs
),
3809 gimple_assign_lhs (binlhs
),
3810 gimple_assign_rhs2 (binrhs
));
3811 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
3812 gsi_insert_before (&gsi
, binrhs
, GSI_SAME_STMT
);
3813 gimple_set_uid (binrhs
, gimple_uid (stmt
));
3815 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
3816 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3818 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3820 fprintf (dump_file
, "Linearized: ");
3821 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3824 reassociate_stats
.linearized
++;
3827 gsi
= gsi_for_stmt (oldbinrhs
);
3828 reassoc_remove_stmt (&gsi
);
3829 release_defs (oldbinrhs
);
3831 gimple_set_visited (stmt
, true);
3832 gimple_set_visited (binlhs
, true);
3833 gimple_set_visited (binrhs
, true);
3835 /* Tail recurse on the new rhs if it still needs reassociation. */
3836 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
3837 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3838 want to change the algorithm while converting to tuples. */
3839 linearize_expr (stmt
);
3842 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3843 it. Otherwise, return NULL. */
3846 get_single_immediate_use (tree lhs
)
3848 use_operand_p immuse
;
3851 if (TREE_CODE (lhs
) == SSA_NAME
3852 && single_imm_use (lhs
, &immuse
, &immusestmt
)
3853 && is_gimple_assign (immusestmt
))
3859 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3860 representing the negated value. Insertions of any necessary
3861 instructions go before GSI.
3862 This function is recursive in that, if you hand it "a_5" as the
3863 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3864 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3867 negate_value (tree tonegate
, gimple_stmt_iterator
*gsip
)
3869 gimple negatedefstmt
= NULL
;
3870 tree resultofnegate
;
3871 gimple_stmt_iterator gsi
;
3874 /* If we are trying to negate a name, defined by an add, negate the
3875 add operands instead. */
3876 if (TREE_CODE (tonegate
) == SSA_NAME
)
3877 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
3878 if (TREE_CODE (tonegate
) == SSA_NAME
3879 && is_gimple_assign (negatedefstmt
)
3880 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
3881 && has_single_use (gimple_assign_lhs (negatedefstmt
))
3882 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
3884 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
3885 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
3886 tree lhs
= gimple_assign_lhs (negatedefstmt
);
3889 gsi
= gsi_for_stmt (negatedefstmt
);
3890 rhs1
= negate_value (rhs1
, &gsi
);
3892 gsi
= gsi_for_stmt (negatedefstmt
);
3893 rhs2
= negate_value (rhs2
, &gsi
);
3895 gsi
= gsi_for_stmt (negatedefstmt
);
3896 lhs
= make_ssa_name (TREE_TYPE (lhs
));
3897 gimple_set_visited (negatedefstmt
, true);
3898 g
= gimple_build_assign (lhs
, PLUS_EXPR
, rhs1
, rhs2
);
3899 gimple_set_uid (g
, gimple_uid (negatedefstmt
));
3900 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3904 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
3905 resultofnegate
= force_gimple_operand_gsi (gsip
, tonegate
, true,
3906 NULL_TREE
, true, GSI_SAME_STMT
);
3908 uid
= gimple_uid (gsi_stmt (gsi
));
3909 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
3911 gimple stmt
= gsi_stmt (gsi
);
3912 if (gimple_uid (stmt
) != 0)
3914 gimple_set_uid (stmt
, uid
);
3916 return resultofnegate
;
3919 /* Return true if we should break up the subtract in STMT into an add
3920 with negate. This is true when we the subtract operands are really
3921 adds, or the subtract itself is used in an add expression. In
3922 either case, breaking up the subtract into an add with negate
3923 exposes the adds to reassociation. */
3926 should_break_up_subtract (gimple stmt
)
3928 tree lhs
= gimple_assign_lhs (stmt
);
3929 tree binlhs
= gimple_assign_rhs1 (stmt
);
3930 tree binrhs
= gimple_assign_rhs2 (stmt
);
3932 struct loop
*loop
= loop_containing_stmt (stmt
);
3934 if (TREE_CODE (binlhs
) == SSA_NAME
3935 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs
), PLUS_EXPR
, loop
))
3938 if (TREE_CODE (binrhs
) == SSA_NAME
3939 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
), PLUS_EXPR
, loop
))
3942 if (TREE_CODE (lhs
) == SSA_NAME
3943 && (immusestmt
= get_single_immediate_use (lhs
))
3944 && is_gimple_assign (immusestmt
)
3945 && (gimple_assign_rhs_code (immusestmt
) == PLUS_EXPR
3946 || gimple_assign_rhs_code (immusestmt
) == MULT_EXPR
))
3951 /* Transform STMT from A - B into A + -B. */
3954 break_up_subtract (gimple stmt
, gimple_stmt_iterator
*gsip
)
3956 tree rhs1
= gimple_assign_rhs1 (stmt
);
3957 tree rhs2
= gimple_assign_rhs2 (stmt
);
3959 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3961 fprintf (dump_file
, "Breaking up subtract ");
3962 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3965 rhs2
= negate_value (rhs2
, gsip
);
3966 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
3970 /* Determine whether STMT is a builtin call that raises an SSA name
3971 to an integer power and has only one use. If so, and this is early
3972 reassociation and unsafe math optimizations are permitted, place
3973 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3974 If any of these conditions does not hold, return FALSE. */
3977 acceptable_pow_call (gimple stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
3980 REAL_VALUE_TYPE c
, cint
;
3982 if (!first_pass_instance
3983 || !flag_unsafe_math_optimizations
3984 || !is_gimple_call (stmt
)
3985 || !has_single_use (gimple_call_lhs (stmt
)))
3988 fndecl
= gimple_call_fndecl (stmt
);
3991 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
3994 switch (DECL_FUNCTION_CODE (fndecl
))
3996 CASE_FLT_FN (BUILT_IN_POW
):
3997 if (flag_errno_math
)
4000 *base
= gimple_call_arg (stmt
, 0);
4001 arg1
= gimple_call_arg (stmt
, 1);
4003 if (TREE_CODE (arg1
) != REAL_CST
)
4006 c
= TREE_REAL_CST (arg1
);
4008 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
4011 *exponent
= real_to_integer (&c
);
4012 real_from_integer (&cint
, VOIDmode
, *exponent
, SIGNED
);
4013 if (!real_identical (&c
, &cint
))
4018 CASE_FLT_FN (BUILT_IN_POWI
):
4019 *base
= gimple_call_arg (stmt
, 0);
4020 arg1
= gimple_call_arg (stmt
, 1);
4022 if (!tree_fits_shwi_p (arg1
))
4025 *exponent
= tree_to_shwi (arg1
);
4032 /* Expanding negative exponents is generally unproductive, so we don't
4033 complicate matters with those. Exponents of zero and one should
4034 have been handled by expression folding. */
4035 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
4041 /* Recursively linearize a binary expression that is the RHS of STMT.
4042 Place the operands of the expression tree in the vector named OPS. */
4045 linearize_expr_tree (vec
<operand_entry_t
> *ops
, gimple stmt
,
4046 bool is_associative
, bool set_visited
)
4048 tree binlhs
= gimple_assign_rhs1 (stmt
);
4049 tree binrhs
= gimple_assign_rhs2 (stmt
);
4050 gimple binlhsdef
= NULL
, binrhsdef
= NULL
;
4051 bool binlhsisreassoc
= false;
4052 bool binrhsisreassoc
= false;
4053 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
4054 struct loop
*loop
= loop_containing_stmt (stmt
);
4055 tree base
= NULL_TREE
;
4056 HOST_WIDE_INT exponent
= 0;
4059 gimple_set_visited (stmt
, true);
4061 if (TREE_CODE (binlhs
) == SSA_NAME
)
4063 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
4064 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
4065 && !stmt_could_throw_p (binlhsdef
));
4068 if (TREE_CODE (binrhs
) == SSA_NAME
)
4070 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
4071 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
4072 && !stmt_could_throw_p (binrhsdef
));
4075 /* If the LHS is not reassociable, but the RHS is, we need to swap
4076 them. If neither is reassociable, there is nothing we can do, so
4077 just put them in the ops vector. If the LHS is reassociable,
4078 linearize it. If both are reassociable, then linearize the RHS
4081 if (!binlhsisreassoc
)
4085 /* If this is not a associative operation like division, give up. */
4086 if (!is_associative
)
4088 add_to_ops_vec (ops
, binrhs
);
4092 if (!binrhsisreassoc
)
4094 if (rhscode
== MULT_EXPR
4095 && TREE_CODE (binrhs
) == SSA_NAME
4096 && acceptable_pow_call (binrhsdef
, &base
, &exponent
))
4098 add_repeat_to_ops_vec (ops
, base
, exponent
);
4099 gimple_set_visited (binrhsdef
, true);
4102 add_to_ops_vec (ops
, binrhs
);
4104 if (rhscode
== MULT_EXPR
4105 && TREE_CODE (binlhs
) == SSA_NAME
4106 && acceptable_pow_call (binlhsdef
, &base
, &exponent
))
4108 add_repeat_to_ops_vec (ops
, base
, exponent
);
4109 gimple_set_visited (binlhsdef
, true);
4112 add_to_ops_vec (ops
, binlhs
);
4117 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4119 fprintf (dump_file
, "swapping operands of ");
4120 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4123 swap_ssa_operands (stmt
,
4124 gimple_assign_rhs1_ptr (stmt
),
4125 gimple_assign_rhs2_ptr (stmt
));
4128 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4130 fprintf (dump_file
, " is now ");
4131 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4134 /* We want to make it so the lhs is always the reassociative op,
4140 else if (binrhsisreassoc
)
4142 linearize_expr (stmt
);
4143 binlhs
= gimple_assign_rhs1 (stmt
);
4144 binrhs
= gimple_assign_rhs2 (stmt
);
4147 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
4148 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
4150 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
4151 is_associative
, set_visited
);
4153 if (rhscode
== MULT_EXPR
4154 && TREE_CODE (binrhs
) == SSA_NAME
4155 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs
), &base
, &exponent
))
4157 add_repeat_to_ops_vec (ops
, base
, exponent
);
4158 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs
), true);
4161 add_to_ops_vec (ops
, binrhs
);
4164 /* Repropagate the negates back into subtracts, since no other pass
4165 currently does it. */
4168 repropagate_negates (void)
4173 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
4175 gimple user
= get_single_immediate_use (negate
);
4177 if (!user
|| !is_gimple_assign (user
))
4180 /* The negate operand can be either operand of a PLUS_EXPR
4181 (it can be the LHS if the RHS is a constant for example).
4183 Force the negate operand to the RHS of the PLUS_EXPR, then
4184 transform the PLUS_EXPR into a MINUS_EXPR. */
4185 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
4187 /* If the negated operand appears on the LHS of the
4188 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4189 to force the negated operand to the RHS of the PLUS_EXPR. */
4190 if (gimple_assign_rhs1 (user
) == negate
)
4192 swap_ssa_operands (user
,
4193 gimple_assign_rhs1_ptr (user
),
4194 gimple_assign_rhs2_ptr (user
));
4197 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4198 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4199 if (gimple_assign_rhs2 (user
) == negate
)
4201 tree rhs1
= gimple_assign_rhs1 (user
);
4202 tree rhs2
= get_unary_op (negate
, NEGATE_EXPR
);
4203 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
4204 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
, rhs2
);
4208 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
4210 if (gimple_assign_rhs1 (user
) == negate
)
4215 which we transform into
4218 This pushes down the negate which we possibly can merge
4219 into some other operation, hence insert it into the
4220 plus_negates vector. */
4221 gimple feed
= SSA_NAME_DEF_STMT (negate
);
4222 tree a
= gimple_assign_rhs1 (feed
);
4223 tree b
= gimple_assign_rhs2 (user
);
4224 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
);
4225 gimple_stmt_iterator gsi2
= gsi_for_stmt (user
);
4226 tree x
= make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed
)));
4227 gimple g
= gimple_build_assign (x
, PLUS_EXPR
, a
, b
);
4228 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
4229 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, x
);
4230 user
= gsi_stmt (gsi2
);
4232 reassoc_remove_stmt (&gsi
);
4233 release_defs (feed
);
4234 plus_negates
.safe_push (gimple_assign_lhs (user
));
4238 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4239 rid of one operation. */
4240 gimple feed
= SSA_NAME_DEF_STMT (negate
);
4241 tree a
= gimple_assign_rhs1 (feed
);
4242 tree rhs1
= gimple_assign_rhs1 (user
);
4243 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
4244 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, a
);
4245 update_stmt (gsi_stmt (gsi
));
4251 /* Returns true if OP is of a type for which we can do reassociation.
4252 That is for integral or non-saturating fixed-point types, and for
4253 floating point type when associative-math is enabled. */
4256 can_reassociate_p (tree op
)
4258 tree type
= TREE_TYPE (op
);
4259 if ((INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_WRAPS (type
))
4260 || NON_SAT_FIXED_POINT_TYPE_P (type
)
4261 || (flag_associative_math
&& FLOAT_TYPE_P (type
)))
4266 /* Break up subtract operations in block BB.
4268 We do this top down because we don't know whether the subtract is
4269 part of a possible chain of reassociation except at the top.
4278 we want to break up k = t - q, but we won't until we've transformed q
4279 = b - r, which won't be broken up until we transform b = c - d.
4281 En passant, clear the GIMPLE visited flag on every statement
4282 and set UIDs within each basic block. */
4285 break_up_subtract_bb (basic_block bb
)
4287 gimple_stmt_iterator gsi
;
4289 unsigned int uid
= 1;
4291 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4293 gimple stmt
= gsi_stmt (gsi
);
4294 gimple_set_visited (stmt
, false);
4295 gimple_set_uid (stmt
, uid
++);
4297 if (!is_gimple_assign (stmt
)
4298 || !can_reassociate_p (gimple_assign_lhs (stmt
)))
4301 /* Look for simple gimple subtract operations. */
4302 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
4304 if (!can_reassociate_p (gimple_assign_rhs1 (stmt
))
4305 || !can_reassociate_p (gimple_assign_rhs2 (stmt
)))
4308 /* Check for a subtract used only in an addition. If this
4309 is the case, transform it into add of a negate for better
4310 reassociation. IE transform C = A-B into C = A + -B if C
4311 is only used in an addition. */
4312 if (should_break_up_subtract (stmt
))
4313 break_up_subtract (stmt
, &gsi
);
4315 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
4316 && can_reassociate_p (gimple_assign_rhs1 (stmt
)))
4317 plus_negates
.safe_push (gimple_assign_lhs (stmt
));
4319 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
4321 son
= next_dom_son (CDI_DOMINATORS
, son
))
4322 break_up_subtract_bb (son
);
4325 /* Used for repeated factor analysis. */
4326 struct repeat_factor_d
4328 /* An SSA name that occurs in a multiply chain. */
4331 /* Cached rank of the factor. */
4334 /* Number of occurrences of the factor in the chain. */
4335 HOST_WIDE_INT count
;
4337 /* An SSA name representing the product of this factor and
4338 all factors appearing later in the repeated factor vector. */
4342 typedef struct repeat_factor_d repeat_factor
, *repeat_factor_t
;
4343 typedef const struct repeat_factor_d
*const_repeat_factor_t
;
4346 static vec
<repeat_factor
> repeat_factor_vec
;
4348 /* Used for sorting the repeat factor vector. Sort primarily by
4349 ascending occurrence count, secondarily by descending rank. */
4352 compare_repeat_factors (const void *x1
, const void *x2
)
4354 const_repeat_factor_t rf1
= (const_repeat_factor_t
) x1
;
4355 const_repeat_factor_t rf2
= (const_repeat_factor_t
) x2
;
4357 if (rf1
->count
!= rf2
->count
)
4358 return rf1
->count
- rf2
->count
;
4360 return rf2
->rank
- rf1
->rank
;
4363 /* Look for repeated operands in OPS in the multiply tree rooted at
4364 STMT. Replace them with an optimal sequence of multiplies and powi
4365 builtin calls, and remove the used operands from OPS. Return an
4366 SSA name representing the value of the replacement sequence. */
4369 attempt_builtin_powi (gimple stmt
, vec
<operand_entry_t
> *ops
)
4371 unsigned i
, j
, vec_len
;
4374 repeat_factor_t rf1
, rf2
;
4375 repeat_factor rfnew
;
4376 tree result
= NULL_TREE
;
4377 tree target_ssa
, iter_result
;
4378 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
4379 tree powi_fndecl
= mathfn_built_in (type
, BUILT_IN_POWI
);
4380 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4381 gimple mul_stmt
, pow_stmt
;
4383 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4388 /* Allocate the repeated factor vector. */
4389 repeat_factor_vec
.create (10);
4391 /* Scan the OPS vector for all SSA names in the product and build
4392 up a vector of occurrence counts for each factor. */
4393 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
4395 if (TREE_CODE (oe
->op
) == SSA_NAME
)
4397 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4399 if (rf1
->factor
== oe
->op
)
4401 rf1
->count
+= oe
->count
;
4406 if (j
>= repeat_factor_vec
.length ())
4408 rfnew
.factor
= oe
->op
;
4409 rfnew
.rank
= oe
->rank
;
4410 rfnew
.count
= oe
->count
;
4411 rfnew
.repr
= NULL_TREE
;
4412 repeat_factor_vec
.safe_push (rfnew
);
4417 /* Sort the repeated factor vector by (a) increasing occurrence count,
4418 and (b) decreasing rank. */
4419 repeat_factor_vec
.qsort (compare_repeat_factors
);
4421 /* It is generally best to combine as many base factors as possible
4422 into a product before applying __builtin_powi to the result.
4423 However, the sort order chosen for the repeated factor vector
4424 allows us to cache partial results for the product of the base
4425 factors for subsequent use. When we already have a cached partial
4426 result from a previous iteration, it is best to make use of it
4427 before looking for another __builtin_pow opportunity.
4429 As an example, consider x * x * y * y * y * z * z * z * z.
4430 We want to first compose the product x * y * z, raise it to the
4431 second power, then multiply this by y * z, and finally multiply
4432 by z. This can be done in 5 multiplies provided we cache y * z
4433 for use in both expressions:
4441 If we instead ignored the cached y * z and first multiplied by
4442 the __builtin_pow opportunity z * z, we would get the inferior:
4451 vec_len
= repeat_factor_vec
.length ();
4453 /* Repeatedly look for opportunities to create a builtin_powi call. */
4456 HOST_WIDE_INT power
;
4458 /* First look for the largest cached product of factors from
4459 preceding iterations. If found, create a builtin_powi for
4460 it if the minimum occurrence count for its factors is at
4461 least 2, or just use this cached product as our next
4462 multiplicand if the minimum occurrence count is 1. */
4463 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4465 if (rf1
->repr
&& rf1
->count
> 0)
4475 iter_result
= rf1
->repr
;
4477 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4481 fputs ("Multiplying by cached product ", dump_file
);
4482 for (elt
= j
; elt
< vec_len
; elt
++)
4484 rf
= &repeat_factor_vec
[elt
];
4485 print_generic_expr (dump_file
, rf
->factor
, 0);
4486 if (elt
< vec_len
- 1)
4487 fputs (" * ", dump_file
);
4489 fputs ("\n", dump_file
);
4494 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4495 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
4496 build_int_cst (integer_type_node
,
4498 gimple_call_set_lhs (pow_stmt
, iter_result
);
4499 gimple_set_location (pow_stmt
, gimple_location (stmt
));
4500 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
4502 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4506 fputs ("Building __builtin_pow call for cached product (",
4508 for (elt
= j
; elt
< vec_len
; elt
++)
4510 rf
= &repeat_factor_vec
[elt
];
4511 print_generic_expr (dump_file
, rf
->factor
, 0);
4512 if (elt
< vec_len
- 1)
4513 fputs (" * ", dump_file
);
4515 fprintf (dump_file
, ")^"HOST_WIDE_INT_PRINT_DEC
"\n",
4522 /* Otherwise, find the first factor in the repeated factor
4523 vector whose occurrence count is at least 2. If no such
4524 factor exists, there are no builtin_powi opportunities
4526 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
4528 if (rf1
->count
>= 2)
4537 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4541 fputs ("Building __builtin_pow call for (", dump_file
);
4542 for (elt
= j
; elt
< vec_len
; elt
++)
4544 rf
= &repeat_factor_vec
[elt
];
4545 print_generic_expr (dump_file
, rf
->factor
, 0);
4546 if (elt
< vec_len
- 1)
4547 fputs (" * ", dump_file
);
4549 fprintf (dump_file
, ")^"HOST_WIDE_INT_PRINT_DEC
"\n", power
);
4552 reassociate_stats
.pows_created
++;
4554 /* Visit each element of the vector in reverse order (so that
4555 high-occurrence elements are visited first, and within the
4556 same occurrence count, lower-ranked elements are visited
4557 first). Form a linear product of all elements in this order
4558 whose occurrencce count is at least that of element J.
4559 Record the SSA name representing the product of each element
4560 with all subsequent elements in the vector. */
4561 if (j
== vec_len
- 1)
4562 rf1
->repr
= rf1
->factor
;
4565 for (ii
= vec_len
- 2; ii
>= (int)j
; ii
--)
4569 rf1
= &repeat_factor_vec
[ii
];
4570 rf2
= &repeat_factor_vec
[ii
+ 1];
4572 /* Init the last factor's representative to be itself. */
4574 rf2
->repr
= rf2
->factor
;
4579 target_ssa
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4580 mul_stmt
= gimple_build_assign (target_ssa
, MULT_EXPR
,
4582 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4583 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
4584 rf1
->repr
= target_ssa
;
4586 /* Don't reprocess the multiply we just introduced. */
4587 gimple_set_visited (mul_stmt
, true);
4591 /* Form a call to __builtin_powi for the maximum product
4592 just formed, raised to the power obtained earlier. */
4593 rf1
= &repeat_factor_vec
[j
];
4594 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4595 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
4596 build_int_cst (integer_type_node
,
4598 gimple_call_set_lhs (pow_stmt
, iter_result
);
4599 gimple_set_location (pow_stmt
, gimple_location (stmt
));
4600 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
4603 /* If we previously formed at least one other builtin_powi call,
4604 form the product of this one and those others. */
4607 tree new_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
4608 mul_stmt
= gimple_build_assign (new_result
, MULT_EXPR
,
4609 result
, iter_result
);
4610 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4611 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
4612 gimple_set_visited (mul_stmt
, true);
4613 result
= new_result
;
4616 result
= iter_result
;
4618 /* Decrement the occurrence count of each element in the product
4619 by the count found above, and remove this many copies of each
4621 for (i
= j
; i
< vec_len
; i
++)
4626 rf1
= &repeat_factor_vec
[i
];
4627 rf1
->count
-= power
;
4629 FOR_EACH_VEC_ELT_REVERSE (*ops
, n
, oe
)
4631 if (oe
->op
== rf1
->factor
)
4635 ops
->ordered_remove (n
);
4651 /* At this point all elements in the repeated factor vector have a
4652 remaining occurrence count of 0 or 1, and those with a count of 1
4653 don't have cached representatives. Re-sort the ops vector and
4655 ops
->qsort (sort_by_operand_rank
);
4656 repeat_factor_vec
.release ();
4658 /* Return the final product computed herein. Note that there may
4659 still be some elements with single occurrence count left in OPS;
4660 those will be handled by the normal reassociation logic. */
4664 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4667 transform_stmt_to_copy (gimple_stmt_iterator
*gsi
, gimple stmt
, tree new_rhs
)
4671 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4673 fprintf (dump_file
, "Transforming ");
4674 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4677 rhs1
= gimple_assign_rhs1 (stmt
);
4678 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
4680 remove_visited_stmt_chain (rhs1
);
4682 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4684 fprintf (dump_file
, " into ");
4685 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4689 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4692 transform_stmt_to_multiply (gimple_stmt_iterator
*gsi
, gimple stmt
,
4693 tree rhs1
, tree rhs2
)
4695 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4697 fprintf (dump_file
, "Transforming ");
4698 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4701 gimple_assign_set_rhs_with_ops (gsi
, MULT_EXPR
, rhs1
, rhs2
);
4702 update_stmt (gsi_stmt (*gsi
));
4703 remove_visited_stmt_chain (rhs1
);
4705 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4707 fprintf (dump_file
, " into ");
4708 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4712 /* Reassociate expressions in basic block BB and its post-dominator as
4716 reassociate_bb (basic_block bb
)
4718 gimple_stmt_iterator gsi
;
4720 gimple stmt
= last_stmt (bb
);
4722 if (stmt
&& !gimple_visited_p (stmt
))
4723 maybe_optimize_range_tests (stmt
);
4725 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
4727 stmt
= gsi_stmt (gsi
);
4729 if (is_gimple_assign (stmt
)
4730 && !stmt_could_throw_p (stmt
))
4732 tree lhs
, rhs1
, rhs2
;
4733 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
4735 /* If this is not a gimple binary expression, there is
4736 nothing for us to do with it. */
4737 if (get_gimple_rhs_class (rhs_code
) != GIMPLE_BINARY_RHS
)
4740 /* If this was part of an already processed statement,
4741 we don't need to touch it again. */
4742 if (gimple_visited_p (stmt
))
4744 /* This statement might have become dead because of previous
4746 if (has_zero_uses (gimple_get_lhs (stmt
)))
4748 reassoc_remove_stmt (&gsi
);
4749 release_defs (stmt
);
4750 /* We might end up removing the last stmt above which
4751 places the iterator to the end of the sequence.
4752 Reset it to the last stmt in this case which might
4753 be the end of the sequence as well if we removed
4754 the last statement of the sequence. In which case
4755 we need to bail out. */
4756 if (gsi_end_p (gsi
))
4758 gsi
= gsi_last_bb (bb
);
4759 if (gsi_end_p (gsi
))
4766 lhs
= gimple_assign_lhs (stmt
);
4767 rhs1
= gimple_assign_rhs1 (stmt
);
4768 rhs2
= gimple_assign_rhs2 (stmt
);
4770 /* For non-bit or min/max operations we can't associate
4771 all types. Verify that here. */
4772 if (rhs_code
!= BIT_IOR_EXPR
4773 && rhs_code
!= BIT_AND_EXPR
4774 && rhs_code
!= BIT_XOR_EXPR
4775 && rhs_code
!= MIN_EXPR
4776 && rhs_code
!= MAX_EXPR
4777 && (!can_reassociate_p (lhs
)
4778 || !can_reassociate_p (rhs1
)
4779 || !can_reassociate_p (rhs2
)))
4782 if (associative_tree_code (rhs_code
))
4784 auto_vec
<operand_entry_t
> ops
;
4785 tree powi_result
= NULL_TREE
;
4787 /* There may be no immediate uses left by the time we
4788 get here because we may have eliminated them all. */
4789 if (TREE_CODE (lhs
) == SSA_NAME
&& has_zero_uses (lhs
))
4792 gimple_set_visited (stmt
, true);
4793 linearize_expr_tree (&ops
, stmt
, true, true);
4794 ops
.qsort (sort_by_operand_rank
);
4795 optimize_ops_list (rhs_code
, &ops
);
4796 if (undistribute_ops_list (rhs_code
, &ops
,
4797 loop_containing_stmt (stmt
)))
4799 ops
.qsort (sort_by_operand_rank
);
4800 optimize_ops_list (rhs_code
, &ops
);
4803 if (rhs_code
== BIT_IOR_EXPR
|| rhs_code
== BIT_AND_EXPR
)
4804 optimize_range_tests (rhs_code
, &ops
);
4806 if (first_pass_instance
4807 && rhs_code
== MULT_EXPR
4808 && flag_unsafe_math_optimizations
)
4809 powi_result
= attempt_builtin_powi (stmt
, &ops
);
4811 /* If the operand vector is now empty, all operands were
4812 consumed by the __builtin_powi optimization. */
4813 if (ops
.length () == 0)
4814 transform_stmt_to_copy (&gsi
, stmt
, powi_result
);
4815 else if (ops
.length () == 1)
4817 tree last_op
= ops
.last ()->op
;
4820 transform_stmt_to_multiply (&gsi
, stmt
, last_op
,
4823 transform_stmt_to_copy (&gsi
, stmt
, last_op
);
4827 machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
4828 int ops_num
= ops
.length ();
4829 int width
= get_reassociation_width (ops_num
, rhs_code
, mode
);
4832 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4834 "Width = %d was chosen for reassociation\n", width
);
4837 && ops
.length () > 3)
4838 rewrite_expr_tree_parallel (as_a
<gassign
*> (stmt
),
4842 /* When there are three operands left, we want
4843 to make sure the ones that get the double
4844 binary op are chosen wisely. */
4845 int len
= ops
.length ();
4847 swap_ops_for_binary_stmt (ops
, len
- 3, stmt
);
4849 new_lhs
= rewrite_expr_tree (stmt
, 0, ops
,
4850 powi_result
!= NULL
);
4853 /* If we combined some repeated factors into a
4854 __builtin_powi call, multiply that result by the
4855 reassociated operands. */
4858 gimple mul_stmt
, lhs_stmt
= SSA_NAME_DEF_STMT (lhs
);
4859 tree type
= TREE_TYPE (lhs
);
4860 tree target_ssa
= make_temp_ssa_name (type
, NULL
,
4862 gimple_set_lhs (lhs_stmt
, target_ssa
);
4863 update_stmt (lhs_stmt
);
4865 target_ssa
= new_lhs
;
4866 mul_stmt
= gimple_build_assign (lhs
, MULT_EXPR
,
4867 powi_result
, target_ssa
);
4868 gimple_set_location (mul_stmt
, gimple_location (stmt
));
4869 gsi_insert_after (&gsi
, mul_stmt
, GSI_NEW_STMT
);
4875 for (son
= first_dom_son (CDI_POST_DOMINATORS
, bb
);
4877 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
4878 reassociate_bb (son
);
4881 /* Add jumps around shifts for range tests turned into bit tests.
4882 For each SSA_NAME VAR we have code like:
4883 VAR = ...; // final stmt of range comparison
4884 // bit test here...;
4885 OTHERVAR = ...; // final stmt of the bit test sequence
4886 RES = VAR | OTHERVAR;
4887 Turn the above into:
4894 // bit test here...;
4897 # RES = PHI<1(l1), OTHERVAR(l2)>; */
4905 FOR_EACH_VEC_ELT (reassoc_branch_fixups
, i
, var
)
4907 gimple def_stmt
= SSA_NAME_DEF_STMT (var
);
4910 bool ok
= single_imm_use (var
, &use
, &use_stmt
);
4912 && is_gimple_assign (use_stmt
)
4913 && gimple_assign_rhs_code (use_stmt
) == BIT_IOR_EXPR
4914 && gimple_bb (def_stmt
) == gimple_bb (use_stmt
));
4916 basic_block cond_bb
= gimple_bb (def_stmt
);
4917 basic_block then_bb
= split_block (cond_bb
, def_stmt
)->dest
;
4918 basic_block merge_bb
= split_block (then_bb
, use_stmt
)->dest
;
4920 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
4921 gimple g
= gimple_build_cond (NE_EXPR
, var
,
4922 build_zero_cst (TREE_TYPE (var
)),
4923 NULL_TREE
, NULL_TREE
);
4924 location_t loc
= gimple_location (use_stmt
);
4925 gimple_set_location (g
, loc
);
4926 gsi_insert_after (&gsi
, g
, GSI_NEW_STMT
);
4928 edge etrue
= make_edge (cond_bb
, merge_bb
, EDGE_TRUE_VALUE
);
4929 etrue
->probability
= REG_BR_PROB_BASE
/ 2;
4930 etrue
->count
= cond_bb
->count
/ 2;
4931 edge efalse
= find_edge (cond_bb
, then_bb
);
4932 efalse
->flags
= EDGE_FALSE_VALUE
;
4933 efalse
->probability
-= etrue
->probability
;
4934 efalse
->count
-= etrue
->count
;
4935 then_bb
->count
-= etrue
->count
;
4937 tree othervar
= NULL_TREE
;
4938 if (gimple_assign_rhs1 (use_stmt
) == var
)
4939 othervar
= gimple_assign_rhs2 (use_stmt
);
4940 else if (gimple_assign_rhs2 (use_stmt
) == var
)
4941 othervar
= gimple_assign_rhs1 (use_stmt
);
4944 tree lhs
= gimple_assign_lhs (use_stmt
);
4945 gphi
*phi
= create_phi_node (lhs
, merge_bb
);
4946 add_phi_arg (phi
, build_one_cst (TREE_TYPE (lhs
)), etrue
, loc
);
4947 add_phi_arg (phi
, othervar
, single_succ_edge (then_bb
), loc
);
4948 gsi
= gsi_for_stmt (use_stmt
);
4949 gsi_remove (&gsi
, true);
4951 set_immediate_dominator (CDI_DOMINATORS
, merge_bb
, cond_bb
);
4952 set_immediate_dominator (CDI_POST_DOMINATORS
, cond_bb
, merge_bb
);
4954 reassoc_branch_fixups
.release ();
4957 void dump_ops_vector (FILE *file
, vec
<operand_entry_t
> ops
);
4958 void debug_ops_vector (vec
<operand_entry_t
> ops
);
4960 /* Dump the operand entry vector OPS to FILE. */
4963 dump_ops_vector (FILE *file
, vec
<operand_entry_t
> ops
)
4968 FOR_EACH_VEC_ELT (ops
, i
, oe
)
4970 fprintf (file
, "Op %d -> rank: %d, tree: ", i
, oe
->rank
);
4971 print_generic_expr (file
, oe
->op
, 0);
4975 /* Dump the operand entry vector OPS to STDERR. */
4978 debug_ops_vector (vec
<operand_entry_t
> ops
)
4980 dump_ops_vector (stderr
, ops
);
4986 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
4987 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun
));
4990 /* Initialize the reassociation pass. */
4997 int *bbs
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
);
4999 /* Find the loops, so that we can prevent moving calculations in
5001 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
5003 memset (&reassociate_stats
, 0, sizeof (reassociate_stats
));
5005 operand_entry_pool
= create_alloc_pool ("operand entry pool",
5006 sizeof (struct operand_entry
), 30);
5007 next_operand_entry_id
= 0;
5009 /* Reverse RPO (Reverse Post Order) will give us something where
5010 deeper loops come later. */
5011 pre_and_rev_post_order_compute (NULL
, bbs
, false);
5012 bb_rank
= XCNEWVEC (long, last_basic_block_for_fn (cfun
));
5013 operand_rank
= new hash_map
<tree
, long>;
5015 /* Give each default definition a distinct rank. This includes
5016 parameters and the static chain. Walk backwards over all
5017 SSA names so that we get proper rank ordering according
5018 to tree_swap_operands_p. */
5019 for (i
= num_ssa_names
- 1; i
> 0; --i
)
5021 tree name
= ssa_name (i
);
5022 if (name
&& SSA_NAME_IS_DEFAULT_DEF (name
))
5023 insert_operand_rank (name
, ++rank
);
5026 /* Set up rank for each BB */
5027 for (i
= 0; i
< n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
; i
++)
5028 bb_rank
[bbs
[i
]] = ++rank
<< 16;
5031 calculate_dominance_info (CDI_POST_DOMINATORS
);
5032 plus_negates
= vNULL
;
5035 /* Cleanup after the reassociation pass, and print stats if
5041 statistics_counter_event (cfun
, "Linearized",
5042 reassociate_stats
.linearized
);
5043 statistics_counter_event (cfun
, "Constants eliminated",
5044 reassociate_stats
.constants_eliminated
);
5045 statistics_counter_event (cfun
, "Ops eliminated",
5046 reassociate_stats
.ops_eliminated
);
5047 statistics_counter_event (cfun
, "Statements rewritten",
5048 reassociate_stats
.rewritten
);
5049 statistics_counter_event (cfun
, "Built-in pow[i] calls encountered",
5050 reassociate_stats
.pows_encountered
);
5051 statistics_counter_event (cfun
, "Built-in powi calls created",
5052 reassociate_stats
.pows_created
);
5054 delete operand_rank
;
5055 free_alloc_pool (operand_entry_pool
);
5057 plus_negates
.release ();
5058 free_dominance_info (CDI_POST_DOMINATORS
);
5059 loop_optimizer_finalize ();
5062 /* Gate and execute functions for Reassociation. */
5065 execute_reassoc (void)
5070 repropagate_negates ();
5079 const pass_data pass_data_reassoc
=
5081 GIMPLE_PASS
, /* type */
5082 "reassoc", /* name */
5083 OPTGROUP_NONE
, /* optinfo_flags */
5084 TV_TREE_REASSOC
, /* tv_id */
5085 ( PROP_cfg
| PROP_ssa
), /* properties_required */
5086 0, /* properties_provided */
5087 0, /* properties_destroyed */
5088 0, /* todo_flags_start */
5089 TODO_update_ssa_only_virtuals
, /* todo_flags_finish */
5092 class pass_reassoc
: public gimple_opt_pass
5095 pass_reassoc (gcc::context
*ctxt
)
5096 : gimple_opt_pass (pass_data_reassoc
, ctxt
)
5099 /* opt_pass methods: */
5100 opt_pass
* clone () { return new pass_reassoc (m_ctxt
); }
5101 virtual bool gate (function
*) { return flag_tree_reassoc
!= 0; }
5102 virtual unsigned int execute (function
*) { return execute_reassoc (); }
5104 }; // class pass_reassoc
5109 make_pass_reassoc (gcc::context
*ctxt
)
5111 return new pass_reassoc (ctxt
);