1 /* Reassociation for trees.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
31 #include "fold-const.h"
32 #include "stor-layout.h"
34 #include "gimple-pretty-print.h"
35 #include "tree-inline.h"
36 #include "internal-fn.h"
37 #include "gimple-fold.h"
39 #include "gimple-iterator.h"
40 #include "gimplify-me.h"
42 #include "tree-ssa-loop-niter.h"
43 #include "tree-ssa-loop.h"
45 #include "insn-config.h"
56 #include "tree-iterator.h"
57 #include "tree-pass.h"
58 #include "alloc-pool.h"
59 #include "langhooks.h"
63 #include "diagnostic-core.h"
66 #include "insn-codes.h"
69 /* This is a simple global reassociation pass. It is, in part, based
70 on the LLVM pass of the same name (They do some things more/less
71 than we do, in different orders, etc).
73 It consists of five steps:
75 1. Breaking up subtract operations into addition + negate, where
76 it would promote the reassociation of adds.
78 2. Left linearization of the expression trees, so that (A+B)+(C+D)
79 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
80 During linearization, we place the operands of the binary
81 expressions into a vector of operand_entry_t
   3. Optimization of the operand lists, eliminating things like a +
      -a, a & a & a, etc.
86 3a. Combine repeated factors with the same occurrence counts
87 into a __builtin_powi call that will later be optimized into
88 an optimal number of multiplies.
90 4. Rewrite the expression trees we linearized and optimized so
91 they are in proper rank order.
93 5. Repropagate negates, as nothing else will clean it up ATM.
95 A bit of theory on #4, since nobody seems to write anything down
96 about why it makes sense to do it the way they do it:
98 We could do this much nicer theoretically, but don't (for reasons
99 explained after how to do it theoretically nice :P).
101 In order to promote the most redundancy elimination, you want
102 binary expressions whose operands are the same rank (or
103 preferably, the same value) exposed to the redundancy eliminator,
104 for possible elimination.
106 So the way to do this if we really cared, is to build the new op
107 tree from the leaves to the roots, merging as you go, and putting the
108 new op on the end of the worklist, until you are left with one
109 thing on the worklist.
111 IE if you have to rewrite the following set of operands (listed with
112 rank in parentheses), with opcode PLUS_EXPR:
114 a (1), b (1), c (1), d (2), e (2)
   We start with our merge worklist empty, and the ops list with all of
   those on it.

   You want to first merge all leaves of the same rank, as much as
   possible.
123 So first build a binary op of
125 mergetmp = a + b, and put "mergetmp" on the merge worklist.
127 Because there is no three operand form of PLUS_EXPR, c is not going to
128 be exposed to redundancy elimination as a rank 1 operand.
130 So you might as well throw it on the merge worklist (you could also
131 consider it to now be a rank two operand, and merge it with d and e,
132 but in this case, you then have evicted e from a binary op. So at
133 least in this situation, you can't win.)
   Then build a binary op of d + e
   mergetmp2 = d + e
   and put mergetmp2 on the merge worklist.
140 so merge worklist = {mergetmp, c, mergetmp2}
142 Continue building binary ops of these operations until you have only
143 one operation left on the worklist.
148 mergetmp3 = mergetmp + c
150 worklist = {mergetmp2, mergetmp3}
152 mergetmp4 = mergetmp2 + mergetmp3
154 worklist = {mergetmp4}
156 because we have one operation left, we can now just set the original
157 statement equal to the result of that operation.
159 This will at least expose a + b and d + e to redundancy elimination
160 as binary operations.
162 For extra points, you can reuse the old statements to build the
163 mergetmps, since you shouldn't run out.
165 So why don't we do this?
167 Because it's expensive, and rarely will help. Most trees we are
   reassociating have 3 or fewer ops.  If they have 2 ops, they already
169 will be written into a nice single binary op. If you have 3 ops, a
170 single simple check suffices to tell you whether the first two are of the
   same rank.  If so, you know to order it

   mergetmp = op1 + op2
   newstmt = mergetmp + op3

   instead of

   mergetmp = op2 + op3
   newstmt = mergetmp + op1

   If all three are of the same rank, you can't expose them all in a
   single binary operator anyway, so the above is *still* the best you
   can do.
184 Thus, this is what we do. When we have three ops left, we check to see
185 what order to put them in, and call it a day. As a nod to vector sum
186 reduction, we check if any of the ops are really a phi node that is a
187 destructive update for the associating op, and keep the destructive
188 update together for vector sum reduction recognition. */
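/* For illustration only (a sketch of the overall effect, not part of the
   implementation): given the input

     t1 = a - b
     t2 = t1 + b

   step 1 rewrites t1 as a + (-b), step 2 linearizes t2 into the operand
   list { a, -b, b }, step 3 cancels -b against b (leaving just { a } once
   the resulting zero is folded away), and step 4 rewrites the statements
   so that t2 ends up being simply a.  */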
  int constants_eliminated;
  int pows_encountered;
202 /* Operator, rank pair. */
203 typedef struct operand_entry
static pool_allocator<operand_entry> operand_entry_pool ("operand entry pool",
214 /* This is used to assign a unique ID to each struct operand_entry
215 so that qsort results are identical on different hosts. */
static int next_operand_entry_id;
218 /* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;
223 /* Operand->rank hashtable. */
static hash_map<tree, long> *operand_rank;
226 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
227 all basic blocks the CFG should be adjusted - basic blocks
228 split right after that SSA_NAME's definition statement and before
229 the only use, which must be a bit ior. */
static vec<tree> reassoc_branch_fixups;
static long get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
236 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
237 possibly added by gsi_remove. */
static bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}
266 /* Bias amount for loop-carried phis. We want this to be larger than
267 the depth of any reassociation tree we can see, but not larger than
268 the rank difference between two blocks. */
269 #define PHI_LOOP_BIAS (1 << 15)
271 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
272 an innermost loop, and the phi has only a single use which is inside
273 the loop, then the rank is the block rank of the loop latch plus an
274 extra bias for the loop-carried dependence. This causes expressions
275 calculated into an accumulator variable to be independent for each
276 iteration of the loop. If STMT is some other phi, the rank is the
277 block rank of its containing block. */
279 phi_rank (gimple stmt
)
281 basic_block bb
= gimple_bb (stmt
);
282 struct loop
*father
= bb
->loop_father
;
288 /* We only care about real loops (those with a latch). */
290 return bb_rank
[bb
->index
];
292 /* Interesting phis must be in headers of innermost loops. */
293 if (bb
!= father
->header
295 return bb_rank
[bb
->index
];
297 /* Ignore virtual SSA_NAMEs. */
298 res
= gimple_phi_result (stmt
);
299 if (virtual_operand_p (res
))
300 return bb_rank
[bb
->index
];
302 /* The phi definition must have a single use, and that use must be
303 within the loop. Otherwise this isn't an accumulator pattern. */
304 if (!single_imm_use (res
, &use
, &use_stmt
)
305 || gimple_bb (use_stmt
)->loop_father
!= father
)
306 return bb_rank
[bb
->index
];
308 /* Look for phi arguments from within the loop. If found, bias this phi. */
309 for (i
= 0; i
< gimple_phi_num_args (stmt
); i
++)
311 tree arg
= gimple_phi_arg_def (stmt
, i
);
312 if (TREE_CODE (arg
) == SSA_NAME
313 && !SSA_NAME_IS_DEFAULT_DEF (arg
))
315 gimple def_stmt
= SSA_NAME_DEF_STMT (arg
);
316 if (gimple_bb (def_stmt
)->loop_father
== father
)
317 return bb_rank
[father
->latch
->index
] + PHI_LOOP_BIAS
;
321 /* Must be an uninteresting phi. */
322 return bb_rank
[bb
->index
];
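/* As a concrete, illustrative example of the bias above: in a loop such as

     for (i = 0; i < n; i++)
       sum = sum + a[i];

   the loop-carried phi for sum gets the rank of the loop latch block plus
   PHI_LOOP_BIAS, so when the additions feeding sum are rewritten the
   accumulator is added last and successive iterations no longer serialize
   on each other once the loop is unrolled.  */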
325 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
326 loop-carried dependence of an innermost loop, return TRUE; else
329 loop_carried_phi (tree exp
)
334 if (TREE_CODE (exp
) != SSA_NAME
335 || SSA_NAME_IS_DEFAULT_DEF (exp
))
338 phi_stmt
= SSA_NAME_DEF_STMT (exp
);
340 if (gimple_code (SSA_NAME_DEF_STMT (exp
)) != GIMPLE_PHI
)
343 /* Non-loop-carried phis have block rank. Loop-carried phis have
344 an additional bias added in. If this phi doesn't have block rank,
345 it's biased and should not be propagated. */
346 block_rank
= bb_rank
[gimple_bb (phi_stmt
)->index
];
348 if (phi_rank (phi_stmt
) != block_rank
)
354 /* Return the maximum of RANK and the rank that should be propagated
355 from expression OP. For most operands, this is just the rank of OP.
356 For loop-carried phis, the value is zero to avoid undoing the bias
357 in favor of the phi. */
359 propagate_rank (long rank
, tree op
)
363 if (loop_carried_phi (op
))
366 op_rank
= get_rank (op
);
368 return MAX (rank
, op_rank
);
371 /* Look up the operand rank structure for expression E. */
374 find_operand_rank (tree e
)
376 long *slot
= operand_rank
->get (e
);
377 return slot
? *slot
: -1;
380 /* Insert {E,RANK} into the operand rank hashtable. */
383 insert_operand_rank (tree e
, long rank
)
385 gcc_assert (rank
> 0);
386 gcc_assert (!operand_rank
->put (e
, rank
));
389 /* Given an expression E, return the rank of the expression. */
  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */
405 /* We make an exception to the normal ranking system to break
406 dependences of accumulator variables in loops. Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
424 different iterations can be interleaved.
426 To obtain this result during reassociation, we bias the rank
427 of the phi definition x_1 upward, when it is recognized as an
428 accumulator pattern. The artificial rank causes it to be
429 added last, providing the desired independence. */
431 if (TREE_CODE (e
) == SSA_NAME
)
438 if (SSA_NAME_IS_DEFAULT_DEF (e
))
439 return find_operand_rank (e
);
441 stmt
= SSA_NAME_DEF_STMT (e
);
442 if (gimple_code (stmt
) == GIMPLE_PHI
)
443 return phi_rank (stmt
);
445 if (!is_gimple_assign (stmt
))
446 return bb_rank
[gimple_bb (stmt
)->index
];
448 /* If we already have a rank for this expression, use that. */
449 rank
= find_operand_rank (e
);
453 /* Otherwise, find the maximum rank for the operands. As an
454 exception, remove the bias from loop-carried phis when propagating
455 the rank so that dependent operations are not also biased. */
  /* Simply walk over all SSA uses - this takes advantage of the
     fact that non-SSA operands are is_gimple_min_invariant and
     thus have rank 0.  */
460 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
461 rank
= propagate_rank (rank
, op
);
463 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
465 fprintf (dump_file
, "Rank for ");
466 print_generic_expr (dump_file
, e
, 0);
467 fprintf (dump_file
, " is %ld\n", (rank
+ 1));
470 /* Note the rank in the hashtable so we don't recompute it. */
471 insert_operand_rank (e
, (rank
+ 1));
475 /* Constants, globals, etc., are rank 0 */
480 /* We want integer ones to end up last no matter what, since they are
481 the ones we can do the most with. */
482 #define INTEGER_CONST_TYPE 1 << 3
483 #define FLOAT_CONST_TYPE 1 << 2
484 #define OTHER_CONST_TYPE 1 << 1
486 /* Classify an invariant tree into integer, float, or other, so that
487 we can sort them to be near other constants of the same type. */
489 constant_type (tree t
)
491 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
492 return INTEGER_CONST_TYPE
;
493 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
494 return FLOAT_CONST_TYPE
;
496 return OTHER_CONST_TYPE
;
499 /* qsort comparison function to sort operand entries PA and PB by rank
500 so that the sorted array is ordered by rank in decreasing order. */
502 sort_by_operand_rank (const void *pa
, const void *pb
)
504 const operand_entry_t oea
= *(const operand_entry_t
*)pa
;
505 const operand_entry_t oeb
= *(const operand_entry_t
*)pb
;
507 /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
509 other. Since all constants have rank 0, order them by type. */
510 if (oeb
->rank
== 0 && oea
->rank
== 0)
512 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
513 return constant_type (oeb
->op
) - constant_type (oea
->op
);
515 /* To make sorting result stable, we use unique IDs to determine
517 return oeb
->id
- oea
->id
;
520 /* Lastly, make sure the versions that are the same go next to each
522 if ((oeb
->rank
- oea
->rank
== 0)
523 && TREE_CODE (oea
->op
) == SSA_NAME
524 && TREE_CODE (oeb
->op
) == SSA_NAME
)
      /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
	 versions of removed SSA_NAMEs, so if possible prefer to sort
	 based on the basic block and gimple_uid of the SSA_NAME_DEF_STMT.  */
530 if (!SSA_NAME_IS_DEFAULT_DEF (oea
->op
)
531 && !SSA_NAME_IS_DEFAULT_DEF (oeb
->op
)
532 && SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
534 gimple stmta
= SSA_NAME_DEF_STMT (oea
->op
);
535 gimple stmtb
= SSA_NAME_DEF_STMT (oeb
->op
);
536 basic_block bba
= gimple_bb (stmta
);
537 basic_block bbb
= gimple_bb (stmtb
);
540 if (bb_rank
[bbb
->index
] != bb_rank
[bba
->index
])
541 return bb_rank
[bbb
->index
] - bb_rank
[bba
->index
];
545 bool da
= reassoc_stmt_dominates_stmt_p (stmta
, stmtb
);
546 bool db
= reassoc_stmt_dominates_stmt_p (stmtb
, stmta
);
552 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
553 return SSA_NAME_VERSION (oeb
->op
) - SSA_NAME_VERSION (oea
->op
);
555 return oeb
->id
- oea
->id
;
  if (oeb->rank != oea->rank)
    return oeb->rank - oea->rank;

  return oeb->id - oea->id;
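/* Illustrative example (not part of the implementation): an operand list
   { 5 (rank 0), x_3 (rank 2), y_7 (rank 1) } sorts to { x_3, y_7, 5 } --
   decreasing rank, with the rank-0 constants at the end of the vector,
   where the constant-merging code expects to find them.  */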
564 /* Add an operand entry to *OPS for the tree operand OP. */
567 add_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
)
569 operand_entry_t oe
= operand_entry_pool
.allocate ();
572 oe
->rank
= get_rank (op
);
573 oe
->id
= next_operand_entry_id
++;
578 /* Add an operand entry to *OPS for the tree operand OP with repeat
582 add_repeat_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
,
583 HOST_WIDE_INT repeat
)
585 operand_entry_t oe
= operand_entry_pool
.allocate ();
588 oe
->rank
= get_rank (op
);
589 oe
->id
= next_operand_entry_id
++;
593 reassociate_stats
.pows_encountered
++;
/* Return true if STMT is a reassociable operation containing a binary
597 operation with tree code CODE, and is inside LOOP. */
600 is_reassociable_op (gimple stmt
, enum tree_code code
, struct loop
*loop
)
602 basic_block bb
= gimple_bb (stmt
);
604 if (gimple_bb (stmt
) == NULL
)
607 if (!flow_bb_inside_loop_p (loop
, bb
))
610 if (is_gimple_assign (stmt
)
611 && gimple_assign_rhs_code (stmt
) == code
612 && has_single_use (gimple_assign_lhs (stmt
)))
619 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of that unary operation.  Otherwise, return NULL.  */
623 get_unary_op (tree name
, enum tree_code opcode
)
625 gimple stmt
= SSA_NAME_DEF_STMT (name
);
627 if (!is_gimple_assign (stmt
))
630 if (gimple_assign_rhs_code (stmt
) == opcode
)
631 return gimple_assign_rhs1 (stmt
);
635 /* If CURR and LAST are a pair of ops that OPCODE allows us to
636 eliminate through equivalences, do so, remove them from OPS, and
637 return true. Otherwise, return false. */
640 eliminate_duplicate_pair (enum tree_code opcode
,
641 vec
<operand_entry_t
> *ops
,
644 operand_entry_t curr
,
645 operand_entry_t last
)
  /* If we have two of the same op, and the opcode is &, |, min, or max,
649 we can eliminate one of them.
650 If we have two of the same op, and the opcode is ^, we can
651 eliminate both of them. */
653 if (last
&& last
->op
== curr
->op
)
661 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
663 fprintf (dump_file
, "Equivalence: ");
664 print_generic_expr (dump_file
, curr
->op
, 0);
665 fprintf (dump_file
, " [&|minmax] ");
666 print_generic_expr (dump_file
, last
->op
, 0);
667 fprintf (dump_file
, " -> ");
668 print_generic_stmt (dump_file
, last
->op
, 0);
671 ops
->ordered_remove (i
);
672 reassociate_stats
.ops_eliminated
++;
677 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
679 fprintf (dump_file
, "Equivalence: ");
680 print_generic_expr (dump_file
, curr
->op
, 0);
681 fprintf (dump_file
, " ^ ");
682 print_generic_expr (dump_file
, last
->op
, 0);
683 fprintf (dump_file
, " -> nothing\n");
686 reassociate_stats
.ops_eliminated
+= 2;
688 if (ops
->length () == 2)
691 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
696 ops
->ordered_remove (i
-1);
697 ops
->ordered_remove (i
-1);
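/* Illustrative examples of the above (a sketch only): for a BIT_AND_EXPR
   chain the operand list { a, a, b } becomes { a, b }, since a & a == a;
   for a BIT_XOR_EXPR chain both copies of a are simply dropped
   (a ^ a == 0), with a zero constant pushed only when nothing else would
   remain in the list.  */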
static vec<tree> plus_negates;
/* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
   expression, look in OPS for a corresponding positive operation to cancel
   it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */
718 eliminate_plus_minus_pair (enum tree_code opcode
,
719 vec
<operand_entry_t
> *ops
,
720 unsigned int currindex
,
721 operand_entry_t curr
)
728 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
731 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
732 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
733 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */
740 for (i
= currindex
+ 1;
741 ops
->iterate (i
, &oe
)
742 && oe
->rank
>= curr
->rank
- 1 ;
745 if (oe
->op
== negateop
)
748 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
750 fprintf (dump_file
, "Equivalence: ");
751 print_generic_expr (dump_file
, negateop
, 0);
752 fprintf (dump_file
, " + -");
753 print_generic_expr (dump_file
, oe
->op
, 0);
754 fprintf (dump_file
, " -> 0\n");
757 ops
->ordered_remove (i
);
758 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
759 ops
->ordered_remove (currindex
);
760 reassociate_stats
.ops_eliminated
++;
764 else if (oe
->op
== notop
)
766 tree op_type
= TREE_TYPE (oe
->op
);
768 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
770 fprintf (dump_file
, "Equivalence: ");
771 print_generic_expr (dump_file
, notop
, 0);
772 fprintf (dump_file
, " + ~");
773 print_generic_expr (dump_file
, oe
->op
, 0);
774 fprintf (dump_file
, " -> -1\n");
777 ops
->ordered_remove (i
);
778 add_to_ops_vec (ops
, build_int_cst_type (op_type
, -1));
779 ops
->ordered_remove (currindex
);
780 reassociate_stats
.ops_eliminated
++;
786 /* CURR->OP is a negate expr in a plus expr: save it for later
787 inspection in repropagate_negates(). */
788 if (negateop
!= NULL_TREE
)
    plus_negates.safe_push (curr->op);
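/* Illustrative examples: in a PLUS_EXPR operand list { -t, b, t } the pair
   -t and t is replaced by a zero constant, and in { ~t, b, t } the pair ~t
   and t is replaced by -1, since x + ~x == -1 in two's complement.  */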
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1 as appropriate, and return true.  Otherwise,
   return false.  */
801 eliminate_not_pairs (enum tree_code opcode
,
802 vec
<operand_entry_t
> *ops
,
803 unsigned int currindex
,
804 operand_entry_t curr
)
810 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
811 || TREE_CODE (curr
->op
) != SSA_NAME
)
814 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
815 if (notop
== NULL_TREE
)
818 /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */
822 for (i
= currindex
+ 1;
823 ops
->iterate (i
, &oe
)
824 && oe
->rank
>= curr
->rank
- 1;
829 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
831 fprintf (dump_file
, "Equivalence: ");
832 print_generic_expr (dump_file
, notop
, 0);
833 if (opcode
== BIT_AND_EXPR
)
834 fprintf (dump_file
, " & ~");
835 else if (opcode
== BIT_IOR_EXPR
)
836 fprintf (dump_file
, " | ~");
837 print_generic_expr (dump_file
, oe
->op
, 0);
838 if (opcode
== BIT_AND_EXPR
)
839 fprintf (dump_file
, " -> 0\n");
840 else if (opcode
== BIT_IOR_EXPR
)
841 fprintf (dump_file
, " -> -1\n");
844 if (opcode
== BIT_AND_EXPR
)
845 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
846 else if (opcode
== BIT_IOR_EXPR
)
847 oe
->op
= build_all_ones_cst (TREE_TYPE (oe
->op
));
849 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
      ops->quick_push (oe);
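/* Illustrative examples: for BIT_AND_EXPR the operand list { ~t, t, b }
   collapses to a single zero, since t & ~t == 0 makes the whole conjunction
   zero; for BIT_IOR_EXPR it collapses to a single all-ones constant, since
   t | ~t == -1 subsumes the rest of the disjunction.  */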
859 /* Use constant value that may be present in OPS to try to eliminate
860 operands. Note that this function is only really used when we've
861 eliminated ops for other reasons, or merged constants. Across
862 single statements, fold already does all of this, plus more. There
863 is little point in duplicating logic, so I've only included the
864 identities that I could ever construct testcases to trigger. */
867 eliminate_using_constants (enum tree_code opcode
,
868 vec
<operand_entry_t
> *ops
)
870 operand_entry_t oelast
= ops
->last ();
871 tree type
= TREE_TYPE (oelast
->op
);
873 if (oelast
->rank
== 0
874 && (INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
879 if (integer_zerop (oelast
->op
))
881 if (ops
->length () != 1)
883 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
884 fprintf (dump_file
, "Found & 0, removing all other ops\n");
886 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
889 ops
->quick_push (oelast
);
893 else if (integer_all_onesp (oelast
->op
))
895 if (ops
->length () != 1)
897 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
898 fprintf (dump_file
, "Found & -1, removing\n");
900 reassociate_stats
.ops_eliminated
++;
905 if (integer_all_onesp (oelast
->op
))
907 if (ops
->length () != 1)
909 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
910 fprintf (dump_file
, "Found | -1, removing all other ops\n");
912 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
915 ops
->quick_push (oelast
);
919 else if (integer_zerop (oelast
->op
))
921 if (ops
->length () != 1)
923 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
924 fprintf (dump_file
, "Found | 0, removing\n");
926 reassociate_stats
.ops_eliminated
++;
931 if (integer_zerop (oelast
->op
)
932 || (FLOAT_TYPE_P (type
)
933 && !HONOR_NANS (type
)
934 && !HONOR_SIGNED_ZEROS (type
)
935 && real_zerop (oelast
->op
)))
937 if (ops
->length () != 1)
939 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
940 fprintf (dump_file
, "Found * 0, removing all other ops\n");
942 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
944 ops
->quick_push (oelast
);
948 else if (integer_onep (oelast
->op
)
949 || (FLOAT_TYPE_P (type
)
950 && !HONOR_SNANS (type
)
951 && real_onep (oelast
->op
)))
953 if (ops
->length () != 1)
955 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
956 fprintf (dump_file
, "Found * 1, removing\n");
958 reassociate_stats
.ops_eliminated
++;
966 if (integer_zerop (oelast
->op
)
967 || (FLOAT_TYPE_P (type
)
968 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
969 && fold_real_zero_addition_p (type
, oelast
->op
,
970 opcode
== MINUS_EXPR
)))
972 if (ops
->length () != 1)
974 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
975 fprintf (dump_file
, "Found [|^+] 0, removing\n");
977 reassociate_stats
.ops_eliminated
++;
989 static void linearize_expr_tree (vec
<operand_entry_t
> *, gimple
,
992 /* Structure for tracking and counting operands. */
993 typedef struct oecount_s
{
996 enum tree_code oecode
;
1001 /* The heap for the oecount hashtable and the sorted list of operands. */
1002 static vec
<oecount
> cvec
;
1005 /* Oecount hashtable helpers. */
1007 struct oecount_hasher
: int_hash
<int, 0, 1>
1009 static inline hashval_t
hash (int);
1010 static inline bool equal (int, int);
1013 /* Hash function for oecount. */
1016 oecount_hasher::hash (int p
)
1018 const oecount
*c
= &cvec
[p
- 42];
1019 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
1022 /* Comparison function for oecount. */
1025 oecount_hasher::equal (int p1
, int p2
)
1027 const oecount
*c1
= &cvec
[p1
- 42];
1028 const oecount
*c2
= &cvec
[p2
- 42];
1029 return (c1
->oecode
== c2
->oecode
1030 && c1
->op
== c2
->op
);
1033 /* Comparison function for qsort sorting oecount elements by count. */
1036 oecount_cmp (const void *p1
, const void *p2
)
1038 const oecount
*c1
= (const oecount
*)p1
;
1039 const oecount
*c2
= (const oecount
*)p2
;
1040 if (c1
->cnt
!= c2
->cnt
)
1041 return c1
->cnt
- c2
->cnt
;
1043 /* If counts are identical, use unique IDs to stabilize qsort. */
1044 return c1
->id
- c2
->id
;
1047 /* Return TRUE iff STMT represents a builtin call that raises OP
1048 to some exponent. */
1051 stmt_is_power_of_op (gimple stmt
, tree op
)
1055 if (!is_gimple_call (stmt
))
1058 fndecl
= gimple_call_fndecl (stmt
);
1061 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
1064 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1066 CASE_FLT_FN (BUILT_IN_POW
):
1067 CASE_FLT_FN (BUILT_IN_POWI
):
1068 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1075 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1076 in place and return the result. Assumes that stmt_is_power_of_op
1077 was previously called for STMT and returned TRUE. */
1079 static HOST_WIDE_INT
1080 decrement_power (gimple stmt
)
1082 REAL_VALUE_TYPE c
, cint
;
1083 HOST_WIDE_INT power
;
1086 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1088 CASE_FLT_FN (BUILT_IN_POW
):
1089 arg1
= gimple_call_arg (stmt
, 1);
1090 c
= TREE_REAL_CST (arg1
);
1091 power
= real_to_integer (&c
) - 1;
1092 real_from_integer (&cint
, VOIDmode
, power
, SIGNED
);
1093 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1096 CASE_FLT_FN (BUILT_IN_POWI
):
1097 arg1
= gimple_call_arg (stmt
, 1);
1098 power
= TREE_INT_CST_LOW (arg1
) - 1;
1099 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
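/* For example (illustrative only): if STMT is t = __builtin_powi (x, 5),
   the call is rewritten in place to __builtin_powi (x, 4) and 4 is
   returned; a caller such as zero_one_operation that sees the result drop
   to 1 then forwards x itself to the call's single use.  */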
1107 /* Find the single immediate use of STMT's LHS, and replace it
1108 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1109 replace *DEF with OP as well. */
1112 propagate_op_to_single_use (tree op
, gimple stmt
, tree
*def
)
1117 gimple_stmt_iterator gsi
;
1119 if (is_gimple_call (stmt
))
1120 lhs
= gimple_call_lhs (stmt
);
1122 lhs
= gimple_assign_lhs (stmt
);
1124 gcc_assert (has_single_use (lhs
));
1125 single_imm_use (lhs
, &use
, &use_stmt
);
1129 if (TREE_CODE (op
) != SSA_NAME
)
1130 update_stmt (use_stmt
);
1131 gsi
= gsi_for_stmt (stmt
);
1132 unlink_stmt_vdef (stmt
);
1133 reassoc_remove_stmt (&gsi
);
1134 release_defs (stmt
);
1137 /* Walks the linear chain with result *DEF searching for an operation
1138 with operand OP and code OPCODE removing that from the chain. *DEF
1139 is updated if there is only one operand but no operation left. */
1142 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1144 gimple stmt
= SSA_NAME_DEF_STMT (*def
);
1150 if (opcode
== MULT_EXPR
1151 && stmt_is_power_of_op (stmt
, op
))
1153 if (decrement_power (stmt
) == 1)
1154 propagate_op_to_single_use (op
, stmt
, def
);
1158 name
= gimple_assign_rhs1 (stmt
);
  /* If this is the operation we look for and one of the operands
     is ours, simply propagate the other operand into the stmt's
     single use.  */
1163 if (gimple_assign_rhs_code (stmt
) == opcode
1165 || gimple_assign_rhs2 (stmt
) == op
))
1168 name
= gimple_assign_rhs2 (stmt
);
1169 propagate_op_to_single_use (name
, stmt
, def
);
1173 /* We might have a multiply of two __builtin_pow* calls, and
1174 the operand might be hiding in the rightmost one. */
1175 if (opcode
== MULT_EXPR
1176 && gimple_assign_rhs_code (stmt
) == opcode
1177 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1178 && has_single_use (gimple_assign_rhs2 (stmt
)))
1180 gimple stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1181 if (stmt_is_power_of_op (stmt2
, op
))
1183 if (decrement_power (stmt2
) == 1)
1184 propagate_op_to_single_use (op
, stmt2
, def
);
1189 /* Continue walking the chain. */
1190 gcc_assert (name
!= op
1191 && TREE_CODE (name
) == SSA_NAME
);
1192 stmt
= SSA_NAME_DEF_STMT (name
);
1197 /* Returns true if statement S1 dominates statement S2. Like
1198 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1201 reassoc_stmt_dominates_stmt_p (gimple s1
, gimple s2
)
1203 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1205 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1206 SSA_NAME. Assume it lives at the beginning of function and
1207 thus dominates everything. */
1208 if (!bb1
|| s1
== s2
)
1211 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1217 /* PHIs in the same basic block are assumed to be
1218 executed all in parallel, if only one stmt is a PHI,
1219 it dominates the other stmt in the same basic block. */
1220 if (gimple_code (s1
) == GIMPLE_PHI
)
1223 if (gimple_code (s2
) == GIMPLE_PHI
)
1226 gcc_assert (gimple_uid (s1
) && gimple_uid (s2
));
1228 if (gimple_uid (s1
) < gimple_uid (s2
))
1231 if (gimple_uid (s1
) > gimple_uid (s2
))
1234 gimple_stmt_iterator gsi
= gsi_for_stmt (s1
);
1235 unsigned int uid
= gimple_uid (s1
);
1236 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1238 gimple s
= gsi_stmt (gsi
);
1239 if (gimple_uid (s
) != uid
)
1248 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
1251 /* Insert STMT after INSERT_POINT. */
1254 insert_stmt_after (gimple stmt
, gimple insert_point
)
1256 gimple_stmt_iterator gsi
;
1259 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1260 bb
= gimple_bb (insert_point
);
1261 else if (!stmt_ends_bb_p (insert_point
))
1263 gsi
= gsi_for_stmt (insert_point
);
1264 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1265 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1269 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1270 thus if it must end a basic block, it should be a call that can
1271 throw, or some assignment that can throw. If it throws, the LHS
1272 of it will not be initialized though, so only valid places using
1273 the SSA_NAME should be dominated by the fallthru edge. */
1274 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
1275 gsi
= gsi_after_labels (bb
);
1276 if (gsi_end_p (gsi
))
1278 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1279 gimple_set_uid (stmt
,
1280 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1283 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1284 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1287 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1288 the result. Places the statement after the definition of either
1289 OP1 or OP2. Returns the new statement. */
1292 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
1294 gimple op1def
= NULL
, op2def
= NULL
;
1295 gimple_stmt_iterator gsi
;
1299 /* Create the addition statement. */
1300 op
= make_ssa_name (type
);
1301 sum
= gimple_build_assign (op
, opcode
, op1
, op2
);
1303 /* Find an insertion place and insert. */
1304 if (TREE_CODE (op1
) == SSA_NAME
)
1305 op1def
= SSA_NAME_DEF_STMT (op1
);
1306 if (TREE_CODE (op2
) == SSA_NAME
)
1307 op2def
= SSA_NAME_DEF_STMT (op2
);
1308 if ((!op1def
|| gimple_nop_p (op1def
))
1309 && (!op2def
|| gimple_nop_p (op2def
)))
1311 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1312 if (gsi_end_p (gsi
))
1314 gimple_stmt_iterator gsi2
1315 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1316 gimple_set_uid (sum
,
1317 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1320 gimple_set_uid (sum
, gimple_uid (gsi_stmt (gsi
)));
1321 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1325 gimple insert_point
;
1326 if ((!op1def
|| gimple_nop_p (op1def
))
1327 || (op2def
&& !gimple_nop_p (op2def
)
1328 && reassoc_stmt_dominates_stmt_p (op1def
, op2def
)))
1329 insert_point
= op2def
;
1331 insert_point
= op1def
;
1332 insert_stmt_after (sum
, insert_point
);
1339 /* Perform un-distribution of divisions and multiplications.
1340 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1341 to (A + B) / X for real X.
1343 The algorithm is organized as follows.
1345 - First we walk the addition chain *OPS looking for summands that
1346 are defined by a multiplication or a real division. This results
1347 in the candidates bitmap with relevant indices into *OPS.
1349 - Second we build the chains of multiplications or divisions for
1350 these candidates, counting the number of occurrences of (operand, code)
1351 pairs in all of the candidates chains.
1353 - Third we sort the (operand, code) pairs by number of occurrence and
1354 process them starting with the pair with the most uses.
1356 * For each such pair we walk the candidates again to build a
1357 second candidate bitmap noting all multiplication/division chains
1358 that have at least one occurrence of (operand, code).
1360 * We build an alternate addition chain only covering these
1361 candidates with one (operand, code) operation removed from their
1362 multiplication/division chain.
1364 * The first candidate gets replaced by the alternate addition chain
1365 multiplied/divided by the operand.
1367 * All candidate chains get disabled for further processing and
1368 processing of (operand, code) pairs continues.
1370 The alternate addition chains built are re-processed by the main
1371 reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b) * x * y in one invocation of the reassociation pass.  */
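/* A small illustrative walk-through of the algorithm above (a sketch only):
   for the chain a * x + b * x + c, the candidates are the two
   multiplications; the pair (x, MULT_EXPR) occurs twice, so a new addition
   chain a + b is built, multiplied by x, and the product replaces the first
   candidate while the second is replaced by zero, leaving
   (a + b) * x + 0 + c for the main reassociation pass to finish.  */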
1375 undistribute_ops_list (enum tree_code opcode
,
1376 vec
<operand_entry_t
> *ops
, struct loop
*loop
)
1378 unsigned int length
= ops
->length ();
1379 operand_entry_t oe1
;
1381 sbitmap candidates
, candidates2
;
1382 unsigned nr_candidates
, nr_candidates2
;
1383 sbitmap_iterator sbi0
;
1384 vec
<operand_entry_t
> *subops
;
1385 bool changed
= false;
1386 int next_oecount_id
= 0;
1389 || opcode
!= PLUS_EXPR
)
1392 /* Build a list of candidates to process. */
1393 candidates
= sbitmap_alloc (length
);
1394 bitmap_clear (candidates
);
1396 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1398 enum tree_code dcode
;
1401 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1403 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1404 if (!is_gimple_assign (oe1def
))
1406 dcode
= gimple_assign_rhs_code (oe1def
);
1407 if ((dcode
!= MULT_EXPR
1408 && dcode
!= RDIV_EXPR
)
1409 || !is_reassociable_op (oe1def
, dcode
, loop
))
1412 bitmap_set_bit (candidates
, i
);
1416 if (nr_candidates
< 2)
1418 sbitmap_free (candidates
);
1422 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1424 fprintf (dump_file
, "searching for un-distribute opportunities ");
1425 print_generic_expr (dump_file
,
1426 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, 0);
1427 fprintf (dump_file
, " %d\n", nr_candidates
);
1430 /* Build linearized sub-operand lists and the counting table. */
1433 hash_table
<oecount_hasher
> ctable (15);
1435 /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to work around that limitation.  */
1437 typedef vec
<operand_entry_t
> vec_operand_entry_t_heap
;
1438 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1439 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1442 enum tree_code oecode
;
1445 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1446 oecode
= gimple_assign_rhs_code (oedef
);
1447 linearize_expr_tree (&subops
[i
], oedef
,
1448 associative_tree_code (oecode
), false);
1450 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1457 c
.id
= next_oecount_id
++;
1460 idx
= cvec
.length () + 41;
1461 slot
= ctable
.find_slot (idx
, INSERT
);
1469 cvec
[*slot
- 42].cnt
++;
1474 /* Sort the counting table. */
1475 cvec
.qsort (oecount_cmp
);
1477 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1480 fprintf (dump_file
, "Candidates:\n");
1481 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1483 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1484 c
->oecode
== MULT_EXPR
1485 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1486 print_generic_expr (dump_file
, c
->op
, 0);
1487 fprintf (dump_file
, "\n");
1491 /* Process the (operand, code) pairs in order of most occurrence. */
1492 candidates2
= sbitmap_alloc (length
);
1493 while (!cvec
.is_empty ())
1495 oecount
*c
= &cvec
.last ();
1499 /* Now collect the operands in the outer chain that contain
1500 the common operand in their inner chain. */
1501 bitmap_clear (candidates2
);
1503 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1506 enum tree_code oecode
;
1508 tree op
= (*ops
)[i
]->op
;
	  /* If we undistributed in this chain already this may be
	     a constant.  */
1512 if (TREE_CODE (op
) != SSA_NAME
)
1515 oedef
= SSA_NAME_DEF_STMT (op
);
1516 oecode
= gimple_assign_rhs_code (oedef
);
1517 if (oecode
!= c
->oecode
)
1520 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1522 if (oe1
->op
== c
->op
)
1524 bitmap_set_bit (candidates2
, i
);
1531 if (nr_candidates2
>= 2)
1533 operand_entry_t oe1
, oe2
;
1535 int first
= bitmap_first_set_bit (candidates2
);
1537 /* Build the new addition chain. */
1538 oe1
= (*ops
)[first
];
1539 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1541 fprintf (dump_file
, "Building (");
1542 print_generic_expr (dump_file
, oe1
->op
, 0);
1544 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1545 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1549 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1551 fprintf (dump_file
, " + ");
1552 print_generic_expr (dump_file
, oe2
->op
, 0);
1554 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1555 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1556 oe1
->op
, oe2
->op
, opcode
);
1557 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1559 oe1
->op
= gimple_get_lhs (sum
);
1562 /* Apply the multiplication/division. */
1563 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1564 oe1
->op
, c
->op
, c
->oecode
);
1565 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1567 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1568 print_generic_expr (dump_file
, c
->op
, 0);
1569 fprintf (dump_file
, "\n");
1572 /* Record it in the addition chain and disable further
1573 undistribution with this op. */
1574 oe1
->op
= gimple_assign_lhs (prod
);
1575 oe1
->rank
= get_rank (oe1
->op
);
1576 subops
[first
].release ();
1584 for (i
= 0; i
< ops
->length (); ++i
)
1585 subops
[i
].release ();
1588 sbitmap_free (candidates
);
1589 sbitmap_free (candidates2
);
1594 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1595 expression, examine the other OPS to see if any of them are comparisons
1596 of the same values, which we may be able to combine or eliminate.
1597 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1600 eliminate_redundant_comparison (enum tree_code opcode
,
1601 vec
<operand_entry_t
> *ops
,
1602 unsigned int currindex
,
1603 operand_entry_t curr
)
1606 enum tree_code lcode
, rcode
;
1611 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
1614 /* Check that CURR is a comparison. */
1615 if (TREE_CODE (curr
->op
) != SSA_NAME
)
1617 def1
= SSA_NAME_DEF_STMT (curr
->op
);
1618 if (!is_gimple_assign (def1
))
1620 lcode
= gimple_assign_rhs_code (def1
);
1621 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
1623 op1
= gimple_assign_rhs1 (def1
);
1624 op2
= gimple_assign_rhs2 (def1
);
1626 /* Now look for a similar comparison in the remaining OPS. */
1627 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
1631 if (TREE_CODE (oe
->op
) != SSA_NAME
)
1633 def2
= SSA_NAME_DEF_STMT (oe
->op
);
1634 if (!is_gimple_assign (def2
))
1636 rcode
= gimple_assign_rhs_code (def2
);
1637 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
      /* If we got here, we have a match.  See if we can combine the
	 two comparisons.  */
1642 if (opcode
== BIT_IOR_EXPR
)
1643 t
= maybe_fold_or_comparisons (lcode
, op1
, op2
,
1644 rcode
, gimple_assign_rhs1 (def2
),
1645 gimple_assign_rhs2 (def2
));
1647 t
= maybe_fold_and_comparisons (lcode
, op1
, op2
,
1648 rcode
, gimple_assign_rhs1 (def2
),
1649 gimple_assign_rhs2 (def2
));
1653 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1654 always give us a boolean_type_node value back. If the original
1655 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1656 we need to convert. */
1657 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
1658 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
1660 if (TREE_CODE (t
) != INTEGER_CST
1661 && !operand_equal_p (t
, curr
->op
, 0))
1663 enum tree_code subcode
;
1664 tree newop1
, newop2
;
1665 if (!COMPARISON_CLASS_P (t
))
1667 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1668 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1669 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1670 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
1674 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1676 fprintf (dump_file
, "Equivalence: ");
1677 print_generic_expr (dump_file
, curr
->op
, 0);
1678 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
1679 print_generic_expr (dump_file
, oe
->op
, 0);
1680 fprintf (dump_file
, " -> ");
1681 print_generic_expr (dump_file
, t
, 0);
1682 fprintf (dump_file
, "\n");
      /* Now we can delete oe, as it has been subsumed by the new combined
	 expression t.  */
1687 ops
->ordered_remove (i
);
1688 reassociate_stats
.ops_eliminated
++;
1690 /* If t is the same as curr->op, we're done. Otherwise we must
	 replace curr->op with t.  The special case is if we got a constant
1692 back, in which case we add it to the end instead of in place of
1693 the current entry. */
1694 if (TREE_CODE (t
) == INTEGER_CST
)
1696 ops
->ordered_remove (currindex
);
1697 add_to_ops_vec (ops
, t
);
1699 else if (!operand_equal_p (t
, curr
->op
, 0))
1702 enum tree_code subcode
;
1705 gcc_assert (COMPARISON_CLASS_P (t
));
1706 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1707 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1708 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1709 gcc_checking_assert (is_gimple_val (newop1
)
1710 && is_gimple_val (newop2
));
1711 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
1712 curr
->op
= gimple_get_lhs (sum
);
1720 /* Perform various identities and other optimizations on the list of
1721 operand entries, stored in OPS. The tree code for the binary
1722 operation between all the operands is OPCODE. */
1725 optimize_ops_list (enum tree_code opcode
,
1726 vec
<operand_entry_t
> *ops
)
1728 unsigned int length
= ops
->length ();
1731 operand_entry_t oelast
= NULL
;
1732 bool iterate
= false;
1737 oelast
= ops
->last ();
1739 /* If the last two are constants, pop the constants off, merge them
1740 and try the next two. */
1741 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
1743 operand_entry_t oelm1
= (*ops
)[length
- 2];
1745 if (oelm1
->rank
== 0
1746 && is_gimple_min_invariant (oelm1
->op
)
1747 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
1748 TREE_TYPE (oelast
->op
)))
1750 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
1751 oelm1
->op
, oelast
->op
);
1753 if (folded
&& is_gimple_min_invariant (folded
))
1755 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1756 fprintf (dump_file
, "Merging constants\n");
1761 add_to_ops_vec (ops
, folded
);
1762 reassociate_stats
.constants_eliminated
++;
1764 optimize_ops_list (opcode
, ops
);
1770 eliminate_using_constants (opcode
, ops
);
1773 for (i
= 0; ops
->iterate (i
, &oe
);)
1777 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
1779 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
1780 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
1781 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
1793 length
= ops
->length ();
1794 oelast
= ops
->last ();
1797 optimize_ops_list (opcode
, ops
);
1800 /* The following functions are subroutines to optimize_range_tests and allow
   it to try to change a logical combination of comparisons into a range
   test.  For example,
     X == 2 || X == 5 || X == 3 || X == 4
   is converted to
     (unsigned) (X - 2) <= 3
1811 For more information see comments above fold_test_range in fold-const.c,
1812 this implementation is for GIMPLE. */
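/* For illustration (not part of the implementation): each comparison above
   is first represented as a range_entry -- roughly (exp, in_p, low, high) --
   so X == 2 || X == 5 || X == 3 || X == 4 becomes the entries +[2,2],
   +[5,5], +[3,3] and +[4,4] on X, which merge into the single range +[2,5],
   i.e. the test (unsigned) (X - 2) <= 3.  */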
1820 bool strict_overflow_p
;
1821 unsigned int idx
, next
;
1824 /* This is similar to make_range in fold-const.c, but on top of
1825 GIMPLE instead of trees. If EXP is non-NULL, it should be
1826 an SSA_NAME and STMT argument is ignored, otherwise STMT
1827 argument should be a GIMPLE_COND. */
1830 init_range_entry (struct range_entry
*r
, tree exp
, gimple stmt
)
1834 bool is_bool
, strict_overflow_p
;
1838 r
->strict_overflow_p
= false;
1840 r
->high
= NULL_TREE
;
1841 if (exp
!= NULL_TREE
1842 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
1845 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1846 and see if we can refine the range. Some of the cases below may not
1847 happen, but it doesn't seem worth worrying about this. We "continue"
1848 the outer loop when we've changed something; otherwise we "break"
1849 the switch, which will "break" the while. */
1850 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
1853 strict_overflow_p
= false;
1855 if (exp
== NULL_TREE
)
1857 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
1859 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
1864 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
1869 enum tree_code code
;
1870 tree arg0
, arg1
, exp_type
;
1874 if (exp
!= NULL_TREE
)
1876 if (TREE_CODE (exp
) != SSA_NAME
1877 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
))
1880 stmt
= SSA_NAME_DEF_STMT (exp
);
1881 if (!is_gimple_assign (stmt
))
1884 code
= gimple_assign_rhs_code (stmt
);
1885 arg0
= gimple_assign_rhs1 (stmt
);
1886 arg1
= gimple_assign_rhs2 (stmt
);
1887 exp_type
= TREE_TYPE (exp
);
1891 code
= gimple_cond_code (stmt
);
1892 arg0
= gimple_cond_lhs (stmt
);
1893 arg1
= gimple_cond_rhs (stmt
);
1894 exp_type
= boolean_type_node
;
1897 if (TREE_CODE (arg0
) != SSA_NAME
)
1899 loc
= gimple_location (stmt
);
1903 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
1904 /* Ensure the range is either +[-,0], +[0,0],
1905 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1906 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1907 or similar expression of unconditional true or
1908 false, it should not be negated. */
1909 && ((high
&& integer_zerop (high
))
1910 || (low
&& integer_onep (low
))))
1923 if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
1925 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
1930 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
1945 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
1947 &strict_overflow_p
);
1948 if (nexp
!= NULL_TREE
)
1951 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
1964 r
->strict_overflow_p
= strict_overflow_p
;
1968 /* Comparison function for qsort. Sort entries
1969 without SSA_NAME exp first, then with SSA_NAMEs sorted
1970 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1971 by increasing ->low and if ->low is the same, by increasing
   ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
   means maximum.
1976 range_entry_cmp (const void *a
, const void *b
)
1978 const struct range_entry
*p
= (const struct range_entry
*) a
;
1979 const struct range_entry
*q
= (const struct range_entry
*) b
;
1981 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
1983 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
1985 /* Group range_entries for the same SSA_NAME together. */
1986 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
1988 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
      /* If ->low is different, NULL low goes first, then by
	 ascending ->low.  */
1992 if (p
->low
!= NULL_TREE
)
1994 if (q
->low
!= NULL_TREE
)
1996 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
1998 if (tem
&& integer_onep (tem
))
2000 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2002 if (tem
&& integer_onep (tem
))
2008 else if (q
->low
!= NULL_TREE
)
      /* If ->high is different, NULL high goes last, before that by
	 ascending ->high.  */
2012 if (p
->high
!= NULL_TREE
)
2014 if (q
->high
!= NULL_TREE
)
2016 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2018 if (tem
&& integer_onep (tem
))
2020 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2022 if (tem
&& integer_onep (tem
))
2028 else if (q
->high
!= NULL_TREE
)
2030 /* If both ranges are the same, sort below by ascending idx. */
2035 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2038 if (p
->idx
< q
->idx
)
2042 gcc_checking_assert (p
->idx
> q
->idx
);
2047 /* Helper routine of optimize_range_test.
2048 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2049 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2050 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2051 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2052 an array of COUNT pointers to other ranges. Return
2053 true if the range merge has been successful.
2054 If OPCODE is ERROR_MARK, this is called from within
2055 maybe_optimize_range_tests and is performing inter-bb range optimization.
   In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
   oe->rank.  */
2060 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2061 struct range_entry
**otherrangep
,
2062 unsigned int count
, enum tree_code opcode
,
2063 vec
<operand_entry_t
> *ops
, tree exp
, gimple_seq seq
,
2064 bool in_p
, tree low
, tree high
, bool strict_overflow_p
)
2066 operand_entry_t oe
= (*ops
)[range
->idx
];
2068 gimple stmt
= op
? SSA_NAME_DEF_STMT (op
) :
2069 last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2070 location_t loc
= gimple_location (stmt
);
2071 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2072 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2074 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2075 gimple_stmt_iterator gsi
;
2078 if (tem
== NULL_TREE
)
2081 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2082 warning_at (loc
, OPT_Wstrict_overflow
,
2083 "assuming signed overflow does not occur "
2084 "when simplifying range test");
2086 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2088 struct range_entry
*r
;
2089 fprintf (dump_file
, "Optimizing range tests ");
2090 print_generic_expr (dump_file
, range
->exp
, 0);
2091 fprintf (dump_file
, " %c[", range
->in_p
? '+' : '-');
2092 print_generic_expr (dump_file
, range
->low
, 0);
2093 fprintf (dump_file
, ", ");
2094 print_generic_expr (dump_file
, range
->high
, 0);
2095 fprintf (dump_file
, "]");
2096 for (i
= 0; i
< count
; i
++)
2102 fprintf (dump_file
, " and %c[", r
->in_p
? '+' : '-');
2103 print_generic_expr (dump_file
, r
->low
, 0);
2104 fprintf (dump_file
, ", ");
2105 print_generic_expr (dump_file
, r
->high
, 0);
2106 fprintf (dump_file
, "]");
2108 fprintf (dump_file
, "\n into ");
2109 print_generic_expr (dump_file
, tem
, 0);
2110 fprintf (dump_file
, "\n");
2113 if (opcode
== BIT_IOR_EXPR
2114 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2115 tem
= invert_truthvalue_loc (loc
, tem
);
2117 tem
= fold_convert_loc (loc
, optype
, tem
);
2118 gsi
= gsi_for_stmt (stmt
);
2119 unsigned int uid
= gimple_uid (stmt
);
2120 /* In rare cases range->exp can be equal to lhs of stmt.
     In that case we have to insert after the stmt rather than before
2122 it. If stmt is a PHI, insert it at the start of the basic block. */
2123 if (op
!= range
->exp
)
2125 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2126 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
2130 else if (gimple_code (stmt
) != GIMPLE_PHI
)
2132 gsi_insert_seq_after (&gsi
, seq
, GSI_CONTINUE_LINKING
);
2133 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, false,
2134 GSI_CONTINUE_LINKING
);
2138 gsi
= gsi_after_labels (gimple_bb (stmt
));
2139 if (!gsi_end_p (gsi
))
2140 uid
= gimple_uid (gsi_stmt (gsi
));
2143 gsi
= gsi_start_bb (gimple_bb (stmt
));
2145 while (!gsi_end_p (gsi
))
2147 uid
= gimple_uid (gsi_stmt (gsi
));
2151 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2152 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
2154 if (gsi_end_p (gsi
))
2155 gsi
= gsi_last_bb (gimple_bb (stmt
));
2159 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2160 if (gimple_uid (gsi_stmt (gsi
)))
2163 gimple_set_uid (gsi_stmt (gsi
), uid
);
2170 range
->strict_overflow_p
= false;
2172 for (i
= 0; i
< count
; i
++)
2175 range
= otherrange
+ i
;
2177 range
= otherrangep
[i
];
2178 oe
= (*ops
)[range
->idx
];
2179 /* Now change all the other range test immediate uses, so that
2180 those tests will be optimized away. */
2181 if (opcode
== ERROR_MARK
)
2184 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
2185 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
2187 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
2188 ? boolean_false_node
: boolean_true_node
);
2191 oe
->op
= error_mark_node
;
2192 range
->exp
= NULL_TREE
;
2197 /* Optimize X == CST1 || X == CST2
2198 if popcount (CST1 ^ CST2) == 1 into
2199 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2200 Similarly for ranges. E.g.
2201 X != 2 && X != 3 && X != 10 && X != 11
2202 will be transformed by the previous optimization into
2203 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2204 and this loop can transform that into
2205 !(((X & ~8) - 2U) <= 1U). */
2208 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
2209 tree lowi
, tree lowj
, tree highi
, tree highj
,
2210 vec
<operand_entry_t
> *ops
,
2211 struct range_entry
*rangei
,
2212 struct range_entry
*rangej
)
2214 tree lowxor
, highxor
, tem
, exp
;
2215 /* Check lowi ^ lowj == highi ^ highj and
2216 popcount (lowi ^ lowj) == 1. */
2217 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
2218 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
2220 if (!integer_pow2p (lowxor
))
2222 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
2223 if (!tree_int_cst_equal (lowxor
, highxor
))
2226 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
2227 exp
= fold_build2 (BIT_AND_EXPR
, type
, rangei
->exp
, tem
);
2228 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
2229 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
2230 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, exp
,
2231 NULL
, rangei
->in_p
, lowj
, highj
,
2232 rangei
->strict_overflow_p
2233 || rangej
->strict_overflow_p
))
2238 /* Optimize X == CST1 || X == CST2
2239 if popcount (CST2 - CST1) == 1 into
2240 ((X - CST1) & ~(CST2 - CST1)) == 0.
2241 Similarly for ranges. E.g.
2242 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2243 || X == 75 || X == 45
2244 will be transformed by the previous optimization into
2245 (X - 43U) <= 3U || (X - 75U) <= 3U
2246 and this loop can transform that into
2247 ((X - 43U) & ~(75U - 43U)) <= 3U. */
static bool
optimize_range_tests_diff (enum tree_code opcode, tree type,
			   tree lowi, tree lowj, tree highi, tree highj,
			   vec<operand_entry_t> *ops,
			   struct range_entry *rangei,
			   struct range_entry *rangej)
{
  tree tem1, tem2, mask;
  /* Check highi - lowi == highj - lowj.  */
  tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
    return false;
  tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
  if (!tree_int_cst_equal (tem1, tem2))
    return false;
  /* Check popcount (lowj - lowi) == 1.  */
  tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
    return false;
  if (!integer_pow2p (tem1))
    return false;

  type = unsigned_type_for (type);
  tem1 = fold_convert (type, tem1);
  tem2 = fold_convert (type, tem2);
  lowi = fold_convert (type, lowi);
  mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
  tem1 = fold_binary (MINUS_EXPR, type,
		      fold_convert (type, rangei->exp), lowi);
  tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
  lowj = build_int_cst (type, 0);
  if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
			 NULL, rangei->in_p, lowj, tem2,
			 rangei->strict_overflow_p
			 || rangej->strict_overflow_p))
    return true;
  return false;
}
/* Perform the checks common to optimize_range_tests_xor and
   optimize_range_tests_diff on each pair of mergeable ranges.
   If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor.
   Otherwise call optimize_range_tests_diff.  */
static bool
optimize_range_tests_1 (enum tree_code opcode, int first, int length,
			bool optimize_xor, vec<operand_entry_t> *ops,
			struct range_entry *ranges)
{
  int i, j;
  bool any_changes = false;
  for (i = first; i < length; i++)
    {
      tree lowi, highi, lowj, highj, type, tem;

      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
	continue;
      type = TREE_TYPE (ranges[i].exp);
      if (!INTEGRAL_TYPE_P (type))
	continue;
      lowi = ranges[i].low;
      if (lowi == NULL_TREE)
	lowi = TYPE_MIN_VALUE (type);
      highi = ranges[i].high;
      if (highi == NULL_TREE)
	continue;
      for (j = i + 1; j < length && j < i + 64; j++)
	{
	  bool changes;
	  if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
	    continue;
	  lowj = ranges[j].low;
	  if (lowj == NULL_TREE)
	    continue;
	  highj = ranges[j].high;
	  if (highj == NULL_TREE)
	    highj = TYPE_MAX_VALUE (type);
	  /* Check lowj > highi.  */
	  tem = fold_binary (GT_EXPR, boolean_type_node,
			     lowj, highi);
	  if (tem == NULL_TREE || !integer_onep (tem))
	    continue;
	  if (optimize_xor)
	    changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
						highi, highj, ops,
						ranges + i, ranges + j);
	  else
	    changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
						 highi, highj, ops,
						 ranges + i, ranges + j);
	  if (changes)
	    {
	      any_changes = true;
	      break;
	    }
	}
    }
  return any_changes;
}
/* Helper function of optimize_range_tests_to_bit_test.  Handle a single
   range, EXP, LOW, HIGH, compute bit mask of bits to test and return
   EXP on success, NULL otherwise.  */
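/* Illustrative note (not from the original source): following the code
   below, for LOW = 43 and HIGH = 46 on a 64-bit word target,
   HIGH - LOW == 3, so MAX == 4 and *MASK starts out as the low four bits
   (0xf), one bit per value in the range, before finally being shifted
   left by LOW - TOTALLOW.  */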
static tree
extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
		       wide_int *mask, tree *totallowp)
{
  tree tem = int_const_binop (MINUS_EXPR, high, low);
  if (tem == NULL_TREE
      || TREE_CODE (tem) != INTEGER_CST
      || TREE_OVERFLOW (tem)
      || tree_int_cst_sgn (tem) == -1
      || compare_tree_int (tem, prec) != -1)
    return NULL_TREE;

  unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
  *mask = wi::shifted_mask (0, max, false, prec);
  if (TREE_CODE (exp) == BIT_AND_EXPR
      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
    {
      widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
      msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
      if (wi::popcount (msk) == 1
	  && wi::ltu_p (msk, prec - max))
	{
	  *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
	  max += msk.to_uhwi ();
	  exp = TREE_OPERAND (exp, 0);
	  if (integer_zerop (low)
	      && TREE_CODE (exp) == PLUS_EXPR
	      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
	    {
	      tree ret = TREE_OPERAND (exp, 0);
	      widest_int bias
		= wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
				     TYPE_PRECISION (TREE_TYPE (low))));
	      tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
	      if (totallowp)
		{
		  *totallowp = tbias;
		  return ret;
		}
	      else if (!tree_int_cst_lt (totallow, tbias))
		return NULL_TREE;
	      bias = wi::to_widest (tbias);
	      bias -= wi::to_widest (totallow);
	      if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
		{
		  *mask = wi::lshift (*mask, bias);
		  return ret;
		}
	    }
	}
    }
  if (totallowp)
    {
      *totallowp = low;
      return exp;
    }
  if (!tree_int_cst_lt (totallow, low))
    return NULL_TREE;
  tem = int_const_binop (MINUS_EXPR, low, totallow);
  if (tem == NULL_TREE
      || TREE_CODE (tem) != INTEGER_CST
      || TREE_OVERFLOW (tem)
      || compare_tree_int (tem, prec - max) == 1)
    return NULL_TREE;

  *mask = wi::lshift (*mask, wi::to_widest (tem));
  return exp;
}
/* Attempt to optimize small range tests using bit test.
   E.g.
   X != 43 && X != 76 && X != 44 && X != 78 && X != 49
   && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
   has been by earlier optimizations optimized into:
   ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
   As all the 43 through 82 range is less than 64 numbers,
   for 64-bit word targets optimize that into:
   (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */
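/* Illustrative note (not from the original source): the constant in the
   example above encodes the tested values as bit positions relative to 43.
   The offsets of 43,44,45,46,49 are 0-3 and 6 (0x4F) and the offsets of
   75,76,77,78,82 are 32-35 and 39 (0x8F00000000), giving the combined mask
   0x8F0000004FULL.  Whether X is one of the listed values is then decided
   with one subtraction, one shift, one AND and one compare instead of a
   chain of individual comparisons.  */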
static bool
optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
				  vec<operand_entry_t> *ops,
				  struct range_entry *ranges)
{
  int i, j;
  bool any_changes = false;
  int prec = GET_MODE_BITSIZE (word_mode);
  auto_vec<struct range_entry *, 64> candidates;

  for (i = first; i < length - 2; i++)
    {
      tree lowi, highi, lowj, highj, type;

      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
	continue;
      type = TREE_TYPE (ranges[i].exp);
      if (!INTEGRAL_TYPE_P (type))
	continue;
      lowi = ranges[i].low;
      if (lowi == NULL_TREE)
	lowi = TYPE_MIN_VALUE (type);
      highi = ranges[i].high;
      if (highi == NULL_TREE)
	continue;
      wide_int mask;
      tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
					highi, &mask, &lowi);
      if (exp == NULL_TREE)
	continue;
      bool strict_overflow_p = ranges[i].strict_overflow_p;
      candidates.truncate (0);
      int end = MIN (i + 64, length);
      for (j = i + 1; j < end; j++)
	{
	  tree exp2 = NULL_TREE;
	  if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
	    continue;
	  if (ranges[j].exp == exp)
	    ;
	  else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
	    {
	      exp2 = TREE_OPERAND (ranges[j].exp, 0);
	      if (exp2 == exp)
		;
	      else if (TREE_CODE (exp2) == PLUS_EXPR)
		{
		  exp2 = TREE_OPERAND (exp2, 0);
		  if (exp2 != exp)
		    continue;
		}
	      else
		continue;
	    }
	  else
	    continue;
	  lowj = ranges[j].low;
	  if (lowj == NULL_TREE)
	    continue;
	  highj = ranges[j].high;
	  if (highj == NULL_TREE)
	    highj = TYPE_MAX_VALUE (type);
	  wide_int mask2;
	  exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
					highj, &mask2, NULL);
	  if (exp2 != exp)
	    continue;
	  mask |= mask2;
	  strict_overflow_p |= ranges[j].strict_overflow_p;
	  candidates.safe_push (&ranges[j]);
	}

      /* If we need otherwise 3 or more comparisons, use a bit test.  */
      if (candidates.length () >= 2)
	{
	  tree high = wide_int_to_tree (TREE_TYPE (lowi),
					wi::to_widest (lowi)
					+ prec - 1 - wi::clz (mask));
	  operand_entry_t oe = (*ops)[ranges[i].idx];
	  tree op = oe->op;
	  gimple stmt = op ? SSA_NAME_DEF_STMT (op)
			   : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
	  location_t loc = gimple_location (stmt);
	  tree optype = op ? TREE_TYPE (op) : boolean_type_node;

	  /* See if it isn't cheaper to pretend the minimum value of the
	     range is 0, if maximum value is small enough.
	     We can avoid then subtraction of the minimum value, but the
	     mask constant could be perhaps more expensive.  */
	  if (compare_tree_int (lowi, 0) > 0
	      && compare_tree_int (high, prec) < 0)
	    {
	      int cost_diff;
	      HOST_WIDE_INT m = tree_to_uhwi (lowi);
	      rtx reg = gen_raw_REG (word_mode, 10000);
	      bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
	      cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
						      GEN_INT (-m)), speed_p);
	      rtx r = immed_wide_int_const (mask, word_mode);
	      cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
					 word_mode, speed_p);
	      r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
	      cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
					 word_mode, speed_p);
	      if (cost_diff > 0)
		{
		  mask = wi::lshift (mask, m);
		  lowi = build_zero_cst (TREE_TYPE (lowi));
		}
	    }

	  tree tem = build_range_check (loc, optype, unshare_expr (exp),
					false, lowi, high);
	  if (tem == NULL_TREE || is_gimple_val (tem))
	    continue;
	  tree etype = unsigned_type_for (TREE_TYPE (exp));
	  exp = fold_build2_loc (loc, MINUS_EXPR, etype,
				 fold_convert_loc (loc, etype, exp),
				 fold_convert_loc (loc, etype, lowi));
	  exp = fold_convert_loc (loc, integer_type_node, exp);
	  tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
	  exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
				 build_int_cst (word_type, 1), exp);
	  exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
				 wide_int_to_tree (word_type, mask));
	  exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
				 build_zero_cst (word_type));
	  if (is_gimple_val (exp))
	    continue;

	  /* The shift might have undefined behavior if TEM is true,
	     but reassociate_bb isn't prepared to have basic blocks
	     split when it is running.  So, temporarily emit a code
	     with BIT_IOR_EXPR instead of &&, and fix it up in
	     branch_fixup.  */
	  gimple_seq seq;
	  tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
	  gcc_assert (TREE_CODE (tem) == SSA_NAME);
	  gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
	  gimple_seq seq2;
	  exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
	  gimple_seq_add_seq_without_update (&seq, seq2);
	  gcc_assert (TREE_CODE (exp) == SSA_NAME);
	  gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
	  gimple g = gimple_build_assign (make_ssa_name (optype),
					  BIT_IOR_EXPR, tem, exp);
	  gimple_set_location (g, loc);
	  gimple_seq_add_stmt_without_update (&seq, g);
	  exp = gimple_assign_lhs (g);
	  tree val = build_zero_cst (optype);
	  if (update_range_test (&ranges[i], NULL, candidates.address (),
				 candidates.length (), opcode, ops, exp,
				 seq, false, val, val, strict_overflow_p))
	    {
	      any_changes = true;
	      reassoc_branch_fixups.safe_push (tem);
	    }
	  else
	    gimple_seq_discard (seq);
	}
    }
  return any_changes;
}
/* Optimize range tests, similarly to how fold_range_test optimizes
   it on trees.  The tree code for the binary
   operation between all the operands is OPCODE.
   If OPCODE is ERROR_MARK, optimize_range_tests is called from within
   maybe_optimize_range_tests for inter-bb range optimization.
   In that case, if oe->op is NULL, oe->id is bb->index whose
   GIMPLE_COND is && or ||ed into the test, and oe->rank says
   the actual opcode.  */
static bool
optimize_range_tests (enum tree_code opcode,
		      vec<operand_entry_t> *ops)
{
  unsigned int length = ops->length (), i, j, first;
  operand_entry_t oe;
  struct range_entry *ranges;
  bool any_changes = false;

  if (length == 1)
    return false;

  ranges = XNEWVEC (struct range_entry, length);
  for (i = 0; i < length; i++)
    {
      oe = (*ops)[i];
      ranges[i].idx = i;
      init_range_entry (ranges + i, oe->op,
			oe->op ? NULL :
			  last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
      /* For | invert it now, we will invert it again before emitting
	 the optimized expression.  */
      if (opcode == BIT_IOR_EXPR
	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
	ranges[i].in_p = !ranges[i].in_p;
    }

  qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
  for (i = 0; i < length; i++)
    if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
      break;

  /* Try to merge ranges.  */
  for (first = i; i < length; i++)
    {
      tree low = ranges[i].low;
      tree high = ranges[i].high;
      int in_p = ranges[i].in_p;
      bool strict_overflow_p = ranges[i].strict_overflow_p;
      int update_fail_count = 0;

      for (j = i + 1; j < length; j++)
	{
	  if (ranges[i].exp != ranges[j].exp)
	    break;
	  if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
			     ranges[j].in_p, ranges[j].low, ranges[j].high))
	    break;
	  strict_overflow_p |= ranges[j].strict_overflow_p;
	}

      if (j == i + 1)
	continue;

      if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
			     opcode, ops, ranges[i].exp, NULL, in_p,
			     low, high, strict_overflow_p))
	{
	  i = j - 1;
	  any_changes = true;
	}
      /* Avoid quadratic complexity if all merge_ranges calls would succeed,
	 while update_range_test would fail.  */
      else if (update_fail_count == 64)
	break;
      else
	++update_fail_count;
    }

  any_changes |= optimize_range_tests_1 (opcode, first, length, true,
					 ops, ranges);

  if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
    any_changes |= optimize_range_tests_1 (opcode, first, length, false,
					   ops, ranges);
  if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
    any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
						     ops, ranges);

  if (any_changes && opcode != ERROR_MARK)
    {
      j = 0;
      FOR_EACH_VEC_ELT (*ops, i, oe)
	{
	  if (oe->op == error_mark_node)
	    continue;
	  else if (i != j)
	    (*ops)[j] = oe;
	  j++;
	}
      ops->truncate (j);
    }

  XDELETEVEC (ranges);
  return any_changes;
}
/* Return true if STMT is a cast like:
   <bb N>:
   ...
   _123 = (int) _234;

   <bb M>:
   # _345 = PHI <_123(N), 1(...), 1(...)>
   where _234 has bool type, _123 has single use and
   bb N has a single successor M.  This is commonly used in
   the last block of a range test.  */
static bool
final_range_test_p (gimple stmt)
{
  basic_block bb, rhs_bb;
  edge e;
  tree lhs, rhs;
  use_operand_p use_p;
  gimple use_stmt;

  if (!gimple_assign_cast_p (stmt))
    return false;
  bb = gimple_bb (stmt);
  if (!single_succ_p (bb))
    return false;
  e = single_succ_edge (bb);
  if (e->flags & EDGE_COMPLEX)
    return false;

  lhs = gimple_assign_lhs (stmt);
  rhs = gimple_assign_rhs1 (stmt);
  if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
      || TREE_CODE (rhs) != SSA_NAME
      || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
    return false;

  /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return false;

  if (gimple_code (use_stmt) != GIMPLE_PHI
      || gimple_bb (use_stmt) != e->dest)
    return false;

  /* And that the rhs is defined in the same loop.  */
  rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
  if (rhs_bb == NULL
      || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
    return false;

  return true;
}
/* Return true if BB is a suitable basic block for inter-bb range test
   optimization.  If BACKWARD is true, BB should be the only predecessor
   of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
   or compared with to find a common basic block to which all conditions
   branch to if true resp. false.  If BACKWARD is false, TEST_BB should
   be the only predecessor of BB.  */
static bool
suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
		  bool backward)
{
  edge_iterator ei, ei2;
  edge e, e2;
  gimple stmt;
  gphi_iterator gsi;
  bool other_edge_seen = false;
  bool is_cond;

  /* Check last stmt first.  */
  stmt = last_stmt (bb);
  if (stmt == NULL
      || (gimple_code (stmt) != GIMPLE_COND
	  && (backward || !final_range_test_p (stmt)))
      || gimple_visited_p (stmt)
      || stmt_could_throw_p (stmt)
      || *other_bb == bb)
    return false;
  is_cond = gimple_code (stmt) == GIMPLE_COND;
  if (is_cond)
    {
      /* If last stmt is GIMPLE_COND, verify that one of the succ edges
	 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
	 to *OTHER_BB (if not set yet, try to find it out).  */
      if (EDGE_COUNT (bb->succs) != 2)
	return false;
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	    return false;
	  if (e->dest == test_bb)
	    {
	      if (backward)
		continue;
	      else
		return false;
	    }
	  if (*other_bb == NULL)
	    {
	      FOR_EACH_EDGE (e2, ei2, test_bb->succs)
		if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
		  return false;
		else if (e->dest == e2->dest)
		  *other_bb = e->dest;
	      if (*other_bb == NULL)
		return false;
	    }
	  if (e->dest == *other_bb)
	    other_edge_seen = true;
	  else if (backward)
	    return false;
	}
      if (*other_bb == NULL || !other_edge_seen)
	return false;
    }
  else if (single_succ (bb) != *other_bb)
    return false;

  /* Now check all PHIs of *OTHER_BB.  */
  e = find_edge (bb, *other_bb);
  e2 = find_edge (test_bb, *other_bb);
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
	 corresponding to BB and TEST_BB predecessor must be the same.  */
      if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
			    gimple_phi_arg_def (phi, e2->dest_idx), 0))
	{
	  /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
	     one of the PHIs should have the lhs of the last stmt in
	     that block as PHI arg and that PHI should have 0 or 1
	     corresponding to it in all other range test basic blocks
	     considered.  */
	  if (!is_cond)
	    {
	      if (gimple_phi_arg_def (phi, e->dest_idx)
		  == gimple_assign_lhs (stmt)
		  && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
		      || integer_onep (gimple_phi_arg_def (phi,
							   e2->dest_idx))))
		continue;
	    }
	  else
	    {
	      gimple test_last = last_stmt (test_bb);
	      if (gimple_code (test_last) != GIMPLE_COND
		  && gimple_phi_arg_def (phi, e2->dest_idx)
		     == gimple_assign_lhs (test_last)
		  && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
		      || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
		continue;
	    }
	  return false;
	}
    }
  return true;
}
/* Return true if BB doesn't have side-effects that would disallow
   range test optimization, all SSA_NAMEs set in the bb are consumed
   in the bb and there are no PHIs.  */
static bool
no_side_effect_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  gimple last;

  if (!gimple_seq_empty_p (phi_nodes (bb)))
    return false;
  last = last_stmt (bb);
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      tree lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;

      if (is_gimple_debug (stmt))
	continue;
      if (gimple_has_side_effects (stmt))
	return false;
      if (stmt == last)
	continue;
      if (!is_gimple_assign (stmt))
	return false;
      lhs = gimple_assign_lhs (stmt);
      if (TREE_CODE (lhs) != SSA_NAME)
	return false;
      if (gimple_assign_rhs_could_trap_p (stmt))
	return false;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	{
	  gimple use_stmt = USE_STMT (use_p);
	  if (is_gimple_debug (use_stmt))
	    continue;
	  if (gimple_bb (use_stmt) != bb)
	    return false;
	}
    }
  return true;
}
/* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
   return true and fill in *OPS recursively.  */
static bool
get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
	 struct loop *loop)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);
  tree rhs[2];
  int i;

  if (!is_reassociable_op (stmt, code, loop))
    return false;

  rhs[0] = gimple_assign_rhs1 (stmt);
  rhs[1] = gimple_assign_rhs2 (stmt);
  gimple_set_visited (stmt, true);
  for (i = 0; i < 2; i++)
    if (TREE_CODE (rhs[i]) == SSA_NAME
	&& !get_ops (rhs[i], code, ops, loop)
	&& has_single_use (rhs[i]))
      {
	operand_entry_t oe = operand_entry_pool.allocate ();

	oe->op = rhs[i];
	oe->rank = code;
	oe->id = 0;
	oe->count = 1;
	ops->safe_push (oe);
      }
  return true;
}
/* Find the ops that were added by get_ops starting from VAR, see if
   they were changed during update_range_test and if yes, create new
   stmts.  */
static tree
update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
	    unsigned int *pidx, struct loop *loop)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);
  tree rhs[4];
  int i;

  if (!is_reassociable_op (stmt, code, loop))
    return NULL_TREE;

  rhs[0] = gimple_assign_rhs1 (stmt);
  rhs[1] = gimple_assign_rhs2 (stmt);
  rhs[2] = rhs[0];
  rhs[3] = rhs[1];
  for (i = 0; i < 2; i++)
    if (TREE_CODE (rhs[i]) == SSA_NAME)
      {
	rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
	if (rhs[2 + i] == NULL_TREE)
	  {
	    if (has_single_use (rhs[i]))
	      rhs[2 + i] = ops[(*pidx)++]->op;
	    else
	      rhs[2 + i] = rhs[i];
	  }
      }
  if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
      && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      var = make_ssa_name (TREE_TYPE (var));
      gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
					rhs[2], rhs[3]);
      gimple_set_uid (g, gimple_uid (stmt));
      gimple_set_visited (g, true);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
    }
  return var;
}
/* Structure to track the initial value passed to get_ops and
   the range in the ops vector for each basic block.  */

struct inter_bb_range_test_entry
{
  tree op;
  unsigned int first_idx, last_idx;
};
/* Inter-bb range test optimization.  */

static void
maybe_optimize_range_tests (gimple stmt)
{
  basic_block first_bb = gimple_bb (stmt);
  basic_block last_bb = first_bb;
  basic_block other_bb = NULL;
  basic_block bb;
  edge_iterator ei;
  edge e;
  auto_vec<operand_entry_t> ops;
  auto_vec<inter_bb_range_test_entry> bbinfo;
  bool any_changes = false;

  /* Consider only basic blocks that end with GIMPLE_COND or
     a cast statement satisfying final_range_test_p.  All
     but the last bb in the first_bb .. last_bb range
     should end with GIMPLE_COND.  */
  if (gimple_code (stmt) == GIMPLE_COND)
    {
      if (EDGE_COUNT (first_bb->succs) != 2)
	return;
    }
  else if (final_range_test_p (stmt))
    other_bb = single_succ (first_bb);
  else
    return;

  if (stmt_could_throw_p (stmt))
    return;

  /* As relative ordering of post-dominator sons isn't fixed,
     maybe_optimize_range_tests can be called first on any
     bb in the range we want to optimize.  So, start searching
     backwards, if first_bb can be set to a predecessor.  */
  while (single_pred_p (first_bb))
    {
      basic_block pred_bb = single_pred (first_bb);
      if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
	break;
      if (!no_side_effect_bb (first_bb))
	break;
      first_bb = pred_bb;
    }
  /* If first_bb is last_bb, other_bb hasn't been computed yet.
     Before starting forward search in last_bb successors, find
     out the other_bb.  */
  if (first_bb == last_bb)
    {
      other_bb = NULL;
      /* As non-GIMPLE_COND last stmt always terminates the range,
	 if forward search didn't discover anything, just give up.  */
      if (gimple_code (stmt) != GIMPLE_COND)
	return;
      /* Look at both successors.  Either it ends with a GIMPLE_COND
	 and satisfies suitable_cond_bb, or ends with a cast and
	 other_bb is that cast's successor.  */
      FOR_EACH_EDGE (e, ei, first_bb->succs)
	if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
	    || e->dest == first_bb)
	  return;
	else if (single_pred_p (e->dest))
	  {
	    stmt = last_stmt (e->dest);
	    if (stmt
		&& gimple_code (stmt) == GIMPLE_COND
		&& EDGE_COUNT (e->dest->succs) == 2)
	      {
		if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
		  break;
		else
		  other_bb = NULL;
	      }
	    else if (stmt
		     && final_range_test_p (stmt)
		     && find_edge (first_bb, single_succ (e->dest)))
	      {
		other_bb = single_succ (e->dest);
		if (other_bb == first_bb)
		  other_bb = NULL;
	      }
	  }
      if (other_bb == NULL)
	return;
    }
  /* Now do the forward search, moving last_bb to successor bbs
     that aren't other_bb.  */
  while (EDGE_COUNT (last_bb->succs) == 2)
    {
      FOR_EACH_EDGE (e, ei, last_bb->succs)
	if (e->dest != other_bb)
	  break;
      if (e == NULL)
	break;
      if (!single_pred_p (e->dest))
	break;
      if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
	break;
      if (!no_side_effect_bb (e->dest))
	break;
      last_bb = e->dest;
    }
  if (first_bb == last_bb)
    return;
  /* Here basic blocks first_bb through last_bb's predecessor
     end with GIMPLE_COND, all of them have one of the edges to
     other_bb and another to another block in the range,
     all blocks except first_bb don't have side-effects and
     last_bb ends with either GIMPLE_COND, or cast satisfying
     final_range_test_p.  */
  for (bb = last_bb; ; bb = single_pred (bb))
    {
      enum tree_code code;
      tree lhs, rhs;
      inter_bb_range_test_entry bb_ent;

      bb_ent.op = NULL_TREE;
      bb_ent.first_idx = ops.length ();
      bb_ent.last_idx = bb_ent.first_idx;
      e = find_edge (bb, other_bb);
      stmt = last_stmt (bb);
      gimple_set_visited (stmt, true);
      if (gimple_code (stmt) != GIMPLE_COND)
	{
	  use_operand_p use_p;
	  gimple phi;
	  edge e2;
	  unsigned int d;

	  lhs = gimple_assign_lhs (stmt);
	  rhs = gimple_assign_rhs1 (stmt);
	  gcc_assert (bb == last_bb);

	  /* stmt is
	     _123 = (int) _234;

	     followed by:
	     <bb M>:
	     # _345 = PHI <_123(N), 1(...), 1(...)>

	     or 0 instead of 1.  If it is 0, the _234
	     range test is anded together with all the
	     other range tests, if it is 1, it is ored with
	     them.  */
	  single_imm_use (lhs, &use_p, &phi);
	  gcc_assert (gimple_code (phi) == GIMPLE_PHI);
	  e2 = find_edge (first_bb, other_bb);
	  d = e2->dest_idx;
	  gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
	  if (integer_zerop (gimple_phi_arg_def (phi, d)))
	    code = BIT_AND_EXPR;
	  else
	    {
	      gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
	      code = BIT_IOR_EXPR;
	    }

	  /* If _234 SSA_NAME_DEF_STMT is
	     _234 = _567 | _789;
	     (or &, corresponding to 1/0 in the phi arguments,
	     push into ops the individual range test arguments
	     of the bitwise or resp. and, recursively.  */
	  if (!get_ops (rhs, code, &ops,
			loop_containing_stmt (stmt))
	      && has_single_use (rhs))
	    {
	      /* Otherwise, push the _234 range test itself.  */
	      operand_entry_t oe = operand_entry_pool.allocate ();

	      oe->op = rhs;
	      oe->rank = code;
	      oe->id = 0;
	      oe->count = 1;
	      ops.safe_push (oe);
	      bb_ent.last_idx++;
	    }
	  else
	    bb_ent.last_idx = ops.length ();
	  bb_ent.op = rhs;
	  bbinfo.safe_push (bb_ent);
	  continue;
	}
      /* Otherwise stmt is GIMPLE_COND.  */
      code = gimple_cond_code (stmt);
      lhs = gimple_cond_lhs (stmt);
      rhs = gimple_cond_rhs (stmt);
      if (TREE_CODE (lhs) == SSA_NAME
	  && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	  && ((code != EQ_EXPR && code != NE_EXPR)
	      || rhs != boolean_false_node
	      /* Either push into ops the individual bitwise
		 or resp. and operands, depending on which
		 edge is other_bb.  */
	      || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
				 ^ (code == EQ_EXPR))
			   ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
			   loop_containing_stmt (stmt))))
	{
	  /* Or push the GIMPLE_COND stmt itself.  */
	  operand_entry_t oe = operand_entry_pool.allocate ();

	  oe->op = NULL;
	  oe->rank = (e->flags & EDGE_TRUE_VALUE)
		     ? BIT_IOR_EXPR : BIT_AND_EXPR;
	  /* oe->op = NULL signs that there is no SSA_NAME
	     for the range test, and oe->id instead is the
	     basic block number, at which's end the GIMPLE_COND
	     is.  */
	  oe->id = bb->index;
	  oe->count = 1;
	  ops.safe_push (oe);
	  bb_ent.op = NULL;
	  bb_ent.last_idx++;
	}
      else if (ops.length () > bb_ent.first_idx)
	{
	  bb_ent.op = lhs;
	  bb_ent.last_idx = ops.length ();
	}
      bbinfo.safe_push (bb_ent);
      if (bb == first_bb)
	break;
    }
  if (ops.length () > 1)
    any_changes = optimize_range_tests (ERROR_MARK, &ops);
  if (any_changes)
    {
      unsigned int idx;
      /* update_ops relies on has_single_use predicates returning the
	 same values as it did during get_ops earlier.  Additionally it
	 never removes statements, only adds new ones and it should walk
	 from the single imm use and check the predicate already before
	 making those changes.
	 On the other side, the handling of GIMPLE_COND directly can turn
	 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
	 it needs to be done in a separate loop afterwards.  */
      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
	{
	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
	      && bbinfo[idx].op != NULL_TREE)
	    {
	      tree new_op;

	      stmt = last_stmt (bb);
	      new_op = update_ops (bbinfo[idx].op,
				   (enum tree_code)
				   ops[bbinfo[idx].first_idx]->rank,
				   ops, &bbinfo[idx].first_idx,
				   loop_containing_stmt (stmt));
	      if (new_op == NULL_TREE)
		{
		  gcc_assert (bb == last_bb);
		  new_op = ops[bbinfo[idx].first_idx++]->op;
		}
	      if (bbinfo[idx].op != new_op)
		{
		  imm_use_iterator iter;
		  use_operand_p use_p;
		  gimple use_stmt, cast_stmt = NULL;

		  FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
		    if (is_gimple_debug (use_stmt))
		      continue;
		    else if (gimple_code (use_stmt) == GIMPLE_COND
			     || gimple_code (use_stmt) == GIMPLE_PHI)
		      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
			SET_USE (use_p, new_op);
		    else if (gimple_assign_cast_p (use_stmt))
		      cast_stmt = use_stmt;
		    else
		      gcc_unreachable ();
		  if (cast_stmt)
		    {
		      gcc_assert (bb == last_bb);
		      tree lhs = gimple_assign_lhs (cast_stmt);
		      tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
		      enum tree_code rhs_code
			= gimple_assign_rhs_code (cast_stmt);
		      gassign *g;
		      if (is_gimple_min_invariant (new_op))
			{
			  new_op = fold_convert (TREE_TYPE (lhs), new_op);
			  g = gimple_build_assign (new_lhs, new_op);
			}
		      else
			g = gimple_build_assign (new_lhs, rhs_code, new_op);
		      gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
		      gimple_set_uid (g, gimple_uid (cast_stmt));
		      gimple_set_visited (g, true);
		      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
		      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
			if (is_gimple_debug (use_stmt))
			  continue;
			else if (gimple_code (use_stmt) == GIMPLE_COND
				 || gimple_code (use_stmt) == GIMPLE_PHI)
			  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
			    SET_USE (use_p, new_lhs);
			else
			  gcc_unreachable ();
		    }
		}
	    }
	  if (bb == first_bb)
	    break;
	}
      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
	{
	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
	      && bbinfo[idx].op == NULL_TREE
	      && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
	    {
	      gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
	      if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
		gimple_cond_make_false (cond_stmt);
	      else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
		gimple_cond_make_true (cond_stmt);
	      else
		{
		  gimple_cond_set_code (cond_stmt, NE_EXPR);
		  gimple_cond_set_lhs (cond_stmt,
				       ops[bbinfo[idx].first_idx]->op);
		  gimple_cond_set_rhs (cond_stmt, boolean_false_node);
		}
	      update_stmt (cond_stmt);
	    }
	  if (bb == first_bb)
	    break;
	}
    }
}
/* Return true if OPERAND is defined by a PHI node which uses the LHS
   of STMT in its operands.  This is also known as a "destructive
   update" operation.  */
static bool
is_phi_for_stmt (gimple stmt, tree operand)
{
  gimple def_stmt;
  gphi *def_phi;
  tree lhs;
  use_operand_p arg_p;
  ssa_op_iter i;

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  lhs = gimple_assign_lhs (stmt);

  def_stmt = SSA_NAME_DEF_STMT (operand);
  def_phi = dyn_cast <gphi *> (def_stmt);
  if (!def_phi)
    return false;

  FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
    if (lhs == USE_FROM_PTR (arg_p))
      return true;
  return false;
}
/* Remove def stmt of VAR if VAR has zero uses and recurse
   on rhs1 operand if so.  */
static void
remove_visited_stmt_chain (tree var)
{
  gimple stmt;
  gimple_stmt_iterator gsi;

  while (1)
    {
      if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
	return;
      stmt = SSA_NAME_DEF_STMT (var);
      if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
	{
	  var = gimple_assign_rhs1 (stmt);
	  gsi = gsi_for_stmt (stmt);
	  reassoc_remove_stmt (&gsi);
	  release_defs (stmt);
	}
      else
	return;
    }
}
/* This function checks three consecutive operands in
   passed operands vector OPS starting from OPINDEX and
   swaps two operands if it is profitable for binary operation
   consuming OPINDEX + 1 and OPINDEX + 2 operands.

   We pair ops with the same rank if possible.

   The alternative we try is to see if STMT is a destructive
   update style statement, which is like:
   b = phi (a, ...)
   a = c + b;
   In that case, we want to use the destructive update form to
   expose the possible vectorizer sum reduction opportunity.
   In that case, the third operand will be the phi node.  This
   check is not performed if STMT is null.

   We could, of course, try to be better as noted above, and do a
   lot of work to try to find these opportunities in >3 operand
   cases, but it is unlikely to be worth it.  */
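/* Illustrative note (not from the original source): with three PLUS_EXPR
   operands x (rank 1), y (rank 1) and s (rank 2), the first test below
   swaps s to the front of the triple, so the later rewrite emits
   t = x + y and then lhs = t + s; when s is additionally defined by a PHI
   that receives lhs (a sum reduction), the same swap keeps the accumulator
   in the final statement, exposing the destructive-update form.  */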
static void
swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
			  unsigned int opindex, gimple stmt)
{
  operand_entry_t oe1, oe2, oe3;

  oe1 = ops[opindex];
  oe2 = ops[opindex + 1];
  oe3 = ops[opindex + 2];

  if ((oe1->rank == oe2->rank
       && oe2->rank != oe3->rank)
      || (stmt && is_phi_for_stmt (stmt, oe3->op)
	  && !is_phi_for_stmt (stmt, oe1->op)
	  && !is_phi_for_stmt (stmt, oe2->op)))
    {
      struct operand_entry temp = *oe3;
      oe3->op = oe1->op;
      oe3->rank = oe1->rank;
      oe1->op = temp.op;
      oe1->rank = temp.rank;
    }
  else if ((oe1->rank == oe3->rank
	    && oe2->rank != oe3->rank)
	   || (stmt && is_phi_for_stmt (stmt, oe2->op)
	       && !is_phi_for_stmt (stmt, oe1->op)
	       && !is_phi_for_stmt (stmt, oe3->op)))
    {
      struct operand_entry temp = *oe2;
      oe2->op = oe1->op;
      oe2->rank = oe1->rank;
      oe1->op = temp.op;
      oe1->rank = temp.rank;
    }
}
/* If definition of RHS1 or RHS2 dominates STMT, return the later of those
   two definitions, otherwise return STMT.  */
static inline gimple
find_insert_point (gimple stmt, tree rhs1, tree rhs2)
{
  if (TREE_CODE (rhs1) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
    stmt = SSA_NAME_DEF_STMT (rhs1);
  if (TREE_CODE (rhs2) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
    stmt = SSA_NAME_DEF_STMT (rhs2);
  return stmt;
}
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order.  Return new lhs.  */
static tree
rewrite_expr_tree (gimple stmt, unsigned int opindex,
		   vec<operand_entry_t> ops, bool changed)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree lhs = gimple_assign_lhs (stmt);
  operand_entry_t oe;

  /* The final recursion case for this function is that you have
     exactly two operations left.
     If we had exactly one op in the entire list to start with, we
     would have never called this function, and the tail recursion
     rewrites them one at a time.  */
  if (opindex + 2 == ops.length ())
    {
      operand_entry_t oe1, oe2;

      oe1 = ops[opindex];
      oe2 = ops[opindex + 1];

      if (rhs1 != oe1->op || rhs2 != oe2->op)
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  unsigned int uid = gimple_uid (stmt);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Transforming ");
	      print_gimple_stmt (dump_file, stmt, 0, 0);
	    }

	  /* Even when changed is false, reassociation could have e.g. removed
	     some redundant operations, so unless we are just swapping the
	     arguments or unless there is no change at all (then we just
	     return lhs), force creation of a new SSA_NAME.  */
	  if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
	    {
	      gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
	      lhs = make_ssa_name (TREE_TYPE (lhs));
	      stmt
		= gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
				       oe1->op, oe2->op);
	      gimple_set_uid (stmt, uid);
	      gimple_set_visited (stmt, true);
	      if (insert_point == gsi_stmt (gsi))
		gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	      else
		insert_stmt_after (stmt, insert_point);
	    }
	  else
	    {
	      gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
				   == stmt);
	      gimple_assign_set_rhs1 (stmt, oe1->op);
	      gimple_assign_set_rhs2 (stmt, oe2->op);
	      update_stmt (stmt);
	    }

	  if (rhs1 != oe1->op && rhs1 != oe2->op)
	    remove_visited_stmt_chain (rhs1);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " into ");
	      print_gimple_stmt (dump_file, stmt, 0, 0);
	    }
	}
      return lhs;
    }

  /* If we hit here, we should have 3 or more ops left.  */
  gcc_assert (opindex + 2 < ops.length ());

  /* Rewrite the next operator.  */
  oe = ops[opindex];

  /* Recurse on the LHS of the binary operator, which is guaranteed to
     be the non-leaf side.  */
  tree new_rhs1
    = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
			 changed || oe->op != rhs2);

  if (oe->op != rhs2 || new_rhs1 != rhs1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Transforming ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}

      /* If changed is false, this is either opindex == 0
	 or all outer rhs2's were equal to corresponding oe->op,
	 and powi_result is NULL.
	 That means lhs is equivalent before and after reassociation.
	 Otherwise ensure the old lhs SSA_NAME is not reused and
	 create a new stmt as well, so that any debug stmts will be
	 properly adjusted.  */
      if (changed)
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  unsigned int uid = gimple_uid (stmt);
	  gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);

	  lhs = make_ssa_name (TREE_TYPE (lhs));
	  stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
				      new_rhs1, oe->op);
	  gimple_set_uid (stmt, uid);
	  gimple_set_visited (stmt, true);
	  if (insert_point == gsi_stmt (gsi))
	    gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	  else
	    insert_stmt_after (stmt, insert_point);
	}
      else
	{
	  gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
			       == stmt);
	  gimple_assign_set_rhs1 (stmt, new_rhs1);
	  gimple_assign_set_rhs2 (stmt, oe->op);
	  update_stmt (stmt);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " into ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}
    }
  return lhs;
}
/* Find out how many cycles we need to compute statements chain.
   OPS_NUM holds number of statements in a chain.  CPU_WIDTH is a
   maximum number of independent statements we may execute per cycle.  */
static int
get_required_cycles (int ops_num, int cpu_width)
{
  int res;
  int elog;
  unsigned int rest;

  /* While we have more than 2 * cpu_width operands
     we may reduce number of operands by cpu_width
     per cycle.  */
  res = ops_num / (2 * cpu_width);

  /* Remained operands count may be reduced twice per cycle
     until we have only one operand.  */
  rest = (unsigned)(ops_num - res * cpu_width);
  elog = exact_log2 (rest);
  if (elog >= 0)
    res += elog;
  else
    res += floor_log2 (rest) + 1;

  return res;
}
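/* Illustrative note (not from the original source): for ops_num == 10 and
   cpu_width == 2 the estimate above gives res = 10 / 4 = 2 reduction
   cycles, leaves rest = 10 - 2 * 2 = 6 operands, and since 6 is not a
   power of two adds floor_log2 (6) + 1 = 3 halving cycles, 5 cycles in
   total.  */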
/* Returns an optimal number of registers to use for computation of
   given statements.  */
static int
get_reassociation_width (int ops_num, enum tree_code opc,
			 machine_mode mode)
{
  int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
  int width;
  int width_min;
  int cycles_best;

  if (param_width > 0)
    width = param_width;
  else
    width = targetm.sched.reassociation_width (opc, mode);

  if (width == 1)
    return width;

  /* Get the minimal time required for sequence computation.  */
  cycles_best = get_required_cycles (ops_num, width);

  /* Check if we may use less width and still compute sequence for
     the same time.  It will allow us to reduce registers usage.
     get_required_cycles is monotonically increasing with lower width
     so we can perform a binary search for the minimal width that still
     results in the optimal cycle count.  */
  width_min = 1;
  while (width > width_min)
    {
      int width_mid = (width + width_min) / 2;

      if (get_required_cycles (ops_num, width_mid) == cycles_best)
	width = width_mid;
      else if (width_min < width_mid)
	width_min = width_mid;
      else
	break;
    }

  return width;
}
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order and trying to allow operations to be executed in
   parallel.  */
static void
rewrite_expr_tree_parallel (gassign *stmt, int width,
			    vec<operand_entry_t> ops)
{
  enum tree_code opcode = gimple_assign_rhs_code (stmt);
  int op_num = ops.length ();
  int stmt_num = op_num - 1;
  gimple *stmts = XALLOCAVEC (gimple, stmt_num);
  int op_index = op_num - 1;
  int stmt_index = 0;
  int ready_stmts_end = 0;
  int i = 0;
  tree last_rhs1 = gimple_assign_rhs1 (stmt);

  /* We start expression rewriting from the top statements.
     So, in this loop we create a full list of statements
     we will work with.  */
  stmts[stmt_num - 1] = stmt;
  for (i = stmt_num - 2; i >= 0; i--)
    stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));

  for (i = 0; i < stmt_num; i++)
    {
      tree op1, op2;

      /* Determine whether we should use results of
	 already handled statements or not.  */
      if (ready_stmts_end == 0
	  && (i - stmt_index >= width || op_index < 1))
	ready_stmts_end = i;

      /* Now we choose operands for the next statement.  Non zero
	 value in ready_stmts_end means here that we should use
	 the result of already generated statements as new operand.  */
      if (ready_stmts_end > 0)
	{
	  op1 = gimple_assign_lhs (stmts[stmt_index++]);
	  if (ready_stmts_end > stmt_index)
	    op2 = gimple_assign_lhs (stmts[stmt_index++]);
	  else if (op_index >= 0)
	    op2 = ops[op_index--]->op;
	  else
	    {
	      gcc_assert (stmt_index < i);
	      op2 = gimple_assign_lhs (stmts[stmt_index++]);
	    }

	  if (stmt_index >= ready_stmts_end)
	    ready_stmts_end = 0;
	}
      else
	{
	  if (op_index > 1)
	    swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
	  op2 = ops[op_index--]->op;
	  op1 = ops[op_index--]->op;
	}

      /* If we emit the last statement then we should put
	 operands into the last statement.  It will also
	 be the lhs of the whole expression.  */
      if (op_index < 0 && stmt_index == i)
	i = stmt_num - 1;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Transforming ");
	  print_gimple_stmt (dump_file, stmts[i], 0, 0);
	}

      /* We keep original statement only for the last one.  All
	 others are recreated.  */
      if (i == stmt_num - 1)
	{
	  gimple_assign_set_rhs1 (stmts[i], op1);
	  gimple_assign_set_rhs2 (stmts[i], op2);
	  update_stmt (stmts[i]);
	}
      else
	stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " into ");
	  print_gimple_stmt (dump_file, stmts[i], 0, 0);
	}
    }

  remove_visited_stmt_chain (last_rhs1);
}
/* Transform STMT, which is really (A + B) + (C + D) into the left
   linear form, ((A+B)+C)+D.
   Recurse on D if necessary.  */
static void
linearize_expr (gimple stmt)
{
  gimple_stmt_iterator gsi;
  gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
  gimple oldbinrhs = binrhs;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  gimple newbinrhs = NULL;
  struct loop *loop = loop_containing_stmt (stmt);
  tree lhs = gimple_assign_lhs (stmt);

  gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
	      && is_reassociable_op (binrhs, rhscode, loop));

  gsi = gsi_for_stmt (stmt);

  gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
  binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
				gimple_assign_rhs_code (binrhs),
				gimple_assign_lhs (binlhs),
				gimple_assign_rhs2 (binrhs));
  gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
  gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
  gimple_set_uid (binrhs, gimple_uid (stmt));

  if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
    newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Linearized: ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  reassociate_stats.linearized++;
  update_stmt (stmt);

  gsi = gsi_for_stmt (oldbinrhs);
  reassoc_remove_stmt (&gsi);
  release_defs (oldbinrhs);

  gimple_set_visited (stmt, true);
  gimple_set_visited (binlhs, true);
  gimple_set_visited (binrhs, true);

  /* Tail recurse on the new rhs if it still needs reassociation.  */
  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
    /* ??? This should probably be linearize_expr (newbinrhs) but I don't
       want to change the algorithm while converting to tuples.  */
    linearize_expr (stmt);
}
/* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
   it.  Otherwise, return NULL.  */
static gimple
get_single_immediate_use (tree lhs)
{
  use_operand_p immuse;
  gimple immusestmt;

  if (TREE_CODE (lhs) == SSA_NAME
      && single_imm_use (lhs, &immuse, &immusestmt)
      && is_gimple_assign (immusestmt))
    return immusestmt;

  return NULL;
}
/* Recursively negate the value of TONEGATE, and return the SSA_NAME
   representing the negated value.  Insertions of any necessary
   instructions go before GSI.
   This function is recursive in that, if you hand it "a_5" as the
   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */
static tree
negate_value (tree tonegate, gimple_stmt_iterator *gsip)
{
  gimple negatedefstmt = NULL;
  tree resultofnegate;
  gimple_stmt_iterator gsi;
  unsigned int uid;

  /* If we are trying to negate a name, defined by an add, negate the
     add operands instead.  */
  if (TREE_CODE (tonegate) == SSA_NAME)
    negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
  if (TREE_CODE (tonegate) == SSA_NAME
      && is_gimple_assign (negatedefstmt)
      && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
      && has_single_use (gimple_assign_lhs (negatedefstmt))
      && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
    {
      tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
      tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
      tree lhs = gimple_assign_lhs (negatedefstmt);
      gimple g;

      gsi = gsi_for_stmt (negatedefstmt);
      rhs1 = negate_value (rhs1, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs2 = negate_value (rhs2, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      lhs = make_ssa_name (TREE_TYPE (lhs));
      gimple_set_visited (negatedefstmt, true);
      g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
      gimple_set_uid (g, gimple_uid (negatedefstmt));
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      return lhs;
    }

  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
  resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
					     NULL_TREE, true, GSI_SAME_STMT);
  gsi = *gsip;
  uid = gimple_uid (gsi_stmt (gsi));
  for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (gimple_uid (stmt) != 0)
	break;
      gimple_set_uid (stmt, uid);
    }
  return resultofnegate;
}
/* Return true if we should break up the subtract in STMT into an add
   with negate.  This is true when the subtract operands are really
   adds, or the subtract itself is used in an add expression.  In
   either case, breaking up the subtract into an add with negate
   exposes the adds to reassociation.  */
static bool
should_break_up_subtract (gimple stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple immusestmt;
  struct loop *loop = loop_containing_stmt (stmt);

  if (TREE_CODE (binlhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
    return true;

  if (TREE_CODE (binrhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
    return true;

  if (TREE_CODE (lhs) == SSA_NAME
      && (immusestmt = get_single_immediate_use (lhs))
      && is_gimple_assign (immusestmt)
      && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
	  || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
    return true;
  return false;
}
/* Transform STMT from A - B into A + -B.  */

static void
break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Breaking up subtract ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  rhs2 = negate_value (rhs2, gsip);
  gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
  update_stmt (stmt);
}
/* Determine whether STMT is a builtin call that raises an SSA name
   to an integer power and has only one use.  If so, and this is early
   reassociation and unsafe math optimizations are permitted, place
   the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
   If any of these conditions does not hold, return FALSE.  */
static bool
acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
{
  tree fndecl, arg1;
  REAL_VALUE_TYPE c, cint;

  if (!first_pass_instance
      || !flag_unsafe_math_optimizations
      || !is_gimple_call (stmt)
      || !has_single_use (gimple_call_lhs (stmt)))
    return false;

  fndecl = gimple_call_fndecl (stmt);

  if (!fndecl
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return false;

  switch (DECL_FUNCTION_CODE (fndecl))
    {
    CASE_FLT_FN (BUILT_IN_POW):
      if (flag_errno_math)
	return false;

      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (TREE_CODE (arg1) != REAL_CST)
	return false;

      c = TREE_REAL_CST (arg1);

      if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
	return false;

      *exponent = real_to_integer (&c);
      real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
      if (!real_identical (&c, &cint))
	return false;

      break;

    CASE_FLT_FN (BUILT_IN_POWI):
      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (!tree_fits_shwi_p (arg1))
	return false;

      *exponent = tree_to_shwi (arg1);
      break;

    default:
      return false;
    }

  /* Expanding negative exponents is generally unproductive, so we don't
     complicate matters with those.  Exponents of zero and one should
     have been handled by expression folding.  */
  if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
    return false;

  return true;
}
/* Recursively linearize a binary expression that is the RHS of STMT.
   Place the operands of the expression tree in the vector named OPS.  */
static void
linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
		     bool is_associative, bool set_visited)
{
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple binlhsdef = NULL, binrhsdef = NULL;
  bool binlhsisreassoc = false;
  bool binrhsisreassoc = false;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  struct loop *loop = loop_containing_stmt (stmt);
  tree base = NULL_TREE;
  HOST_WIDE_INT exponent = 0;

  if (set_visited)
    gimple_set_visited (stmt, true);

  if (TREE_CODE (binlhs) == SSA_NAME)
    {
      binlhsdef = SSA_NAME_DEF_STMT (binlhs);
      binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
			 && !stmt_could_throw_p (binlhsdef));
    }

  if (TREE_CODE (binrhs) == SSA_NAME)
    {
      binrhsdef = SSA_NAME_DEF_STMT (binrhs);
      binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
			 && !stmt_could_throw_p (binrhsdef));
    }

  /* If the LHS is not reassociable, but the RHS is, we need to swap
     them.  If neither is reassociable, there is nothing we can do, so
     just put them in the ops vector.  If the LHS is reassociable,
     linearize it.  If both are reassociable, then linearize the RHS
     and the LHS.  */
  if (!binlhsisreassoc)
    {
      /* If this is not an associative operation like division, give up.  */
      if (!is_associative)
	{
	  add_to_ops_vec (ops, binrhs);
	  return;
	}

      if (!binrhsisreassoc)
	{
	  if (rhscode == MULT_EXPR
	      && TREE_CODE (binrhs) == SSA_NAME
	      && acceptable_pow_call (binrhsdef, &base, &exponent))
	    {
	      add_repeat_to_ops_vec (ops, base, exponent);
	      gimple_set_visited (binrhsdef, true);
	    }
	  else
	    add_to_ops_vec (ops, binrhs);

	  if (rhscode == MULT_EXPR
	      && TREE_CODE (binlhs) == SSA_NAME
	      && acceptable_pow_call (binlhsdef, &base, &exponent))
	    {
	      add_repeat_to_ops_vec (ops, base, exponent);
	      gimple_set_visited (binlhsdef, true);
	    }
	  else
	    add_to_ops_vec (ops, binlhs);

	  return;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "swapping operands of ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}

      swap_ssa_operands (stmt,
			 gimple_assign_rhs1_ptr (stmt),
			 gimple_assign_rhs2_ptr (stmt));
      update_stmt (stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " is now ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}

      /* We want to make it so the lhs is always the reassociative op,
	 so swap.  */
      std::swap (binlhs, binrhs);
    }
  else if (binrhsisreassoc)
    {
      linearize_expr (stmt);
      binlhs = gimple_assign_rhs1 (stmt);
      binrhs = gimple_assign_rhs2 (stmt);
    }

  gcc_assert (TREE_CODE (binrhs) != SSA_NAME
	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
				      rhscode, loop));
  linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
		       is_associative, set_visited);

  if (rhscode == MULT_EXPR
      && TREE_CODE (binrhs) == SSA_NAME
      && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
    {
      add_repeat_to_ops_vec (ops, base, exponent);
      gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
    }
  else
    add_to_ops_vec (ops, binrhs);
}
/* Repropagate the negates back into subtracts, since no other pass
   currently does it.  */
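/* Illustrative note (not from the original source): for the PLUS_EXPR case
   handled below, given
     x = -a;
     y = x + b;
   the negated operand is forced to the second position and the addition is
   rewritten as
     y = b - a;
   so the explicit negation disappears.  */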
static void
repropagate_negates (void)
{
  unsigned int i = 0;
  tree negate;

  FOR_EACH_VEC_ELT (plus_negates, i, negate)
    {
      gimple user = get_single_immediate_use (negate);

      if (!user || !is_gimple_assign (user))
	continue;

      /* The negate operand can be either operand of a PLUS_EXPR
	 (it can be the LHS if the RHS is a constant for example).

	 Force the negate operand to the RHS of the PLUS_EXPR, then
	 transform the PLUS_EXPR into a MINUS_EXPR.  */
      if (gimple_assign_rhs_code (user) == PLUS_EXPR)
	{
	  /* If the negated operand appears on the LHS of the
	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
	     to force the negated operand to the RHS of the PLUS_EXPR.  */
	  if (gimple_assign_rhs1 (user) == negate)
	    {
	      swap_ssa_operands (user,
				 gimple_assign_rhs1_ptr (user),
				 gimple_assign_rhs2_ptr (user));
	    }

	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
	  if (gimple_assign_rhs2 (user) == negate)
	    {
	      tree rhs1 = gimple_assign_rhs1 (user);
	      tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
	      update_stmt (user);
	    }
	}
      else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
	{
	  if (gimple_assign_rhs1 (user) == negate)
	    {
	      /* We have
		   x = -a
		   y = x - b
		 which we transform into
		   x = a + b
		   y = -x .
		 This pushes down the negate which we possibly can merge
		 into some other operation, hence insert it into the
		 plus_negates vector.  */
	      gimple feed = SSA_NAME_DEF_STMT (negate);
	      tree a = gimple_assign_rhs1 (feed);
	      tree b = gimple_assign_rhs2 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (feed);
	      gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
	      tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
	      gimple g = gimple_build_assign (x, PLUS_EXPR, a, b);
	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
	      user = gsi_stmt (gsi2);
	      update_stmt (user);
	      reassoc_remove_stmt (&gsi);
	      release_defs (feed);
	      plus_negates.safe_push (gimple_assign_lhs (user));
	    }
	  else
	    {
	      /* Transform "x = -a; y = b - x" into "y = b + a", getting
		 rid of one operation.  */
	      gimple feed = SSA_NAME_DEF_STMT (negate);
	      tree a = gimple_assign_rhs1 (feed);
	      tree rhs1 = gimple_assign_rhs1 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
	      update_stmt (gsi_stmt (gsi));
	    }
	}
    }
}
/* Returns true if OP is of a type for which we can do reassociation.
   That is for integral or non-saturating fixed-point types, and for
   floating point type when associative-math is enabled.  */
static bool
can_reassociate_p (tree op)
{
  tree type = TREE_TYPE (op);
  if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
      || NON_SAT_FIXED_POINT_TYPE_P (type)
      || (flag_associative_math && FLOAT_TYPE_P (type)))
    return true;
  return false;
}
/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   IE given
   b = c - d
   q = b - r
   k = t - q

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement
   and set UIDs within each basic block.  */
static void
break_up_subtract_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  unsigned int uid = 1;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_visited (stmt, false);
      gimple_set_uid (stmt, uid++);

      if (!is_gimple_assign (stmt)
	  || !can_reassociate_p (gimple_assign_lhs (stmt)))
	continue;

      /* Look for simple gimple subtract operations.  */
      if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
	{
	  if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
	      || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
	    continue;

	  /* Check for a subtract used only in an addition.  If this
	     is the case, transform it into add of a negate for better
	     reassociation.  IE transform C = A-B into C = A + -B if C
	     is only used in an addition.  */
	  if (should_break_up_subtract (stmt))
	    break_up_subtract (stmt, &gsi);
	}
      else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
	       && can_reassociate_p (gimple_assign_rhs1 (stmt)))
	plus_negates.safe_push (gimple_assign_lhs (stmt));
    }
  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    break_up_subtract_bb (son);
}
/* Used for repeated factor analysis.  */
struct repeat_factor_d
{
  /* An SSA name that occurs in a multiply chain.  */
  tree factor;

  /* Cached rank of the factor.  */
  unsigned rank;

  /* Number of occurrences of the factor in the chain.  */
  HOST_WIDE_INT count;

  /* An SSA name representing the product of this factor and
     all factors appearing later in the repeated factor vector.  */
  tree repr;
};

typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
typedef const struct repeat_factor_d *const_repeat_factor_t;


static vec<repeat_factor> repeat_factor_vec;
/* Used for sorting the repeat factor vector.  Sort primarily by
   ascending occurrence count, secondarily by descending rank.  */
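/* Illustrative note (not from the original source): with occurrence counts
   x:2, y:3, z:4 the qsort below orders the vector x, y, z; when two factors
   occur equally often, the higher-ranked one sorts first.  */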
static int
compare_repeat_factors (const void *x1, const void *x2)
{
  const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
  const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;

  if (rf1->count != rf2->count)
    return rf1->count - rf2->count;

  return rf2->rank - rf1->rank;
}
/* Look for repeated operands in OPS in the multiply tree rooted at
   STMT.  Replace them with an optimal sequence of multiplies and powi
   builtin calls, and remove the used operands from OPS.  Return an
   SSA name representing the value of the replacement sequence.  */
4340 attempt_builtin_powi (gimple stmt
, vec
<operand_entry_t
> *ops
)
4342 unsigned i
, j
, vec_len
;
4345 repeat_factor_t rf1
, rf2
;
4346 repeat_factor rfnew
;
4347 tree result
= NULL_TREE
;
4348 tree target_ssa
, iter_result
;
4349 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
4350 tree powi_fndecl
= mathfn_built_in (type
, BUILT_IN_POWI
);
4351 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4352 gimple mul_stmt
, pow_stmt
;
  /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
     target.  */
  if (!powi_fndecl)
    return NULL_TREE;

  /* Allocate the repeated factor vector.  */
  repeat_factor_vec.create (10);
  /* Scan the OPS vector for all SSA names in the product and build
     up a vector of occurrence counts for each factor.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
        {
          FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
            {
              if (rf1->factor == oe->op)
                {
                  rf1->count += oe->count;
                  break;
                }
            }

          if (j >= repeat_factor_vec.length ())
            {
              rfnew.factor = oe->op;
              rfnew.rank = oe->rank;
              rfnew.count = oe->count;
              rfnew.repr = NULL_TREE;
              repeat_factor_vec.safe_push (rfnew);
            }
        }
    }
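  /* Each distinct SSA factor in the multiply chain now has exactly one
     entry in repeat_factor_vec, whose COUNT field is the total number
     of times that factor occurs across OPS.  */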
  /* Sort the repeated factor vector by (a) increasing occurrence count,
     and (b) decreasing rank.  */
  repeat_factor_vec.qsort (compare_repeat_factors);
  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_pow opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions:

        t1 = y * z
        t2 = t1 * x
        t3 = t2 * t2
        t4 = t3 * t1
        result = t4 * z

     If we instead ignored the cached y * z and first multiplied by
     the __builtin_pow opportunity z * z, we would get the inferior:

        t1 = y * z
        t2 = t1 * x
        t3 = t2 * t2
        t4 = z * z
        t5 = t3 * t4
        result = t5 * y  */

  vec_len = repeat_factor_vec.length ();
  /* Repeatedly look for opportunities to create a builtin_powi call.  */
  while (true)
    {
      HOST_WIDE_INT power;

      /* First look for the largest cached product of factors from
         preceding iterations.  If found, create a builtin_powi for
         it if the minimum occurrence count for its factors is at
         least 2, or just use this cached product as our next
         multiplicand if the minimum occurrence count is 1.  */
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
        {
          if (rf1->repr && rf1->count > 0)
            break;
        }
      if (j < vec_len)
        {
          power = rf1->count;

          if (power == 1)
            {
              iter_result = rf1->repr;

              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  unsigned elt;
                  repeat_factor_t rf;
                  fputs ("Multiplying by cached product ", dump_file);
                  for (elt = j; elt < vec_len; elt++)
                    {
                      rf = &repeat_factor_vec[elt];
                      print_generic_expr (dump_file, rf->factor, 0);
                      if (elt < vec_len - 1)
                        fputs (" * ", dump_file);
                    }
                  fputs ("\n", dump_file);
                }
            }
          else
            {
              iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
              pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                            build_int_cst (integer_type_node,
                                                           power));
              gimple_call_set_lhs (pow_stmt, iter_result);
              gimple_set_location (pow_stmt, gimple_location (stmt));
              gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);

              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  unsigned elt;
                  repeat_factor_t rf;
                  fputs ("Building __builtin_pow call for cached product (",
                         dump_file);
                  for (elt = j; elt < vec_len; elt++)
                    {
                      rf = &repeat_factor_vec[elt];
                      print_generic_expr (dump_file, rf->factor, 0);
                      if (elt < vec_len - 1)
                        fputs (" * ", dump_file);
                    }
                  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n",
                           power);
                }
            }
        }
      else
        {
          /* Otherwise, find the first factor in the repeated factor
             vector whose occurrence count is at least 2.  If no such
             factor exists, there are no builtin_powi opportunities
             remaining.  */
          FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
            {
              if (rf1->count >= 2)
                break;
            }

          if (j >= vec_len)
            break;

          power = rf1->count;

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              unsigned elt;
              repeat_factor_t rf;
              fputs ("Building __builtin_pow call for (", dump_file);
              for (elt = j; elt < vec_len; elt++)
                {
                  rf = &repeat_factor_vec[elt];
                  print_generic_expr (dump_file, rf->factor, 0);
                  if (elt < vec_len - 1)
                    fputs (" * ", dump_file);
                }
              fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n", power);
            }

          reassociate_stats.pows_created++;

          /* Visit each element of the vector in reverse order (so that
             high-occurrence elements are visited first, and within the
             same occurrence count, lower-ranked elements are visited
             first).  Form a linear product of all elements in this order
             whose occurrence count is at least that of element J.
             Record the SSA name representing the product of each element
             with all subsequent elements in the vector.  */
          if (j == vec_len - 1)
            rf1->repr = rf1->factor;
          else
            {
              for (ii = vec_len - 2; ii >= (int)j; ii--)
                {
                  tree op1, op2;

                  rf1 = &repeat_factor_vec[ii];
                  rf2 = &repeat_factor_vec[ii + 1];

                  /* Init the last factor's representative to be itself.  */
                  if (!rf2->repr)
                    rf2->repr = rf2->factor;

                  op1 = rf1->factor;
                  op2 = rf2->repr;

                  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
                  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
                                                  op1, op2);
                  gimple_set_location (mul_stmt, gimple_location (stmt));
                  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
                  rf1->repr = target_ssa;

                  /* Don't reprocess the multiply we just introduced.  */
                  gimple_set_visited (mul_stmt, true);
                }
            }
          /* Form a call to __builtin_powi for the maximum product
             just formed, raised to the power obtained earlier.  */
          rf1 = &repeat_factor_vec[j];
          iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
          pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                        build_int_cst (integer_type_node,
                                                       power));
          gimple_call_set_lhs (pow_stmt, iter_result);
          gimple_set_location (pow_stmt, gimple_location (stmt));
          gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
        }
      /* If we previously formed at least one other builtin_powi call,
         form the product of this one and those others.  */
      if (result)
        {
          tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
          mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
                                          result, iter_result);
          gimple_set_location (mul_stmt, gimple_location (stmt));
          gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
          gimple_set_visited (mul_stmt, true);
          result = new_result;
        }
      else
        result = iter_result;
      /* Decrement the occurrence count of each element in the product
         by the count found above, and remove this many copies of each
         factor from OPS.  */
      for (i = j; i < vec_len; i++)
        {
          unsigned k = power;
          unsigned n;

          rf1 = &repeat_factor_vec[i];
          rf1->count -= power;

          FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
            {
              if (oe->op == rf1->factor)
                {
                  if (oe->count <= k)
                    {
                      ops->ordered_remove (n);
                      k -= oe->count;

                      if (k == 0)
                        break;
                    }
                  else
                    {
                      oe->count -= k;
                      break;
                    }
                }
            }
        }
    }
  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}
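/* Illustrative sketch only, kept out of the build: a plain-C rendering
   of the five-multiply decomposition of x*x * y*y*y * z*z*z*z discussed
   in the comment inside attempt_builtin_powi above.  Function and
   variable names here are hypothetical.  */
#if 0
static double
reassoc_powi_example (double x, double y, double z)
{
  double yz = y * z;        /* cached partial product y * z            */
  double xyz = yz * x;      /* x * y * z                               */
  double xyz2 = xyz * xyz;  /* (x*y*z)^2, the __builtin_powi result    */
  double t = xyz2 * yz;     /* reuse the cached y * z                  */
  return t * z;             /* fifth and final multiply: x^2 y^3 z^4   */
}
#endif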
/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */

static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
{
  tree rhs1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}
/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
                            tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}
/* Reassociate expressions in basic block BB and its post-dominator as
   children.  */

static void
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple stmt = last_stmt (bb);

  if (stmt && !gimple_visited_p (stmt))
    maybe_optimize_range_tests (stmt);
  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
          && !stmt_could_throw_p (stmt))
        {
          tree lhs, rhs1, rhs2;
          enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

          /* If this is not a gimple binary expression, there is
             nothing for us to do with it.  */
          if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
            continue;

          /* If this was part of an already processed statement,
             we don't need to touch it again.  */
          if (gimple_visited_p (stmt))
            {
              /* This statement might have become dead because of previous
                 reassociations.  */
              if (has_zero_uses (gimple_get_lhs (stmt)))
                {
                  reassoc_remove_stmt (&gsi);
                  release_defs (stmt);
                  /* We might end up removing the last stmt above which
                     places the iterator to the end of the sequence.
                     Reset it to the last stmt in this case which might
                     be the end of the sequence as well if we removed
                     the last statement of the sequence.  In which case
                     we need to bail out.  */
                  if (gsi_end_p (gsi))
                    {
                      gsi = gsi_last_bb (bb);
                      if (gsi_end_p (gsi))
                        break;
                    }
                }
              continue;
            }

          lhs = gimple_assign_lhs (stmt);
          rhs1 = gimple_assign_rhs1 (stmt);
          rhs2 = gimple_assign_rhs2 (stmt);
          /* For non-bit or min/max operations we can't associate
             all types.  Verify that here.  */
          if (rhs_code != BIT_IOR_EXPR
              && rhs_code != BIT_AND_EXPR
              && rhs_code != BIT_XOR_EXPR
              && rhs_code != MIN_EXPR
              && rhs_code != MAX_EXPR
              && (!can_reassociate_p (lhs)
                  || !can_reassociate_p (rhs1)
                  || !can_reassociate_p (rhs2)))
            continue;
          if (associative_tree_code (rhs_code))
            {
              auto_vec<operand_entry_t> ops;
              tree powi_result = NULL_TREE;

              /* There may be no immediate uses left by the time we
                 get here because we may have eliminated them all.  */
              if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
                continue;

              gimple_set_visited (stmt, true);
              linearize_expr_tree (&ops, stmt, true, true);
              ops.qsort (sort_by_operand_rank);
              optimize_ops_list (rhs_code, &ops);
              if (undistribute_ops_list (rhs_code, &ops,
                                         loop_containing_stmt (stmt)))
                {
                  ops.qsort (sort_by_operand_rank);
                  optimize_ops_list (rhs_code, &ops);
                }
              if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
                optimize_range_tests (rhs_code, &ops);

              if (first_pass_instance
                  && rhs_code == MULT_EXPR
                  && flag_unsafe_math_optimizations)
                powi_result = attempt_builtin_powi (stmt, &ops);

              /* If the operand vector is now empty, all operands were
                 consumed by the __builtin_powi optimization.  */
              if (ops.length () == 0)
                transform_stmt_to_copy (&gsi, stmt, powi_result);
              else if (ops.length () == 1)
                {
                  tree last_op = ops.last ()->op;

                  if (powi_result)
                    transform_stmt_to_multiply (&gsi, stmt, last_op,
                                                powi_result);
                  else
                    transform_stmt_to_copy (&gsi, stmt, last_op);
                }
              else
                {
                  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
                  int ops_num = ops.length ();
                  int width = get_reassociation_width (ops_num, rhs_code, mode);
                  tree new_lhs = lhs;

                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file,
                             "Width = %d was chosen for reassociation\n", width);

                  if (width > 1
                      && ops.length () > 3)
                    rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
                                                width, ops);
                  else
                    {
                      /* When there are three operands left, we want
                         to make sure the ones that get the double
                         binary op are chosen wisely.  */
                      int len = ops.length ();
                      if (len >= 3)
                        swap_ops_for_binary_stmt (ops, len - 3, stmt);

                      new_lhs = rewrite_expr_tree (stmt, 0, ops,
                                                   powi_result != NULL);
                    }

                  /* If we combined some repeated factors into a
                     __builtin_powi call, multiply that result by the
                     reassociated operands.  */
                  if (powi_result)
                    {
                      gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
                      tree type = TREE_TYPE (lhs);
                      tree target_ssa = make_temp_ssa_name (type, NULL,
                                                            "reassocmul");
                      gimple_set_lhs (lhs_stmt, target_ssa);
                      update_stmt (lhs_stmt);
                      if (lhs != new_lhs)
                        target_ssa = new_lhs;
                      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
                                                      powi_result, target_ssa);
                      gimple_set_location (mul_stmt, gimple_location (stmt));
                      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
                    }
                }
            }
        }
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    reassociate_bb (son);
}
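/* Illustrative sketch only, not compiled: one possible shape of the
   parallel rewrite selected in reassociate_bb above when
   get_reassociation_width returns 2 for a four-operand chain.  Names
   are hypothetical.  */
#if 0
static double
reassoc_width_example (double a, double b, double c, double d)
{
  /* Width 1 would yield the serial chain ((a + b) + c) + d.  With
     width 2, two independent partial sums can issue in parallel and
     the dependence height drops from 3 to 2.  */
  double s0 = a + b;
  double s1 = c + d;
  return s0 + s1;
}
#endif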
/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...; // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...; // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI<1(l1), OTHERVAR(l2)>;  */
static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (var);
      gimple use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      gcc_assert (ok
                  && is_gimple_assign (use_stmt)
                  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
                  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple g = gimple_build_cond (NE_EXPR, var,
                                    build_zero_cst (TREE_TYPE (var)),
                                    NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = REG_BR_PROB_BASE / 2;
      etrue->count = cond_bb->count / 2;
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      efalse->count -= etrue->count;
      then_bb->count -= etrue->count;

      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
        othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
        othervar = gimple_assign_rhs1 (use_stmt);
      else
        gcc_unreachable ();
      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}
void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
void debug_ops_vector (vec<operand_entry_t> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
{
  operand_entry_t oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op, 0);
      fprintf (file, "\n");
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry_t> ops)
{
  dump_ops_vector (stderr, ops);
}
static void
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}
/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, long>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
        insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB.  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}
/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
                            reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
                            reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
                            reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
                            reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
                            reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
                            reassociate_stats.pows_created);

  delete operand_rank;
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}
/* Gate and execute functions for Reassociation.  */

static unsigned int
execute_reassoc (void)
{
  init_reassoc ();

  do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return 0;
}
const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};
class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
  virtual unsigned int execute (function *) { return execute_reassoc (); }

}; // class pass_reassoc
gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}