1 /* Reassociation for trees.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
30 #include "fold-const.h"
31 #include "stor-layout.h"
33 #include "hard-reg-set.h"
35 #include "dominance.h"
38 #include "basic-block.h"
39 #include "gimple-pretty-print.h"
40 #include "tree-inline.h"
41 #include "tree-ssa-alias.h"
42 #include "internal-fn.h"
43 #include "gimple-fold.h"
45 #include "gimple-expr.h"
47 #include "gimple-iterator.h"
48 #include "gimplify-me.h"
49 #include "gimple-ssa.h"
51 #include "tree-phinodes.h"
52 #include "ssa-iterators.h"
53 #include "stringpool.h"
54 #include "tree-ssanames.h"
55 #include "tree-ssa-loop-niter.h"
56 #include "tree-ssa-loop.h"
58 #include "insn-config.h"
69 #include "tree-iterator.h"
70 #include "tree-pass.h"
71 #include "alloc-pool.h"
72 #include "langhooks.h"
76 #include "diagnostic-core.h"
79 #include "insn-codes.h"
82 /* This is a simple global reassociation pass. It is, in part, based
83 on the LLVM pass of the same name (they do some things more or less
84 than we do, in different orders, etc.).
86 It consists of five steps:
88 1. Breaking up subtract operations into addition + negate, where
89 it would promote the reassociation of adds.
91 2. Left linearization of the expression trees, so that (A+B)+(C+D)
92 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
93 During linearization, we place the operands of the binary
94 expressions into a vector of operand_entry_t
96 3. Optimization of the operand lists, eliminating things like a + -a,
a & a, etc.
99 3a. Combine repeated factors with the same occurrence counts
100 into a __builtin_powi call that will later be optimized into
101 an optimal number of multiplies.
103 4. Rewrite the expression trees we linearized and optimized so
104 they are in proper rank order.
106 5. Repropagate negates, as nothing else will clean it up ATM.
108 A bit of theory on #4, since nobody seems to write anything down
109 about why it makes sense to do it the way they do it:
111 We could do this much nicer theoretically, but don't (for reasons
112 explained after how to do it theoretically nice :P).
114 In order to promote the most redundancy elimination, you want
115 binary expressions whose operands are the same rank (or
116 preferably, the same value) exposed to the redundancy eliminator,
117 for possible elimination.
119 So the way to do this if we really cared, is to build the new op
120 tree from the leaves to the roots, merging as you go, and putting the
121 new op on the end of the worklist, until you are left with one
122 thing on the worklist.
124 IE if you have to rewrite the following set of operands (listed with
125 rank in parentheses), with opcode PLUS_EXPR:
127 a (1), b (1), c (1), d (2), e (2)
130 We start with our merge worklist empty, and the ops list with all of
those on it.
133 You want to first merge all leaves of the same rank, as much as
possible.
136 So first build a binary op of
138 mergetmp = a + b, and put "mergetmp" on the merge worklist.
140 Because there is no three operand form of PLUS_EXPR, c is not going to
141 be exposed to redundancy elimination as a rank 1 operand.
143 So you might as well throw it on the merge worklist (you could also
144 consider it to now be a rank two operand, and merge it with d and e,
145 but in this case, you then have evicted e from a binary op. So at
146 least in this situation, you can't win.)
148 Then build a binary op of d + e
mergetmp2 = d + e
151 and put mergetmp2 on the merge worklist.
153 so merge worklist = {mergetmp, c, mergetmp2}
155 Continue building binary ops of these operations until you have only
156 one operation left on the worklist.
161 mergetmp3 = mergetmp + c
163 worklist = {mergetmp2, mergetmp3}
165 mergetmp4 = mergetmp2 + mergetmp3
167 worklist = {mergetmp4}
169 because we have one operation left, we can now just set the original
170 statement equal to the result of that operation.
172 This will at least expose a + b and d + e to redundancy elimination
173 as binary operations.
175 For extra points, you can reuse the old statements to build the
176 mergetmps, since you shouldn't run out.
178 So why don't we do this?
180 Because it's expensive, and rarely will help. Most trees we are
181 reassociating have 3 or fewer ops. If they have 2 ops, they already
182 will be written into a nice single binary op. If you have 3 ops, a
183 single simple check suffices to tell you whether the first two are of the
184 same rank. If so, you know to order it

mergetmp = op1 + op2
187 newstmt = mergetmp + op3

instead of

mergetmp = op2 + op3
191 newstmt = mergetmp + op1
193 If all three are of the same rank, you can't expose them all in a
194 single binary operator anyway, so the above is *still* the best you
can do.
197 Thus, this is what we do. When we have three ops left, we check to see
198 what order to put them in, and call it a day. As a nod to vector sum
199 reduction, we check if any of the ops are really a phi node that is a
200 destructive update for the associating op, and keep the destructive
201 update together for vector sum reduction recognition. */
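/* For illustration, a minimal sketch of the five steps on a small,
   hypothetical expression (not taken from any testcase):

     t = a - b + c - d;

   Step 1 breaks up the subtracts:  t = a + (-b) + c + (-d).
   Step 2 linearizes the tree and collects the operand vector
   { a, -b, c, -d } for PLUS_EXPR.
   Step 3 would cancel pairs such as x + (-x) if both were present,
   and step 3a would turn repeated factors of a product into a
   __builtin_powi call.
   Step 4 rewrites the operands back into a left-linear chain in rank
   order, and step 5 repropagates the remaining negates, e.g. turning
   x + (-y) back into x - y.  */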
208 int constants_eliminated
;
211 int pows_encountered
;
215 /* Operator, rank pair. */
216 typedef struct operand_entry
224 static pool_allocator
<operand_entry
> operand_entry_pool ("operand entry pool",
227 /* This is used to assign a unique ID to each struct operand_entry
228 so that qsort results are identical on different hosts. */
229 static int next_operand_entry_id
;
231 /* Starting rank number for a given basic block, so that we can rank
232 operations using unmovable instructions in that BB based on the bb
depth. */
234 static long *bb_rank
;
236 /* Operand->rank hashtable. */
237 static hash_map
<tree
, long> *operand_rank
;
239 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
240 all basic blocks the CFG should be adjusted - basic blocks
241 split right after that SSA_NAME's definition statement and before
242 the only use, which must be a bit ior. */
243 static vec
<tree
> reassoc_branch_fixups
;
246 static long get_rank (tree
);
247 static bool reassoc_stmt_dominates_stmt_p (gimple
, gimple
);
249 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
250 possibly added by gsi_remove. */
253 reassoc_remove_stmt (gimple_stmt_iterator
*gsi
)
255 gimple stmt
= gsi_stmt (*gsi
);
257 if (!MAY_HAVE_DEBUG_STMTS
|| gimple_code (stmt
) == GIMPLE_PHI
)
258 return gsi_remove (gsi
, true);
260 gimple_stmt_iterator prev
= *gsi
;
262 unsigned uid
= gimple_uid (stmt
);
263 basic_block bb
= gimple_bb (stmt
);
264 bool ret
= gsi_remove (gsi
, true);
265 if (!gsi_end_p (prev
))
268 prev
= gsi_start_bb (bb
);
269 gimple end_stmt
= gsi_stmt (*gsi
);
270 while ((stmt
= gsi_stmt (prev
)) != end_stmt
)
272 gcc_assert (stmt
&& is_gimple_debug (stmt
) && gimple_uid (stmt
) == 0);
273 gimple_set_uid (stmt
, uid
);
279 /* Bias amount for loop-carried phis. We want this to be larger than
280 the depth of any reassociation tree we can see, but not larger than
281 the rank difference between two blocks. */
282 #define PHI_LOOP_BIAS (1 << 15)
284 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
285 an innermost loop, and the phi has only a single use which is inside
286 the loop, then the rank is the block rank of the loop latch plus an
287 extra bias for the loop-carried dependence. This causes expressions
288 calculated into an accumulator variable to be independent for each
289 iteration of the loop. If STMT is some other phi, the rank is the
290 block rank of its containing block. */
292 phi_rank (gimple stmt
)
294 basic_block bb
= gimple_bb (stmt
);
295 struct loop
*father
= bb
->loop_father
;
301 /* We only care about real loops (those with a latch). */
303 return bb_rank
[bb
->index
];
305 /* Interesting phis must be in headers of innermost loops. */
306 if (bb
!= father
->header
308 return bb_rank
[bb
->index
];
310 /* Ignore virtual SSA_NAMEs. */
311 res
= gimple_phi_result (stmt
);
312 if (virtual_operand_p (res
))
313 return bb_rank
[bb
->index
];
315 /* The phi definition must have a single use, and that use must be
316 within the loop. Otherwise this isn't an accumulator pattern. */
317 if (!single_imm_use (res
, &use
, &use_stmt
)
318 || gimple_bb (use_stmt
)->loop_father
!= father
)
319 return bb_rank
[bb
->index
];
321 /* Look for phi arguments from within the loop. If found, bias this phi. */
322 for (i
= 0; i
< gimple_phi_num_args (stmt
); i
++)
324 tree arg
= gimple_phi_arg_def (stmt
, i
);
325 if (TREE_CODE (arg
) == SSA_NAME
326 && !SSA_NAME_IS_DEFAULT_DEF (arg
))
328 gimple def_stmt
= SSA_NAME_DEF_STMT (arg
);
329 if (gimple_bb (def_stmt
)->loop_father
== father
)
330 return bb_rank
[father
->latch
->index
] + PHI_LOOP_BIAS
;
334 /* Must be an uninteresting phi. */
335 return bb_rank
[bb
->index
];
338 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
339 loop-carried dependence of an innermost loop, return TRUE; else
return FALSE. */
342 loop_carried_phi (tree exp
)
347 if (TREE_CODE (exp
) != SSA_NAME
348 || SSA_NAME_IS_DEFAULT_DEF (exp
))
351 phi_stmt
= SSA_NAME_DEF_STMT (exp
);
353 if (gimple_code (SSA_NAME_DEF_STMT (exp
)) != GIMPLE_PHI
)
356 /* Non-loop-carried phis have block rank. Loop-carried phis have
357 an additional bias added in. If this phi doesn't have block rank,
358 it's biased and should not be propagated. */
359 block_rank
= bb_rank
[gimple_bb (phi_stmt
)->index
];
361 if (phi_rank (phi_stmt
) != block_rank
)
367 /* Return the maximum of RANK and the rank that should be propagated
368 from expression OP. For most operands, this is just the rank of OP.
369 For loop-carried phis, the value is zero to avoid undoing the bias
370 in favor of the phi. */
372 propagate_rank (long rank
, tree op
)
376 if (loop_carried_phi (op
))
379 op_rank
= get_rank (op
);
381 return MAX (rank
, op_rank
);
384 /* Look up the operand rank structure for expression E. */
387 find_operand_rank (tree e
)
389 long *slot
= operand_rank
->get (e
);
390 return slot
? *slot
: -1;
393 /* Insert {E,RANK} into the operand rank hashtable. */
396 insert_operand_rank (tree e
, long rank
)
398 gcc_assert (rank
> 0);
399 gcc_assert (!operand_rank
->put (e
, rank
));
402 /* Given an expression E, return the rank of the expression. */
407 /* SSA_NAME's have the rank of the expression they are the result of.
409 For globals and uninitialized values, the rank is 0.
410 For function arguments, use the pre-setup rank.
411 For PHI nodes, stores, asm statements, etc, we use the rank of the BB.
413 For simple operations, the rank is the maximum rank of any of
414 its operands, or the bb_rank, whichever is less.
415 I make no claims that this is optimal, however, it gives good
results. */
418 /* We make an exception to the normal ranking system to break
419 dependences of accumulator variables in loops. Suppose we
420 have a simple one-block loop containing:

  x_1 = phi(x_0, x_2)
  b = a + x_1
  c = b + d
  x_2 = c + e
427 As shown, each iteration of the calculation into x is fully
428 dependent upon the iteration before it. We would prefer to
429 see this in the form:

  x_1 = phi(x_0, x_2)
  b = a + d
  c = b + e
  x_2 = c + x_1
436 If the loop is unrolled, the calculations of b and c from
437 different iterations can be interleaved.
439 To obtain this result during reassociation, we bias the rank
440 of the phi definition x_1 upward, when it is recognized as an
441 accumulator pattern. The artificial rank causes it to be
442 added last, providing the desired independence. */
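  /* For illustration, a hypothetical source-level accumulator that this
     exception targets:

       double sum = 0.0;
       for (int i = 0; i < n; i++)
	 sum = sum + a[i] + b[i] + c[i];

     Biasing the rank of the loop-carried phi for sum makes it the
     highest-ranked operand, so the rewritten chain computes
     a[i] + b[i] + c[i] first and adds the previous iteration's sum
     last, letting unrolled iterations overlap.  */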
444 if (TREE_CODE (e
) == SSA_NAME
)
451 if (SSA_NAME_IS_DEFAULT_DEF (e
))
452 return find_operand_rank (e
);
454 stmt
= SSA_NAME_DEF_STMT (e
);
455 if (gimple_code (stmt
) == GIMPLE_PHI
)
456 return phi_rank (stmt
);
458 if (!is_gimple_assign (stmt
))
459 return bb_rank
[gimple_bb (stmt
)->index
];
461 /* If we already have a rank for this expression, use that. */
462 rank
= find_operand_rank (e
);
466 /* Otherwise, find the maximum rank for the operands. As an
467 exception, remove the bias from loop-carried phis when propagating
468 the rank so that dependent operations are not also biased. */
469 /* Simply walk over all SSA uses - this takes advantage of the
470 fact that non-SSA operands are is_gimple_min_invariant and
thus have rank 0. */
473 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
474 rank
= propagate_rank (rank
, op
);
476 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
478 fprintf (dump_file
, "Rank for ");
479 print_generic_expr (dump_file
, e
, 0);
480 fprintf (dump_file
, " is %ld\n", (rank
+ 1));
483 /* Note the rank in the hashtable so we don't recompute it. */
484 insert_operand_rank (e
, (rank
+ 1));
488 /* Constants, globals, etc., are rank 0 */
493 /* We want integer ones to end up last no matter what, since they are
494 the ones we can do the most with. */
495 #define INTEGER_CONST_TYPE 1 << 3
496 #define FLOAT_CONST_TYPE 1 << 2
497 #define OTHER_CONST_TYPE 1 << 1
499 /* Classify an invariant tree into integer, float, or other, so that
500 we can sort them to be near other constants of the same type. */
502 constant_type (tree t
)
504 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
505 return INTEGER_CONST_TYPE
;
506 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
507 return FLOAT_CONST_TYPE
;
509 return OTHER_CONST_TYPE
;
512 /* qsort comparison function to sort operand entries PA and PB by rank
513 so that the sorted array is ordered by rank in decreasing order. */
515 sort_by_operand_rank (const void *pa
, const void *pb
)
517 const operand_entry_t oea
= *(const operand_entry_t
*)pa
;
518 const operand_entry_t oeb
= *(const operand_entry_t
*)pb
;
520 /* It's nicer for optimize_expression if constants that are likely
521 to fold when added/multiplied/whatever are put next to each
522 other. Since all constants have rank 0, order them by type. */
523 if (oeb
->rank
== 0 && oea
->rank
== 0)
525 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
526 return constant_type (oeb
->op
) - constant_type (oea
->op
);
528 /* To make sorting result stable, we use unique IDs to determine
order. */
530 return oeb
->id
- oea
->id
;
533 /* Lastly, make sure the versions that are the same go next to each
other. */
535 if ((oeb
->rank
- oea
->rank
== 0)
536 && TREE_CODE (oea
->op
) == SSA_NAME
537 && TREE_CODE (oeb
->op
) == SSA_NAME
)
539 /* SSA_NAME_VERSION is assigned pretty randomly, since we reuse
540 versions of removed SSA_NAMEs, so if possible, prefer to sort
541 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
543 if (!SSA_NAME_IS_DEFAULT_DEF (oea
->op
)
544 && !SSA_NAME_IS_DEFAULT_DEF (oeb
->op
)
545 && SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
547 gimple stmta
= SSA_NAME_DEF_STMT (oea
->op
);
548 gimple stmtb
= SSA_NAME_DEF_STMT (oeb
->op
);
549 basic_block bba
= gimple_bb (stmta
);
550 basic_block bbb
= gimple_bb (stmtb
);
553 if (bb_rank
[bbb
->index
] != bb_rank
[bba
->index
])
554 return bb_rank
[bbb
->index
] - bb_rank
[bba
->index
];
558 bool da
= reassoc_stmt_dominates_stmt_p (stmta
, stmtb
);
559 bool db
= reassoc_stmt_dominates_stmt_p (stmtb
, stmta
);
565 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
566 return SSA_NAME_VERSION (oeb
->op
) - SSA_NAME_VERSION (oea
->op
);
568 return oeb
->id
- oea
->id
;
571 if (oeb
->rank
!= oea
->rank
)
572 return oeb
->rank
- oea
->rank
;
574 return oeb
->id
- oea
->id
;
577 /* Add an operand entry to *OPS for the tree operand OP. */
580 add_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
)
582 operand_entry_t oe
= operand_entry_pool
.allocate ();
585 oe
->rank
= get_rank (op
);
586 oe
->id
= next_operand_entry_id
++;
591 /* Add an operand entry to *OPS for the tree operand OP with repeat
count REPEAT. */
595 add_repeat_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
,
596 HOST_WIDE_INT repeat
)
598 operand_entry_t oe
= operand_entry_pool
.allocate ();
601 oe
->rank
= get_rank (op
);
602 oe
->id
= next_operand_entry_id
++;
606 reassociate_stats
.pows_encountered
++;
609 /* Return true if STMT is a reassociable operation containing a binary
610 operation with tree code CODE, and is inside LOOP. */
613 is_reassociable_op (gimple stmt
, enum tree_code code
, struct loop
*loop
)
615 basic_block bb
= gimple_bb (stmt
);
617 if (gimple_bb (stmt
) == NULL
)
620 if (!flow_bb_inside_loop_p (loop
, bb
))
623 if (is_gimple_assign (stmt
)
624 && gimple_assign_rhs_code (stmt
) == code
625 && has_single_use (gimple_assign_lhs (stmt
)))
632 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
633 operand of that operation. Otherwise, return NULL. */
636 get_unary_op (tree name
, enum tree_code opcode
)
638 gimple stmt
= SSA_NAME_DEF_STMT (name
);
640 if (!is_gimple_assign (stmt
))
643 if (gimple_assign_rhs_code (stmt
) == opcode
)
644 return gimple_assign_rhs1 (stmt
);
648 /* If CURR and LAST are a pair of ops that OPCODE allows us to
649 eliminate through equivalences, do so, remove them from OPS, and
650 return true. Otherwise, return false. */
653 eliminate_duplicate_pair (enum tree_code opcode
,
654 vec
<operand_entry_t
> *ops
,
657 operand_entry_t curr
,
658 operand_entry_t last
)
661 /* If we have two of the same op, and the opcode is & |, min, or max,
662 we can eliminate one of them.
663 If we have two of the same op, and the opcode is ^, we can
664 eliminate both of them. */
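  /* For illustration (hypothetical operands):  with opcode BIT_AND_EXPR
     the list { x, x, y } becomes { x, y }, since x & x == x.  With
     opcode BIT_XOR_EXPR both copies cancel, so { x, x, y } becomes
     { y }, and { x, x } becomes { 0 } so that at least one operand
     remains.  */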
666 if (last
&& last
->op
== curr
->op
)
674 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
676 fprintf (dump_file
, "Equivalence: ");
677 print_generic_expr (dump_file
, curr
->op
, 0);
678 fprintf (dump_file
, " [&|minmax] ");
679 print_generic_expr (dump_file
, last
->op
, 0);
680 fprintf (dump_file
, " -> ");
681 print_generic_stmt (dump_file
, last
->op
, 0);
684 ops
->ordered_remove (i
);
685 reassociate_stats
.ops_eliminated
++;
690 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
692 fprintf (dump_file
, "Equivalence: ");
693 print_generic_expr (dump_file
, curr
->op
, 0);
694 fprintf (dump_file
, " ^ ");
695 print_generic_expr (dump_file
, last
->op
, 0);
696 fprintf (dump_file
, " -> nothing\n");
699 reassociate_stats
.ops_eliminated
+= 2;
701 if (ops
->length () == 2)
704 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
709 ops
->ordered_remove (i
-1);
710 ops
->ordered_remove (i
-1);
722 static vec
<tree
> plus_negates
;
724 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
725 expression, look in OPS for a corresponding positive operation to cancel
726 it out. If we find one, remove the other from OPS, replace
727 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
return false. */
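/* For illustration (hypothetical operands):  in the PLUS_EXPR list
   { a, t, b } where t = -a, the pair cancels and the list becomes
   { 0, b }; where t = ~a instead, a + ~a == -1 for integers, so the
   list becomes { -1, b }.  */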
731 eliminate_plus_minus_pair (enum tree_code opcode
,
732 vec
<operand_entry_t
> *ops
,
733 unsigned int currindex
,
734 operand_entry_t curr
)
741 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
744 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
745 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
746 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
749 /* Any non-negated version will have a rank that is one less than
750 the current rank. So once we hit those ranks, if we don't find
one, we can stop. */
753 for (i
= currindex
+ 1;
754 ops
->iterate (i
, &oe
)
755 && oe
->rank
>= curr
->rank
- 1 ;
758 if (oe
->op
== negateop
)
761 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
763 fprintf (dump_file
, "Equivalence: ");
764 print_generic_expr (dump_file
, negateop
, 0);
765 fprintf (dump_file
, " + -");
766 print_generic_expr (dump_file
, oe
->op
, 0);
767 fprintf (dump_file
, " -> 0\n");
770 ops
->ordered_remove (i
);
771 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
772 ops
->ordered_remove (currindex
);
773 reassociate_stats
.ops_eliminated
++;
777 else if (oe
->op
== notop
)
779 tree op_type
= TREE_TYPE (oe
->op
);
781 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
783 fprintf (dump_file
, "Equivalence: ");
784 print_generic_expr (dump_file
, notop
, 0);
785 fprintf (dump_file
, " + ~");
786 print_generic_expr (dump_file
, oe
->op
, 0);
787 fprintf (dump_file
, " -> -1\n");
790 ops
->ordered_remove (i
);
791 add_to_ops_vec (ops
, build_int_cst_type (op_type
, -1));
792 ops
->ordered_remove (currindex
);
793 reassociate_stats
.ops_eliminated
++;
799 /* CURR->OP is a negate expr in a plus expr: save it for later
800 inspection in repropagate_negates(). */
801 if (negateop
!= NULL_TREE
)
802 plus_negates
.safe_push (curr
->op
);
807 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
808 bitwise not expression, look in OPS for a corresponding operand to
809 cancel it out. If we find one, remove the other from OPS, replace
810 OPS[CURRINDEX] with 0, and return true. Otherwise, return false. */
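/* For illustration (hypothetical operands):  if the BIT_AND_EXPR list
   contains both x and t = ~x, the whole conjunction is 0 and the list
   collapses to the single constant 0; for BIT_IOR_EXPR it collapses to
   all ones, regardless of any other operands present.  */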
814 eliminate_not_pairs (enum tree_code opcode
,
815 vec
<operand_entry_t
> *ops
,
816 unsigned int currindex
,
817 operand_entry_t curr
)
823 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
824 || TREE_CODE (curr
->op
) != SSA_NAME
)
827 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
828 if (notop
== NULL_TREE
)
831 /* Any non-not version will have a rank that is one less than
832 the current rank. So once we hit those ranks, if we don't find
one, we can stop. */
835 for (i
= currindex
+ 1;
836 ops
->iterate (i
, &oe
)
837 && oe
->rank
>= curr
->rank
- 1;
842 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
844 fprintf (dump_file
, "Equivalence: ");
845 print_generic_expr (dump_file
, notop
, 0);
846 if (opcode
== BIT_AND_EXPR
)
847 fprintf (dump_file
, " & ~");
848 else if (opcode
== BIT_IOR_EXPR
)
849 fprintf (dump_file
, " | ~");
850 print_generic_expr (dump_file
, oe
->op
, 0);
851 if (opcode
== BIT_AND_EXPR
)
852 fprintf (dump_file
, " -> 0\n");
853 else if (opcode
== BIT_IOR_EXPR
)
854 fprintf (dump_file
, " -> -1\n");
857 if (opcode
== BIT_AND_EXPR
)
858 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
859 else if (opcode
== BIT_IOR_EXPR
)
860 oe
->op
= build_all_ones_cst (TREE_TYPE (oe
->op
));
862 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
864 ops
->quick_push (oe
);
872 /* Use constant value that may be present in OPS to try to eliminate
873 operands. Note that this function is only really used when we've
874 eliminated ops for other reasons, or merged constants. Across
875 single statements, fold already does all of this, plus more. There
876 is little point in duplicating logic, so I've only included the
877 identities that I could ever construct testcases to trigger. */
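/* For illustration (hypothetical operands):  { x, y, 0 } under
   BIT_AND_EXPR collapses to { 0 }; { x, y, -1 } under BIT_AND_EXPR
   drops the -1; { x, y, -1 } under BIT_IOR_EXPR collapses to { -1 };
   { x, y, 0 } under MULT_EXPR collapses to { 0 } (for floats only when
   NaNs and signed zeros can be ignored); { x, y, 1 } under MULT_EXPR
   and { x, y, 0 } under PLUS_EXPR simply drop the constant.  */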
880 eliminate_using_constants (enum tree_code opcode
,
881 vec
<operand_entry_t
> *ops
)
883 operand_entry_t oelast
= ops
->last ();
884 tree type
= TREE_TYPE (oelast
->op
);
886 if (oelast
->rank
== 0
887 && (INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
892 if (integer_zerop (oelast
->op
))
894 if (ops
->length () != 1)
896 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
897 fprintf (dump_file
, "Found & 0, removing all other ops\n");
899 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
902 ops
->quick_push (oelast
);
906 else if (integer_all_onesp (oelast
->op
))
908 if (ops
->length () != 1)
910 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
911 fprintf (dump_file
, "Found & -1, removing\n");
913 reassociate_stats
.ops_eliminated
++;
918 if (integer_all_onesp (oelast
->op
))
920 if (ops
->length () != 1)
922 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
923 fprintf (dump_file
, "Found | -1, removing all other ops\n");
925 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
928 ops
->quick_push (oelast
);
932 else if (integer_zerop (oelast
->op
))
934 if (ops
->length () != 1)
936 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
937 fprintf (dump_file
, "Found | 0, removing\n");
939 reassociate_stats
.ops_eliminated
++;
944 if (integer_zerop (oelast
->op
)
945 || (FLOAT_TYPE_P (type
)
946 && !HONOR_NANS (type
)
947 && !HONOR_SIGNED_ZEROS (type
)
948 && real_zerop (oelast
->op
)))
950 if (ops
->length () != 1)
952 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
953 fprintf (dump_file
, "Found * 0, removing all other ops\n");
955 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
957 ops
->quick_push (oelast
);
961 else if (integer_onep (oelast
->op
)
962 || (FLOAT_TYPE_P (type
)
963 && !HONOR_SNANS (type
)
964 && real_onep (oelast
->op
)))
966 if (ops
->length () != 1)
968 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
969 fprintf (dump_file
, "Found * 1, removing\n");
971 reassociate_stats
.ops_eliminated
++;
979 if (integer_zerop (oelast
->op
)
980 || (FLOAT_TYPE_P (type
)
981 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
982 && fold_real_zero_addition_p (type
, oelast
->op
,
983 opcode
== MINUS_EXPR
)))
985 if (ops
->length () != 1)
987 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
988 fprintf (dump_file
, "Found [|^+] 0, removing\n");
990 reassociate_stats
.ops_eliminated
++;
1002 static void linearize_expr_tree (vec
<operand_entry_t
> *, gimple
,
1005 /* Structure for tracking and counting operands. */
1006 typedef struct oecount_s
{
1009 enum tree_code oecode
;
1014 /* The heap for the oecount hashtable and the sorted list of operands. */
1015 static vec
<oecount
> cvec
;
1018 /* Oecount hashtable helpers. */
1020 struct oecount_hasher
1022 typedef int value_type
;
1023 typedef int compare_type
;
1024 static inline hashval_t
hash (const value_type
&);
1025 static inline bool equal (const value_type
&, const compare_type
&);
1026 static bool is_deleted (int &v
) { return v
== 1; }
1027 static void mark_deleted (int &e
) { e
= 1; }
1028 static bool is_empty (int &v
) { return v
== 0; }
1029 static void mark_empty (int &e
) { e
= 0; }
1030 static void remove (int &) {}
1033 /* Hash function for oecount. */
1036 oecount_hasher::hash (const value_type
&p
)
1038 const oecount
*c
= &cvec
[p
- 42];
1039 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
1042 /* Comparison function for oecount. */
1045 oecount_hasher::equal (const value_type
&p1
, const compare_type
&p2
)
1047 const oecount
*c1
= &cvec
[p1
- 42];
1048 const oecount
*c2
= &cvec
[p2
- 42];
1049 return (c1
->oecode
== c2
->oecode
1050 && c1
->op
== c2
->op
);
1053 /* Comparison function for qsort sorting oecount elements by count. */
1056 oecount_cmp (const void *p1
, const void *p2
)
1058 const oecount
*c1
= (const oecount
*)p1
;
1059 const oecount
*c2
= (const oecount
*)p2
;
1060 if (c1
->cnt
!= c2
->cnt
)
1061 return c1
->cnt
- c2
->cnt
;
1063 /* If counts are identical, use unique IDs to stabilize qsort. */
1064 return c1
->id
- c2
->id
;
1067 /* Return TRUE iff STMT represents a builtin call that raises OP
1068 to some exponent. */
1071 stmt_is_power_of_op (gimple stmt
, tree op
)
1075 if (!is_gimple_call (stmt
))
1078 fndecl
= gimple_call_fndecl (stmt
);
1081 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
1084 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1086 CASE_FLT_FN (BUILT_IN_POW
):
1087 CASE_FLT_FN (BUILT_IN_POWI
):
1088 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1095 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1096 in place and return the result. Assumes that stmt_is_power_of_op
1097 was previously called for STMT and returned TRUE. */
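/* For illustration (hypothetical call):  for t = __builtin_powi (x, 5)
   the call is rewritten in place to __builtin_powi (x, 4) and 4 is
   returned; for __builtin_pow (x, 2.0) it becomes
   __builtin_pow (x, 1.0) and 1 is returned, at which point the caller
   can forward x directly to the single use.  */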
1099 static HOST_WIDE_INT
1100 decrement_power (gimple stmt
)
1102 REAL_VALUE_TYPE c
, cint
;
1103 HOST_WIDE_INT power
;
1106 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1108 CASE_FLT_FN (BUILT_IN_POW
):
1109 arg1
= gimple_call_arg (stmt
, 1);
1110 c
= TREE_REAL_CST (arg1
);
1111 power
= real_to_integer (&c
) - 1;
1112 real_from_integer (&cint
, VOIDmode
, power
, SIGNED
);
1113 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1116 CASE_FLT_FN (BUILT_IN_POWI
):
1117 arg1
= gimple_call_arg (stmt
, 1);
1118 power
= TREE_INT_CST_LOW (arg1
) - 1;
1119 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1127 /* Find the single immediate use of STMT's LHS, and replace it
1128 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1129 replace *DEF with OP as well. */
1132 propagate_op_to_single_use (tree op
, gimple stmt
, tree
*def
)
1137 gimple_stmt_iterator gsi
;
1139 if (is_gimple_call (stmt
))
1140 lhs
= gimple_call_lhs (stmt
);
1142 lhs
= gimple_assign_lhs (stmt
);
1144 gcc_assert (has_single_use (lhs
));
1145 single_imm_use (lhs
, &use
, &use_stmt
);
1149 if (TREE_CODE (op
) != SSA_NAME
)
1150 update_stmt (use_stmt
);
1151 gsi
= gsi_for_stmt (stmt
);
1152 unlink_stmt_vdef (stmt
);
1153 reassoc_remove_stmt (&gsi
);
1154 release_defs (stmt
);
1157 /* Walks the linear chain with result *DEF searching for an operation
1158 with operand OP and code OPCODE removing that from the chain. *DEF
1159 is updated if there is only one operand but no operation left. */
1162 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1164 gimple stmt
= SSA_NAME_DEF_STMT (*def
);
1170 if (opcode
== MULT_EXPR
1171 && stmt_is_power_of_op (stmt
, op
))
1173 if (decrement_power (stmt
) == 1)
1174 propagate_op_to_single_use (op
, stmt
, def
);
1178 name
= gimple_assign_rhs1 (stmt
);
1180 /* If this is the operation we look for and one of the operands
1181 is ours simply propagate the other operand into the stmt's
single use. */
1183 if (gimple_assign_rhs_code (stmt
) == opcode
1185 || gimple_assign_rhs2 (stmt
) == op
))
1188 name
= gimple_assign_rhs2 (stmt
);
1189 propagate_op_to_single_use (name
, stmt
, def
);
1193 /* We might have a multiply of two __builtin_pow* calls, and
1194 the operand might be hiding in the rightmost one. */
1195 if (opcode
== MULT_EXPR
1196 && gimple_assign_rhs_code (stmt
) == opcode
1197 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1198 && has_single_use (gimple_assign_rhs2 (stmt
)))
1200 gimple stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1201 if (stmt_is_power_of_op (stmt2
, op
))
1203 if (decrement_power (stmt2
) == 1)
1204 propagate_op_to_single_use (op
, stmt2
, def
);
1209 /* Continue walking the chain. */
1210 gcc_assert (name
!= op
1211 && TREE_CODE (name
) == SSA_NAME
);
1212 stmt
= SSA_NAME_DEF_STMT (name
);
1217 /* Returns true if statement S1 dominates statement S2. Like
1218 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1221 reassoc_stmt_dominates_stmt_p (gimple s1
, gimple s2
)
1223 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1225 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1226 SSA_NAME. Assume it lives at the beginning of function and
1227 thus dominates everything. */
1228 if (!bb1
|| s1
== s2
)
1231 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1237 /* PHIs in the same basic block are assumed to be
1238 executed all in parallel, if only one stmt is a PHI,
1239 it dominates the other stmt in the same basic block. */
1240 if (gimple_code (s1
) == GIMPLE_PHI
)
1243 if (gimple_code (s2
) == GIMPLE_PHI
)
1246 gcc_assert (gimple_uid (s1
) && gimple_uid (s2
));
1248 if (gimple_uid (s1
) < gimple_uid (s2
))
1251 if (gimple_uid (s1
) > gimple_uid (s2
))
1254 gimple_stmt_iterator gsi
= gsi_for_stmt (s1
);
1255 unsigned int uid
= gimple_uid (s1
);
1256 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1258 gimple s
= gsi_stmt (gsi
);
1259 if (gimple_uid (s
) != uid
)
1268 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
1271 /* Insert STMT after INSERT_POINT. */
1274 insert_stmt_after (gimple stmt
, gimple insert_point
)
1276 gimple_stmt_iterator gsi
;
1279 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1280 bb
= gimple_bb (insert_point
);
1281 else if (!stmt_ends_bb_p (insert_point
))
1283 gsi
= gsi_for_stmt (insert_point
);
1284 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1285 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1289 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1290 thus if it must end a basic block, it should be a call that can
1291 throw, or some assignment that can throw. If it throws, the LHS
1292 of it will not be initialized though, so only valid places using
1293 the SSA_NAME should be dominated by the fallthru edge. */
1294 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
1295 gsi
= gsi_after_labels (bb
);
1296 if (gsi_end_p (gsi
))
1298 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1299 gimple_set_uid (stmt
,
1300 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1303 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1304 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1307 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1308 the result. Places the statement after the definition of either
1309 OP1 or OP2. Returns the new statement. */
1312 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
1314 gimple op1def
= NULL
, op2def
= NULL
;
1315 gimple_stmt_iterator gsi
;
1319 /* Create the addition statement. */
1320 op
= make_ssa_name (type
);
1321 sum
= gimple_build_assign (op
, opcode
, op1
, op2
);
1323 /* Find an insertion place and insert. */
1324 if (TREE_CODE (op1
) == SSA_NAME
)
1325 op1def
= SSA_NAME_DEF_STMT (op1
);
1326 if (TREE_CODE (op2
) == SSA_NAME
)
1327 op2def
= SSA_NAME_DEF_STMT (op2
);
1328 if ((!op1def
|| gimple_nop_p (op1def
))
1329 && (!op2def
|| gimple_nop_p (op2def
)))
1331 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1332 if (gsi_end_p (gsi
))
1334 gimple_stmt_iterator gsi2
1335 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1336 gimple_set_uid (sum
,
1337 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1340 gimple_set_uid (sum
, gimple_uid (gsi_stmt (gsi
)));
1341 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1345 gimple insert_point
;
1346 if ((!op1def
|| gimple_nop_p (op1def
))
1347 || (op2def
&& !gimple_nop_p (op2def
)
1348 && reassoc_stmt_dominates_stmt_p (op1def
, op2def
)))
1349 insert_point
= op2def
;
1351 insert_point
= op1def
;
1352 insert_stmt_after (sum
, insert_point
);
1359 /* Perform un-distribution of divisions and multiplications.
1360 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1361 to (A + B) / X for real X.
1363 The algorithm is organized as follows.
1365 - First we walk the addition chain *OPS looking for summands that
1366 are defined by a multiplication or a real division. This results
1367 in the candidates bitmap with relevant indices into *OPS.
1369 - Second we build the chains of multiplications or divisions for
1370 these candidates, counting the number of occurrences of (operand, code)
1371 pairs in all of the candidates chains.
1373 - Third we sort the (operand, code) pairs by number of occurrence and
1374 process them starting with the pair with the most uses.
1376 * For each such pair we walk the candidates again to build a
1377 second candidate bitmap noting all multiplication/division chains
1378 that have at least one occurrence of (operand, code).
1380 * We build an alternate addition chain only covering these
1381 candidates with one (operand, code) operation removed from their
1382 multiplication/division chain.
1384 * The first candidate gets replaced by the alternate addition chain
1385 multiplied/divided by the operand.
1387 * All candidate chains get disabled for further processing and
1388 processing of (operand, code) pairs continues.
1390 The alternate addition chains built are re-processed by the main
1391 reassociation algorithm which allows optimizing a * x * y + b * y * x
1392 to (a + b) * x * y in one invocation of the reassociation pass. */
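/* For illustration (hypothetical operands):  for the addition chain
   { a * x, b * x, c } the pair (x, MULT_EXPR) occurs twice, so the two
   candidates are rewritten as (a + b) * x and the chain becomes
   { (a + b) * x, 0, c }; the zeroed slot keeps the vector length
   stable and folds away later.  */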
1395 undistribute_ops_list (enum tree_code opcode
,
1396 vec
<operand_entry_t
> *ops
, struct loop
*loop
)
1398 unsigned int length
= ops
->length ();
1399 operand_entry_t oe1
;
1401 sbitmap candidates
, candidates2
;
1402 unsigned nr_candidates
, nr_candidates2
;
1403 sbitmap_iterator sbi0
;
1404 vec
<operand_entry_t
> *subops
;
1405 bool changed
= false;
1406 int next_oecount_id
= 0;
1409 || opcode
!= PLUS_EXPR
)
1412 /* Build a list of candidates to process. */
1413 candidates
= sbitmap_alloc (length
);
1414 bitmap_clear (candidates
);
1416 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1418 enum tree_code dcode
;
1421 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1423 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1424 if (!is_gimple_assign (oe1def
))
1426 dcode
= gimple_assign_rhs_code (oe1def
);
1427 if ((dcode
!= MULT_EXPR
1428 && dcode
!= RDIV_EXPR
)
1429 || !is_reassociable_op (oe1def
, dcode
, loop
))
1432 bitmap_set_bit (candidates
, i
);
1436 if (nr_candidates
< 2)
1438 sbitmap_free (candidates
);
1442 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1444 fprintf (dump_file
, "searching for un-distribute opportunities ");
1445 print_generic_expr (dump_file
,
1446 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, 0);
1447 fprintf (dump_file
, " %d\n", nr_candidates
);
1450 /* Build linearized sub-operand lists and the counting table. */
1453 hash_table
<oecount_hasher
> ctable (15);
1455 /* ??? Macro arguments cannot have multi-argument template types in
1456 them. This typedef is needed to work around that limitation. */
1457 typedef vec
<operand_entry_t
> vec_operand_entry_t_heap
;
1458 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1459 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1462 enum tree_code oecode
;
1465 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1466 oecode
= gimple_assign_rhs_code (oedef
);
1467 linearize_expr_tree (&subops
[i
], oedef
,
1468 associative_tree_code (oecode
), false);
1470 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1477 c
.id
= next_oecount_id
++;
1480 idx
= cvec
.length () + 41;
1481 slot
= ctable
.find_slot (idx
, INSERT
);
1489 cvec
[*slot
- 42].cnt
++;
1494 /* Sort the counting table. */
1495 cvec
.qsort (oecount_cmp
);
1497 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1500 fprintf (dump_file
, "Candidates:\n");
1501 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1503 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1504 c
->oecode
== MULT_EXPR
1505 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1506 print_generic_expr (dump_file
, c
->op
, 0);
1507 fprintf (dump_file
, "\n");
1511 /* Process the (operand, code) pairs in order of most occurrence. */
1512 candidates2
= sbitmap_alloc (length
);
1513 while (!cvec
.is_empty ())
1515 oecount
*c
= &cvec
.last ();
1519 /* Now collect the operands in the outer chain that contain
1520 the common operand in their inner chain. */
1521 bitmap_clear (candidates2
);
1523 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1526 enum tree_code oecode
;
1528 tree op
= (*ops
)[i
]->op
;
1530 /* If we undistributed in this chain already this may be a constant. */
1532 if (TREE_CODE (op
) != SSA_NAME
)
1535 oedef
= SSA_NAME_DEF_STMT (op
);
1536 oecode
= gimple_assign_rhs_code (oedef
);
1537 if (oecode
!= c
->oecode
)
1540 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1542 if (oe1
->op
== c
->op
)
1544 bitmap_set_bit (candidates2
, i
);
1551 if (nr_candidates2
>= 2)
1553 operand_entry_t oe1
, oe2
;
1555 int first
= bitmap_first_set_bit (candidates2
);
1557 /* Build the new addition chain. */
1558 oe1
= (*ops
)[first
];
1559 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1561 fprintf (dump_file
, "Building (");
1562 print_generic_expr (dump_file
, oe1
->op
, 0);
1564 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1565 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1569 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1571 fprintf (dump_file
, " + ");
1572 print_generic_expr (dump_file
, oe2
->op
, 0);
1574 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1575 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1576 oe1
->op
, oe2
->op
, opcode
);
1577 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1579 oe1
->op
= gimple_get_lhs (sum
);
1582 /* Apply the multiplication/division. */
1583 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1584 oe1
->op
, c
->op
, c
->oecode
);
1585 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1587 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1588 print_generic_expr (dump_file
, c
->op
, 0);
1589 fprintf (dump_file
, "\n");
1592 /* Record it in the addition chain and disable further
1593 undistribution with this op. */
1594 oe1
->op
= gimple_assign_lhs (prod
);
1595 oe1
->rank
= get_rank (oe1
->op
);
1596 subops
[first
].release ();
1604 for (i
= 0; i
< ops
->length (); ++i
)
1605 subops
[i
].release ();
1608 sbitmap_free (candidates
);
1609 sbitmap_free (candidates2
);
1614 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1615 expression, examine the other OPS to see if any of them are comparisons
1616 of the same values, which we may be able to combine or eliminate.
1617 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
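/* For illustration (hypothetical operands):  in the BIT_IOR_EXPR list
   { a < b, a == b, c } the two comparisons of the same operands are
   combined into a <= b, so the list becomes { a <= b, c }.  */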
1620 eliminate_redundant_comparison (enum tree_code opcode
,
1621 vec
<operand_entry_t
> *ops
,
1622 unsigned int currindex
,
1623 operand_entry_t curr
)
1626 enum tree_code lcode
, rcode
;
1631 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
1634 /* Check that CURR is a comparison. */
1635 if (TREE_CODE (curr
->op
) != SSA_NAME
)
1637 def1
= SSA_NAME_DEF_STMT (curr
->op
);
1638 if (!is_gimple_assign (def1
))
1640 lcode
= gimple_assign_rhs_code (def1
);
1641 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
1643 op1
= gimple_assign_rhs1 (def1
);
1644 op2
= gimple_assign_rhs2 (def1
);
1646 /* Now look for a similar comparison in the remaining OPS. */
1647 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
1651 if (TREE_CODE (oe
->op
) != SSA_NAME
)
1653 def2
= SSA_NAME_DEF_STMT (oe
->op
);
1654 if (!is_gimple_assign (def2
))
1656 rcode
= gimple_assign_rhs_code (def2
);
1657 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
1660 /* If we got here, we have a match. See if we can combine the
two comparisons. */
1662 if (opcode
== BIT_IOR_EXPR
)
1663 t
= maybe_fold_or_comparisons (lcode
, op1
, op2
,
1664 rcode
, gimple_assign_rhs1 (def2
),
1665 gimple_assign_rhs2 (def2
));
1667 t
= maybe_fold_and_comparisons (lcode
, op1
, op2
,
1668 rcode
, gimple_assign_rhs1 (def2
),
1669 gimple_assign_rhs2 (def2
));
1673 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1674 always give us a boolean_type_node value back. If the original
1675 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1676 we need to convert. */
1677 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
1678 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
1680 if (TREE_CODE (t
) != INTEGER_CST
1681 && !operand_equal_p (t
, curr
->op
, 0))
1683 enum tree_code subcode
;
1684 tree newop1
, newop2
;
1685 if (!COMPARISON_CLASS_P (t
))
1687 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1688 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1689 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1690 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
1694 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1696 fprintf (dump_file
, "Equivalence: ");
1697 print_generic_expr (dump_file
, curr
->op
, 0);
1698 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
1699 print_generic_expr (dump_file
, oe
->op
, 0);
1700 fprintf (dump_file
, " -> ");
1701 print_generic_expr (dump_file
, t
, 0);
1702 fprintf (dump_file
, "\n");
1705 /* Now we can delete oe, as it has been subsumed by the new combined
expression t. */
1707 ops
->ordered_remove (i
);
1708 reassociate_stats
.ops_eliminated
++;
1710 /* If t is the same as curr->op, we're done. Otherwise we must
1711 replace curr->op with t. Special case is if we got a constant
1712 back, in which case we add it to the end instead of in place of
1713 the current entry. */
1714 if (TREE_CODE (t
) == INTEGER_CST
)
1716 ops
->ordered_remove (currindex
);
1717 add_to_ops_vec (ops
, t
);
1719 else if (!operand_equal_p (t
, curr
->op
, 0))
1722 enum tree_code subcode
;
1725 gcc_assert (COMPARISON_CLASS_P (t
));
1726 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1727 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1728 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1729 gcc_checking_assert (is_gimple_val (newop1
)
1730 && is_gimple_val (newop2
));
1731 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
1732 curr
->op
= gimple_get_lhs (sum
);
1740 /* Perform various identities and other optimizations on the list of
1741 operand entries, stored in OPS. The tree code for the binary
1742 operation between all the operands is OPCODE. */
1745 optimize_ops_list (enum tree_code opcode
,
1746 vec
<operand_entry_t
> *ops
)
1748 unsigned int length
= ops
->length ();
1751 operand_entry_t oelast
= NULL
;
1752 bool iterate
= false;
1757 oelast
= ops
->last ();
1759 /* If the last two are constants, pop the constants off, merge them
1760 and try the next two. */
1761 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
1763 operand_entry_t oelm1
= (*ops
)[length
- 2];
1765 if (oelm1
->rank
== 0
1766 && is_gimple_min_invariant (oelm1
->op
)
1767 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
1768 TREE_TYPE (oelast
->op
)))
1770 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
1771 oelm1
->op
, oelast
->op
);
1773 if (folded
&& is_gimple_min_invariant (folded
))
1775 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1776 fprintf (dump_file
, "Merging constants\n");
1781 add_to_ops_vec (ops
, folded
);
1782 reassociate_stats
.constants_eliminated
++;
1784 optimize_ops_list (opcode
, ops
);
1790 eliminate_using_constants (opcode
, ops
);
1793 for (i
= 0; ops
->iterate (i
, &oe
);)
1797 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
1799 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
1800 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
1801 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
1813 length
= ops
->length ();
1814 oelast
= ops
->last ();
1817 optimize_ops_list (opcode
, ops
);
1820 /* The following functions are subroutines to optimize_range_tests and allow
1821 it to try to change a logical combination of comparisons into a range
test. For example,
1825 X == 2 || X == 5 || X == 3 || X == 4
is converted to
1829 (unsigned) (X - 2) <= 3
1831 For more information see comments above fold_test_range in fold-const.c,
1832 this implementation is for GIMPLE. */
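/* For illustration, a hypothetical source-level condition

     if (x < 0 || x > 9)
       return 0;

   consists of two range tests on the same variable, written -[0, 9] in
   the +/- notation used by the dump code below, and can be rewritten
   into the single unsigned test (unsigned) x > 9U.  */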
1840 bool strict_overflow_p
;
1841 unsigned int idx
, next
;
1844 /* This is similar to make_range in fold-const.c, but on top of
1845 GIMPLE instead of trees. If EXP is non-NULL, it should be
1846 an SSA_NAME and STMT argument is ignored, otherwise STMT
1847 argument should be a GIMPLE_COND. */
1850 init_range_entry (struct range_entry
*r
, tree exp
, gimple stmt
)
1854 bool is_bool
, strict_overflow_p
;
1858 r
->strict_overflow_p
= false;
1860 r
->high
= NULL_TREE
;
1861 if (exp
!= NULL_TREE
1862 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
1865 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1866 and see if we can refine the range. Some of the cases below may not
1867 happen, but it doesn't seem worth worrying about this. We "continue"
1868 the outer loop when we've changed something; otherwise we "break"
1869 the switch, which will "break" the while. */
1870 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
1873 strict_overflow_p
= false;
1875 if (exp
== NULL_TREE
)
1877 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
1879 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
1884 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
1889 enum tree_code code
;
1890 tree arg0
, arg1
, exp_type
;
1894 if (exp
!= NULL_TREE
)
1896 if (TREE_CODE (exp
) != SSA_NAME
1897 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
))
1900 stmt
= SSA_NAME_DEF_STMT (exp
);
1901 if (!is_gimple_assign (stmt
))
1904 code
= gimple_assign_rhs_code (stmt
);
1905 arg0
= gimple_assign_rhs1 (stmt
);
1906 arg1
= gimple_assign_rhs2 (stmt
);
1907 exp_type
= TREE_TYPE (exp
);
1911 code
= gimple_cond_code (stmt
);
1912 arg0
= gimple_cond_lhs (stmt
);
1913 arg1
= gimple_cond_rhs (stmt
);
1914 exp_type
= boolean_type_node
;
1917 if (TREE_CODE (arg0
) != SSA_NAME
)
1919 loc
= gimple_location (stmt
);
1923 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
1924 /* Ensure the range is either +[-,0], +[0,0],
1925 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1926 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1927 or similar expression of unconditional true or
1928 false, it should not be negated. */
1929 && ((high
&& integer_zerop (high
))
1930 || (low
&& integer_onep (low
))))
1943 if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
1945 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
1950 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
1965 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
1967 &strict_overflow_p
);
1968 if (nexp
!= NULL_TREE
)
1971 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
1984 r
->strict_overflow_p
= strict_overflow_p
;
1988 /* Comparison function for qsort. Sort entries
1989 without SSA_NAME exp first, then with SSA_NAMEs sorted
1990 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1991 by increasing ->low and if ->low is the same, by increasing
1992 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
means maximum. */
1996 range_entry_cmp (const void *a
, const void *b
)
1998 const struct range_entry
*p
= (const struct range_entry
*) a
;
1999 const struct range_entry
*q
= (const struct range_entry
*) b
;
2001 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
2003 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2005 /* Group range_entries for the same SSA_NAME together. */
2006 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
2008 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
2010 /* If ->low is different, NULL low goes first, then by
increasing ->low. */
2012 if (p
->low
!= NULL_TREE
)
2014 if (q
->low
!= NULL_TREE
)
2016 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2018 if (tem
&& integer_onep (tem
))
2020 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2022 if (tem
&& integer_onep (tem
))
2028 else if (q
->low
!= NULL_TREE
)
2030 /* If ->high is different, NULL high goes last, before that by
increasing ->high. */
2032 if (p
->high
!= NULL_TREE
)
2034 if (q
->high
!= NULL_TREE
)
2036 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2038 if (tem
&& integer_onep (tem
))
2040 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2042 if (tem
&& integer_onep (tem
))
2048 else if (q
->high
!= NULL_TREE
)
2050 /* If both ranges are the same, sort below by ascending idx. */
2055 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2058 if (p
->idx
< q
->idx
)
2062 gcc_checking_assert (p
->idx
> q
->idx
);
2067 /* Helper routine of optimize_range_test.
2068 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2069 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2070 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2071 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2072 an array of COUNT pointers to other ranges. Return
2073 true if the range merge has been successful.
2074 If OPCODE is ERROR_MARK, this is called from within
2075 maybe_optimize_range_tests and is performing inter-bb range optimization.
2076 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
oe->rank. */
2080 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2081 struct range_entry
**otherrangep
,
2082 unsigned int count
, enum tree_code opcode
,
2083 vec
<operand_entry_t
> *ops
, tree exp
, gimple_seq seq
,
2084 bool in_p
, tree low
, tree high
, bool strict_overflow_p
)
2086 operand_entry_t oe
= (*ops
)[range
->idx
];
2088 gimple stmt
= op
? SSA_NAME_DEF_STMT (op
) :
2089 last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2090 location_t loc
= gimple_location (stmt
);
2091 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2092 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2094 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2095 gimple_stmt_iterator gsi
;
2098 if (tem
== NULL_TREE
)
2101 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2102 warning_at (loc
, OPT_Wstrict_overflow
,
2103 "assuming signed overflow does not occur "
2104 "when simplifying range test");
2106 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2108 struct range_entry
*r
;
2109 fprintf (dump_file
, "Optimizing range tests ");
2110 print_generic_expr (dump_file
, range
->exp
, 0);
2111 fprintf (dump_file
, " %c[", range
->in_p
? '+' : '-');
2112 print_generic_expr (dump_file
, range
->low
, 0);
2113 fprintf (dump_file
, ", ");
2114 print_generic_expr (dump_file
, range
->high
, 0);
2115 fprintf (dump_file
, "]");
2116 for (i
= 0; i
< count
; i
++)
2122 fprintf (dump_file
, " and %c[", r
->in_p
? '+' : '-');
2123 print_generic_expr (dump_file
, r
->low
, 0);
2124 fprintf (dump_file
, ", ");
2125 print_generic_expr (dump_file
, r
->high
, 0);
2126 fprintf (dump_file
, "]");
2128 fprintf (dump_file
, "\n into ");
2129 print_generic_expr (dump_file
, tem
, 0);
2130 fprintf (dump_file
, "\n");
2133 if (opcode
== BIT_IOR_EXPR
2134 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2135 tem
= invert_truthvalue_loc (loc
, tem
);
2137 tem
= fold_convert_loc (loc
, optype
, tem
);
2138 gsi
= gsi_for_stmt (stmt
);
2139 unsigned int uid
= gimple_uid (stmt
);
2140 /* In rare cases range->exp can be equal to lhs of stmt.
2141 In that case we have to insert after the stmt rather than before
2142 it. If stmt is a PHI, insert it at the start of the basic block. */
2143 if (op
!= range
->exp
)
2145 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2146 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
2150 else if (gimple_code (stmt
) != GIMPLE_PHI
)
2152 gsi_insert_seq_after (&gsi
, seq
, GSI_CONTINUE_LINKING
);
2153 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, false,
2154 GSI_CONTINUE_LINKING
);
2158 gsi
= gsi_after_labels (gimple_bb (stmt
));
2159 if (!gsi_end_p (gsi
))
2160 uid
= gimple_uid (gsi_stmt (gsi
));
2163 gsi
= gsi_start_bb (gimple_bb (stmt
));
2165 while (!gsi_end_p (gsi
))
2167 uid
= gimple_uid (gsi_stmt (gsi
));
2171 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2172 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
2174 if (gsi_end_p (gsi
))
2175 gsi
= gsi_last_bb (gimple_bb (stmt
));
2179 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2180 if (gimple_uid (gsi_stmt (gsi
)))
2183 gimple_set_uid (gsi_stmt (gsi
), uid
);
2190 range
->strict_overflow_p
= false;
2192 for (i
= 0; i
< count
; i
++)
2195 range
= otherrange
+ i
;
2197 range
= otherrangep
[i
];
2198 oe
= (*ops
)[range
->idx
];
2199 /* Now change all the other range test immediate uses, so that
2200 those tests will be optimized away. */
2201 if (opcode
== ERROR_MARK
)
2204 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
2205 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
2207 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
2208 ? boolean_false_node
: boolean_true_node
);
2211 oe
->op
= error_mark_node
;
2212 range
->exp
= NULL_TREE
;
2217 /* Optimize X == CST1 || X == CST2
2218 if popcount (CST1 ^ CST2) == 1 into
2219 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2220 Similarly for ranges. E.g.
2221 X != 2 && X != 3 && X != 10 && X != 11
2222 will be transformed by the previous optimization into
2223 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2224 and this loop can transform that into
2225 !(((X & ~8) - 2U) <= 1U). */
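/* For illustration (hypothetical constants):  for X == 2 || X == 3 we
   have CST1 ^ CST2 == 1, a power of two, so the test becomes
   (X & ~1) == 2; for the ranges in the comment above, 2 ^ 10 == 8 and
   (X - 2U) <= 1U || (X - 10U) <= 1U becomes ((X & ~8) - 2U) <= 1U.  */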
2228 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
2229 tree lowi
, tree lowj
, tree highi
, tree highj
,
2230 vec
<operand_entry_t
> *ops
,
2231 struct range_entry
*rangei
,
2232 struct range_entry
*rangej
)
2234 tree lowxor
, highxor
, tem
, exp
;
2235 /* Check lowi ^ lowj == highi ^ highj and
2236 popcount (lowi ^ lowj) == 1. */
2237 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
2238 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
2240 if (!integer_pow2p (lowxor
))
2242 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
2243 if (!tree_int_cst_equal (lowxor
, highxor
))
2246 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
2247 exp
= fold_build2 (BIT_AND_EXPR
, type
, rangei
->exp
, tem
);
2248 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
2249 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
2250 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, exp
,
2251 NULL
, rangei
->in_p
, lowj
, highj
,
2252 rangei
->strict_overflow_p
2253 || rangej
->strict_overflow_p
))
2258 /* Optimize X == CST1 || X == CST2
2259 if popcount (CST2 - CST1) == 1 into
2260 ((X - CST1) & ~(CST2 - CST1)) == 0.
2261 Similarly for ranges. E.g.
2262 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2263 || X == 75 || X == 45
2264 will be transformed by the previous optimization into
2265 (X - 43U) <= 3U || (X - 75U) <= 3U
2266 and this loop can transform that into
2267 ((X - 43U) & ~(75U - 43U)) <= 3U. */
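/* For illustration (hypothetical constants):  for X == 43 || X == 75 we
   have 75 - 43 == 32, a power of two, and both ranges have the same
   width, so the test becomes ((X - 43U) & ~32U) == 0U; the same masking
   applied to the wider ranges in the comment above yields
   ((X - 43U) & ~(75U - 43U)) <= 3U.  */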
2269 optimize_range_tests_diff (enum tree_code opcode
, tree type
,
2270 tree lowi
, tree lowj
, tree highi
, tree highj
,
2271 vec
<operand_entry_t
> *ops
,
2272 struct range_entry
*rangei
,
2273 struct range_entry
*rangej
)
2275 tree tem1
, tem2
, mask
;
2276 /* Check highi - lowi == highj - lowj. */
2277 tem1
= fold_binary (MINUS_EXPR
, type
, highi
, lowi
);
2278 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2280 tem2
= fold_binary (MINUS_EXPR
, type
, highj
, lowj
);
2281 if (!tree_int_cst_equal (tem1
, tem2
))
2283 /* Check popcount (lowj - lowi) == 1. */
2284 tem1
= fold_binary (MINUS_EXPR
, type
, lowj
, lowi
);
2285 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2287 if (!integer_pow2p (tem1
))
2290 type
= unsigned_type_for (type
);
2291 tem1
= fold_convert (type
, tem1
);
2292 tem2
= fold_convert (type
, tem2
);
2293 lowi
= fold_convert (type
, lowi
);
2294 mask
= fold_build1 (BIT_NOT_EXPR
, type
, tem1
);
2295 tem1
= fold_binary (MINUS_EXPR
, type
,
2296 fold_convert (type
, rangei
->exp
), lowi
);
2297 tem1
= fold_build2 (BIT_AND_EXPR
, type
, tem1
, mask
);
2298 lowj
= build_int_cst (type
, 0);
2299 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, tem1
,
2300 NULL
, rangei
->in_p
, lowj
, tem2
,
2301 rangei
->strict_overflow_p
2302 || rangej
->strict_overflow_p
))
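/* A minimal source-level sketch of the transform above (illustrative
   only, not part of the pass; the constants and the helper name are
   made up).  With CST1 == 43 and CST2 == 75, CST2 - CST1 == 32 has a
   single bit set, so the two equality tests collapse into one test
   against zero.  */

static inline bool
reassoc_diff_merge_example (unsigned int x)
{
  /* x == 43 || x == 75 is equivalent to:  */
  return ((x - 43u) & ~32u) == 0;
}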
2307 /* Helper function for optimize_range_tests. It performs the checks
2308 common to optimize_range_tests_xor and optimize_range_tests_diff.
2309 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor,
2310 otherwise it calls optimize_range_tests_diff. */
2313 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2314 bool optimize_xor, vec<operand_entry_t> *ops,
2315 struct range_entry *ranges)
2318 bool any_changes = false;
2319 for (i = first; i < length; i++)
2321 tree lowi, highi, lowj, highj, type, tem;
2323 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2325 type = TREE_TYPE (ranges[i].exp);
2326 if (!INTEGRAL_TYPE_P (type))
2328 lowi = ranges[i].low;
2329 if (lowi == NULL_TREE)
2330 lowi = TYPE_MIN_VALUE (type);
2331 highi = ranges[i].high;
2332 if (highi == NULL_TREE)
2334 for (j = i + 1; j < length && j < i + 64; j++)
2337 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2339 lowj = ranges[j].low;
2340 if (lowj == NULL_TREE)
2342 highj = ranges[j].high;
2343 if (highj == NULL_TREE)
2344 highj = TYPE_MAX_VALUE (type);
2345 /* Check lowj > highi. */
2346 tem = fold_binary (GT_EXPR, boolean_type_node,
2348 if (tem == NULL_TREE || !integer_onep (tem))
2351 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2353 ranges + i, ranges + j);
2355 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2357 ranges + i, ranges + j);
2368 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2369 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2370 EXP on success, NULL otherwise. */
2373 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2374 wide_int *mask, tree *totallowp)
2376 tree tem = int_const_binop (MINUS_EXPR, high, low);
2377 if (tem == NULL_TREE
2378 || TREE_CODE (tem) != INTEGER_CST
2379 || TREE_OVERFLOW (tem)
2380 || tree_int_cst_sgn (tem) == -1
2381 || compare_tree_int (tem, prec) != -1)
2384 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2385 *mask = wi::shifted_mask (0, max, false, prec);
2386 if (TREE_CODE (exp) == BIT_AND_EXPR
2387 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2389 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2390 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2391 if (wi::popcount (msk) == 1
2392 && wi::ltu_p (msk, prec - max))
2394 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2395 max += msk.to_uhwi ();
2396 exp = TREE_OPERAND (exp, 0);
2397 if (integer_zerop (low)
2398 && TREE_CODE (exp) == PLUS_EXPR
2399 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2401 tree ret = TREE_OPERAND (exp, 0);
2404 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2405 TYPE_PRECISION (TREE_TYPE (low))));
2406 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2412 else if (!tree_int_cst_lt (totallow, tbias))
2414 bias = wi::to_widest (tbias);
2415 bias -= wi::to_widest (totallow);
2416 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2418 *mask = wi::lshift (*mask, bias);
2426 if (!tree_int_cst_lt (totallow, low))
2428 tem = int_const_binop (MINUS_EXPR, low, totallow);
2429 if (tem == NULL_TREE
2430 || TREE_CODE (tem) != INTEGER_CST
2431 || TREE_OVERFLOW (tem)
2432 || compare_tree_int (tem, prec - max) == 1)
2435 *mask = wi::lshift (*mask, wi::to_widest (tem));
2439 /* Attempt to optimize small range tests using bit test.
2441 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2442 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2443 has been optimized by the earlier optimizations into:
2444 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2445 As the whole 43 through 82 range spans fewer than 64 values,
2446 for 64-bit word targets optimize that into:
2447 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
2450 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2451 vec<operand_entry_t> *ops,
2452 struct range_entry *ranges)
2455 bool any_changes = false;
2456 int prec = GET_MODE_BITSIZE (word_mode);
2457 auto_vec<struct range_entry *, 64> candidates;
2459 for (i = first; i < length - 2; i++)
2461 tree lowi, highi, lowj, highj, type;
2463 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2465 type = TREE_TYPE (ranges[i].exp);
2466 if (!INTEGRAL_TYPE_P (type))
2468 lowi = ranges[i].low;
2469 if (lowi == NULL_TREE)
2470 lowi = TYPE_MIN_VALUE (type);
2471 highi = ranges[i].high;
2472 if (highi == NULL_TREE)
2475 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2476 highi, &mask, &lowi);
2477 if (exp == NULL_TREE)
2479 bool strict_overflow_p = ranges[i].strict_overflow_p;
2480 candidates.truncate (0);
2481 int end = MIN (i + 64, length);
2482 for (j = i + 1; j < end; j++)
2485 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2487 if (ranges[j].exp == exp)
2489 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2491 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2494 else if (TREE_CODE (exp2) == PLUS_EXPR)
2496 exp2 = TREE_OPERAND (exp2, 0);
2506 lowj = ranges[j].low;
2507 if (lowj == NULL_TREE)
2509 highj = ranges[j].high;
2510 if (highj == NULL_TREE)
2511 highj = TYPE_MAX_VALUE (type);
2513 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2514 highj, &mask2, NULL);
2518 strict_overflow_p |= ranges[j].strict_overflow_p;
2519 candidates.safe_push (&ranges[j]);
2522 /* If we need otherwise 3 or more comparisons, use a bit test. */
2523 if (candidates.length () >= 2)
2525 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2526 wi::to_widest (lowi)
2527 + prec - 1 - wi::clz (mask));
2528 operand_entry_t oe = (*ops)[ranges[i].idx];
2530 gimple stmt = op ? SSA_NAME_DEF_STMT (op)
2531 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2532 location_t loc = gimple_location (stmt);
2533 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2535 /* See if it isn't cheaper to pretend the minimum value of the
2536 range is 0, if the maximum value is small enough.
2537 We can then avoid subtracting the minimum value, but the
2538 mask constant could perhaps be more expensive. */
2539 if (compare_tree_int (lowi, 0) > 0
2540 && compare_tree_int (high, prec) < 0)
2543 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2544 rtx reg = gen_raw_REG (word_mode, 10000);
2545 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2546 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2547 GEN_INT (-m)), speed_p);
2548 rtx r = immed_wide_int_const (mask, word_mode);
2549 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2551 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2552 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2556 mask = wi::lshift (mask, m);
2557 lowi = build_zero_cst (TREE_TYPE (lowi));
2561 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2563 if (tem == NULL_TREE || is_gimple_val (tem))
2565 tree etype = unsigned_type_for (TREE_TYPE (exp));
2566 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2567 fold_convert_loc (loc, etype, exp),
2568 fold_convert_loc (loc, etype, lowi));
2569 exp = fold_convert_loc (loc, integer_type_node, exp);
2570 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2571 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2572 build_int_cst (word_type, 1), exp);
2573 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2574 wide_int_to_tree (word_type, mask));
2575 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2576 build_zero_cst (word_type));
2577 if (is_gimple_val (exp))
2580 /* The shift might have undefined behavior if TEM is true,
2581 but reassociate_bb isn't prepared to have basic blocks
2582 split when it is running. So, temporarily emit code
2583 with BIT_IOR_EXPR instead of &&, and fix it up in
2586 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2587 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2588 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2590 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2591 gimple_seq_add_seq_without_update (&seq, seq2);
2592 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2593 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2594 gimple g = gimple_build_assign (make_ssa_name (optype),
2595 BIT_IOR_EXPR, tem, exp);
2596 gimple_set_location (g, loc);
2597 gimple_seq_add_stmt_without_update (&seq, g);
2598 exp = gimple_assign_lhs (g);
2599 tree val = build_zero_cst (optype);
2600 if (update_range_test (&ranges[i], NULL, candidates.address (),
2601 candidates.length (), opcode, ops, exp,
2602 seq, false, val, val, strict_overflow_p))
2605 reassoc_branch_fixups.safe_push (tem);
2608 gimple_seq_discard (seq);
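/* A source-level sketch of the kind of bit test this emits
   (illustrative only; the helper name and constants are made up, and a
   64-bit HOST_WIDE_INT is assumed).  Membership of X in { 43, 45, 46,
   75 } fits into one word once the values are biased by the low bound
   43, so a single biased range check plus one shift-and-mask test
   replaces four equality tests; the range check also keeps the shift
   amount below the word size.  */

static inline bool
reassoc_bit_test_example (unsigned HOST_WIDE_INT x)
{
  unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << 0)
				| ((unsigned HOST_WIDE_INT) 1 << 2)
				| ((unsigned HOST_WIDE_INT) 1 << 3)
				| ((unsigned HOST_WIDE_INT) 1 << 32);
  return x - 43 < 64
	 && (((unsigned HOST_WIDE_INT) 1 << (x - 43)) & mask) != 0;
}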
2614 /* Optimize range tests, similarly to how fold_range_test optimizes
2615 them on trees. The tree code for the binary
2616 operation between all the operands is OPCODE.
2617 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2618 maybe_optimize_range_tests for inter-bb range optimization.
2619 In that case if oe->op is NULL, oe->id is bb->index whose
2620 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2621 the actual opcode. */
2624 optimize_range_tests (enum tree_code opcode,
2625 vec<operand_entry_t> *ops)
2627 unsigned int length = ops->length (), i, j, first;
2629 struct range_entry *ranges;
2630 bool any_changes = false;
2635 ranges = XNEWVEC (struct range_entry, length);
2636 for (i = 0; i < length; i++)
2640 init_range_entry (ranges + i, oe->op,
2642 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2643 /* For | invert it now, we will invert it again before emitting
2644 the optimized expression. */
2645 if (opcode == BIT_IOR_EXPR
2646 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2647 ranges[i].in_p = !ranges[i].in_p;
2650 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2651 for (i = 0; i < length; i++)
2652 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2655 /* Try to merge ranges. */
2656 for (first = i; i < length; i++)
2658 tree low = ranges[i].low;
2659 tree high = ranges[i].high;
2660 int in_p = ranges[i].in_p;
2661 bool strict_overflow_p = ranges[i].strict_overflow_p;
2662 int update_fail_count = 0;
2664 for (j = i + 1; j < length; j++)
2666 if (ranges[i].exp != ranges[j].exp)
2668 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2669 ranges[j].in_p, ranges[j].low, ranges[j].high))
2671 strict_overflow_p |= ranges[j].strict_overflow_p;
2677 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2678 opcode, ops, ranges[i].exp, NULL, in_p,
2679 low, high, strict_overflow_p))
2684 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2685 while update_range_test would fail. */
2686 else if (update_fail_count == 64)
2689 ++update_fail_count;
2692 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2695 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2696 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2698 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2699 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2702 if (any_changes && opcode != ERROR_MARK)
2705 FOR_EACH_VEC_ELT (*ops, i, oe)
2707 if (oe->op == error_mark_node)
2716 XDELETEVEC (ranges);
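/* A source-level sketch of the simple merge step above (illustrative
   only; the helper name and constants are made up): consecutive
   singleton ranges such as X == 1 || X == 2 || X == 3 on the same
   SSA_NAME collapse into one range test once merge_ranges combines
   them.  */

static inline bool
reassoc_range_merge_example (unsigned int x)
{
  /* x == 1 || x == 2 || x == 3 becomes a single biased comparison:  */
  return x - 1u <= 2u;
}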
2720 /* Return true if STMT is a cast like:
2726 # _345 = PHI <_123(N), 1(...), 1(...)>
2727 where _234 has bool type, _123 has single use and
2728 bb N has a single successor M. This is commonly used in
2729 the last block of a range test. */
2732 final_range_test_p (gimple stmt)
2734 basic_block bb, rhs_bb;
2737 use_operand_p use_p;
2740 if (!gimple_assign_cast_p (stmt))
2742 bb = gimple_bb (stmt);
2743 if (!single_succ_p (bb))
2745 e = single_succ_edge (bb);
2746 if (e->flags & EDGE_COMPLEX)
2749 lhs = gimple_assign_lhs (stmt);
2750 rhs = gimple_assign_rhs1 (stmt);
2751 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2752 || TREE_CODE (rhs) != SSA_NAME
2753 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2756 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2757 if (!single_imm_use (lhs, &use_p, &use_stmt))
2760 if (gimple_code (use_stmt) != GIMPLE_PHI
2761 || gimple_bb (use_stmt) != e->dest)
2764 /* And that the rhs is defined in the same loop. */
2765 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2767 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2773 /* Return true if BB is a suitable basic block for inter-bb range test
2774 optimization. If BACKWARD is true, BB should be the only predecessor
2775 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2776 or compared with to find a common basic block to which all conditions
2777 branch to if true resp. false. If BACKWARD is false, TEST_BB should
2778 be the only predecessor of BB. */
2781 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2784 edge_iterator ei, ei2;
2788 bool other_edge_seen = false;
2793 /* Check last stmt first. */
2794 stmt = last_stmt (bb);
2796 || (gimple_code (stmt) != GIMPLE_COND
2797 && (backward || !final_range_test_p (stmt)))
2798 || gimple_visited_p (stmt)
2799 || stmt_could_throw_p (stmt)
2802 is_cond = gimple_code (stmt) == GIMPLE_COND;
2805 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2806 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2807 to *OTHER_BB (if not set yet, try to find it out). */
2808 if (EDGE_COUNT (bb->succs) != 2)
2810 FOR_EACH_EDGE (e, ei, bb->succs)
2812 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2814 if (e->dest == test_bb)
2823 if (*other_bb == NULL)
2825 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2826 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2828 else if (e->dest == e2->dest)
2829 *other_bb = e->dest;
2830 if (*other_bb == NULL)
2833 if (e->dest == *other_bb)
2834 other_edge_seen = true;
2838 if (*other_bb == NULL || !other_edge_seen)
2841 else if (single_succ (bb) != *other_bb)
2844 /* Now check all PHIs of *OTHER_BB. */
2845 e = find_edge (bb, *other_bb);
2846 e2 = find_edge (test_bb, *other_bb);
2847 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2849 gphi *phi = gsi.phi ();
2850 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2851 corresponding to BB and TEST_BB predecessor must be the same. */
2852 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2853 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2855 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2856 one of the PHIs should have the lhs of the last stmt in
2857 that block as PHI arg and that PHI should have 0 or 1
2858 corresponding to it in all other range test basic blocks
2862 if (gimple_phi_arg_def (phi, e->dest_idx)
2863 == gimple_assign_lhs (stmt)
2864 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2865 || integer_onep (gimple_phi_arg_def (phi,
2871 gimple test_last = last_stmt (test_bb);
2872 if (gimple_code (test_last) != GIMPLE_COND
2873 && gimple_phi_arg_def (phi, e2->dest_idx)
2874 == gimple_assign_lhs (test_last)
2875 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2876 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2886 /* Return true if BB doesn't have side-effects that would disallow
2887 range test optimization, all SSA_NAMEs set in the bb are consumed
2888 in the bb and there are no PHIs. */
2891 no_side_effect_bb (basic_block bb)
2893 gimple_stmt_iterator gsi;
2896 if (!gimple_seq_empty_p (phi_nodes (bb)))
2898 last = last_stmt (bb);
2899 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2901 gimple stmt = gsi_stmt (gsi);
2903 imm_use_iterator imm_iter;
2904 use_operand_p use_p;
2906 if (is_gimple_debug (stmt))
2908 if (gimple_has_side_effects (stmt))
2912 if (!is_gimple_assign (stmt))
2914 lhs = gimple_assign_lhs (stmt);
2915 if (TREE_CODE (lhs) != SSA_NAME)
2917 if (gimple_assign_rhs_could_trap_p (stmt))
2919 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2921 gimple use_stmt = USE_STMT (use_p);
2922 if (is_gimple_debug (use_stmt))
2924 if (gimple_bb (use_stmt) != bb)
2931 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2932 return true and fill in *OPS recursively. */
2935 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2938 gimple stmt = SSA_NAME_DEF_STMT (var);
2942 if (!is_reassociable_op (stmt, code, loop))
2945 rhs[0] = gimple_assign_rhs1 (stmt);
2946 rhs[1] = gimple_assign_rhs2 (stmt);
2947 gimple_set_visited (stmt, true);
2948 for (i = 0; i < 2; i++)
2949 if (TREE_CODE (rhs[i]) == SSA_NAME
2950 && !get_ops (rhs[i], code, ops, loop)
2951 && has_single_use (rhs[i]))
2953 operand_entry_t oe = operand_entry_pool.allocate ();
2959 ops->safe_push (oe);
2964 /* Find the ops that were added by get_ops starting from VAR, see if
2965 they were changed during update_range_test and if yes, create new
2969 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2970 unsigned int *pidx, struct loop *loop)
2972 gimple stmt = SSA_NAME_DEF_STMT (var);
2976 if (!is_reassociable_op (stmt, code, loop))
2979 rhs[0] = gimple_assign_rhs1 (stmt);
2980 rhs[1] = gimple_assign_rhs2 (stmt);
2983 for (i = 0; i < 2; i++)
2984 if (TREE_CODE (rhs[i]) == SSA_NAME)
2986 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2987 if (rhs[2 + i] == NULL_TREE)
2989 if (has_single_use (rhs[i]))
2990 rhs[2 + i] = ops[(*pidx)++]->op;
2992 rhs[2 + i] = rhs[i];
2995 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2996 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2998 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2999 var = make_ssa_name (TREE_TYPE (var));
3000 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3002 gimple_set_uid (g, gimple_uid (stmt));
3003 gimple_set_visited (g, true);
3004 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3009 /* Structure to track the initial value passed to get_ops and
3010 the range in the ops vector for each basic block. */
3012 struct inter_bb_range_test_entry
3015 unsigned int first_idx, last_idx;
3018 /* Inter-bb range test optimization. */
3021 maybe_optimize_range_tests (gimple stmt)
3023 basic_block first_bb = gimple_bb (stmt);
3024 basic_block last_bb = first_bb;
3025 basic_block other_bb = NULL;
3029 auto_vec<operand_entry_t> ops;
3030 auto_vec<inter_bb_range_test_entry> bbinfo;
3031 bool any_changes = false;
3033 /* Consider only basic blocks that end with GIMPLE_COND or
3034 a cast statement satisfying final_range_test_p. All
3035 but the last bb in the first_bb .. last_bb range
3036 should end with GIMPLE_COND. */
3037 if (gimple_code (stmt) == GIMPLE_COND)
3039 if (EDGE_COUNT (first_bb->succs) != 2)
3042 else if (final_range_test_p (stmt))
3043 other_bb = single_succ (first_bb);
3047 if (stmt_could_throw_p (stmt))
3050 /* As relative ordering of post-dominator sons isn't fixed,
3051 maybe_optimize_range_tests can be called first on any
3052 bb in the range we want to optimize. So, start searching
3053 backwards, if first_bb can be set to a predecessor. */
3054 while (single_pred_p (first_bb))
3056 basic_block pred_bb = single_pred (first_bb);
3057 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3059 if (!no_side_effect_bb (first_bb))
3063 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3064 Before starting forward search in last_bb successors, find
3065 out the other_bb. */
3066 if (first_bb == last_bb)
3069 /* As non-GIMPLE_COND last stmt always terminates the range,
3070 if forward search didn't discover anything, just give up. */
3071 if (gimple_code (stmt) != GIMPLE_COND)
3073 /* Look at both successors. Either it ends with a GIMPLE_COND
3074 and satisfies suitable_cond_bb, or ends with a cast and
3075 other_bb is that cast's successor. */
3076 FOR_EACH_EDGE (e, ei, first_bb->succs)
3077 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3078 || e->dest == first_bb)
3080 else if (single_pred_p (e->dest))
3082 stmt = last_stmt (e->dest);
3084 && gimple_code (stmt) == GIMPLE_COND
3085 && EDGE_COUNT (e->dest->succs) == 2)
3087 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3093 && final_range_test_p (stmt)
3094 && find_edge (first_bb, single_succ (e->dest)))
3096 other_bb = single_succ (e->dest);
3097 if (other_bb == first_bb)
3101 if (other_bb == NULL)
3104 /* Now do the forward search, moving last_bb to successor bbs
3105 that aren't other_bb. */
3106 while (EDGE_COUNT (last_bb->succs) == 2)
3108 FOR_EACH_EDGE (e, ei, last_bb->succs)
3109 if (e->dest != other_bb)
3113 if (!single_pred_p (e->dest))
3115 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3117 if (!no_side_effect_bb (e->dest))
3121 if (first_bb == last_bb)
3123 /* Here basic blocks first_bb through last_bb's predecessor
3124 end with GIMPLE_COND, all of them have one of the edges to
3125 other_bb and another to another block in the range,
3126 all blocks except first_bb don't have side-effects and
3127 last_bb ends with either GIMPLE_COND, or cast satisfying
3128 final_range_test_p. */
3129 for (bb = last_bb; ; bb = single_pred (bb))
3131 enum tree_code code;
3133 inter_bb_range_test_entry bb_ent;
3135 bb_ent.op = NULL_TREE;
3136 bb_ent.first_idx = ops.length ();
3137 bb_ent.last_idx = bb_ent.first_idx;
3138 e = find_edge (bb, other_bb);
3139 stmt = last_stmt (bb);
3140 gimple_set_visited (stmt, true);
3141 if (gimple_code (stmt) != GIMPLE_COND)
3143 use_operand_p use_p;
3148 lhs = gimple_assign_lhs (stmt);
3149 rhs = gimple_assign_rhs1 (stmt);
3150 gcc_assert (bb == last_bb);
3157 # _345 = PHI <_123(N), 1(...), 1(...)>
3159 or 0 instead of 1. If it is 0, the _234
3160 range test is anded together with all the
3161 other range tests, if it is 1, it is ored with
3163 single_imm_use (lhs, &use_p, &phi);
3164 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3165 e2 = find_edge (first_bb, other_bb);
3167 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3168 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3169 code = BIT_AND_EXPR;
3172 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3173 code = BIT_IOR_EXPR;
3176 /* If _234 SSA_NAME_DEF_STMT is
3178 (or &, corresponding to 1/0 in the phi arguments,
3179 push into ops the individual range test arguments
3180 of the bitwise or resp. and, recursively. */
3181 if (!get_ops (rhs, code, &ops,
3182 loop_containing_stmt (stmt))
3183 && has_single_use (rhs))
3185 /* Otherwise, push the _234 range test itself. */
3186 operand_entry_t oe = operand_entry_pool.allocate ();
3196 bb_ent.last_idx = ops.length ();
3198 bbinfo.safe_push (bb_ent);
3201 /* Otherwise stmt is GIMPLE_COND. */
3202 code = gimple_cond_code (stmt);
3203 lhs = gimple_cond_lhs (stmt);
3204 rhs = gimple_cond_rhs (stmt);
3205 if (TREE_CODE (lhs) == SSA_NAME
3206 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3207 && ((code != EQ_EXPR && code != NE_EXPR)
3208 || rhs != boolean_false_node
3209 /* Either push into ops the individual bitwise
3210 or resp. and operands, depending on which
3211 edge is other_bb. */
3212 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3213 ^ (code == EQ_EXPR))
3214 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3215 loop_containing_stmt (stmt))))
3217 /* Or push the GIMPLE_COND stmt itself. */
3218 operand_entry_t oe = operand_entry_pool.allocate ();
3221 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3222 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3223 /* oe->op = NULL signals that there is no SSA_NAME
3224 for the range test, and oe->id instead is the
3225 basic block number, at whose end the GIMPLE_COND
3233 else if (ops.length () > bb_ent.first_idx)
3236 bb_ent.last_idx = ops.length ();
3238 bbinfo.safe_push (bb_ent);
3242 if (ops.length () > 1)
3243 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3247 /* update_ops relies on has_single_use predicates returning the
3248 same values as they did during get_ops earlier. Additionally it
3249 never removes statements, only adds new ones and it should walk
3250 from the single imm use and check the predicate already before
3251 making those changes.
3252 On the other hand, the handling of GIMPLE_COND directly can turn
3253 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3254 it needs to be done in a separate loop afterwards. */
3255 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3257 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3258 && bbinfo[idx].op != NULL_TREE)
3262 stmt = last_stmt (bb);
3263 new_op = update_ops (bbinfo[idx].op,
3265 ops[bbinfo[idx].first_idx]->rank,
3266 ops, &bbinfo[idx].first_idx,
3267 loop_containing_stmt (stmt));
3268 if (new_op == NULL_TREE)
3270 gcc_assert (bb == last_bb);
3271 new_op = ops[bbinfo[idx].first_idx++]->op;
3273 if (bbinfo[idx].op != new_op)
3275 imm_use_iterator iter;
3276 use_operand_p use_p;
3277 gimple use_stmt, cast_stmt = NULL;
3279 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3280 if (is_gimple_debug (use_stmt))
3282 else if (gimple_code (use_stmt) == GIMPLE_COND
3283 || gimple_code (use_stmt) == GIMPLE_PHI)
3284 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3285 SET_USE (use_p, new_op);
3286 else if (gimple_assign_cast_p (use_stmt))
3287 cast_stmt = use_stmt;
3292 gcc_assert (bb == last_bb);
3293 tree lhs = gimple_assign_lhs (cast_stmt);
3294 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3295 enum tree_code rhs_code
3296 = gimple_assign_rhs_code (cast_stmt);
3298 if (is_gimple_min_invariant (new_op))
3300 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3301 g = gimple_build_assign (new_lhs, new_op);
3304 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3305 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3306 gimple_set_uid (g, gimple_uid (cast_stmt));
3307 gimple_set_visited (g, true);
3308 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3309 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3310 if (is_gimple_debug (use_stmt))
3312 else if (gimple_code (use_stmt) == GIMPLE_COND
3313 || gimple_code (use_stmt) == GIMPLE_PHI)
3314 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3315 SET_USE (use_p, new_lhs);
3324 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3326 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3327 && bbinfo[idx].op == NULL_TREE
3328 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3330 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3331 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3332 gimple_cond_make_false (cond_stmt);
3333 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3334 gimple_cond_make_true (cond_stmt);
3337 gimple_cond_set_code (cond_stmt, NE_EXPR);
3338 gimple_cond_set_lhs (cond_stmt,
3339 ops[bbinfo[idx].first_idx]->op);
3340 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3342 update_stmt (cond_stmt);
3350 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3351 of STMT in its operands. This is also known as a "destructive
3352 update" operation. */
3355 is_phi_for_stmt (gimple stmt, tree operand)
3360 use_operand_p arg_p;
3363 if (TREE_CODE (operand) != SSA_NAME)
3366 lhs = gimple_assign_lhs (stmt);
3368 def_stmt = SSA_NAME_DEF_STMT (operand);
3369 def_phi = dyn_cast <gphi *> (def_stmt);
3373 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3374 if (lhs == USE_FROM_PTR (arg_p))
3379 /* Remove def stmt of VAR if VAR has zero uses and recurse
3380 on rhs1 operand if so. */
3383 remove_visited_stmt_chain (tree var)
3386 gimple_stmt_iterator gsi;
3390 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3392 stmt = SSA_NAME_DEF_STMT (var);
3393 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3395 var = gimple_assign_rhs1 (stmt);
3396 gsi = gsi_for_stmt (stmt);
3397 reassoc_remove_stmt (&gsi);
3398 release_defs (stmt);
3405 /* This function checks three consecutive operands in
3406 the passed operands vector OPS starting from OPINDEX and
3407 swaps two operands if it is profitable for the binary operation
3408 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3410 We pair ops with the same rank if possible.
3412 The alternative we try is to see if STMT is a destructive
3413 update style statement, which is like:
3416 In that case, we want to use the destructive update form to
3417 expose the possible vectorizer sum reduction opportunity.
3418 In that case, the third operand will be the phi node. This
3419 check is not performed if STMT is null.
3421 We could, of course, try to be better as noted above, and do a
3422 lot of work to try to find these opportunities in >3 operand
3423 cases, but it is unlikely to be worth it. */
3426 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3427 unsigned int opindex, gimple stmt)
3429 operand_entry_t oe1, oe2, oe3;
3432 oe2 = ops[opindex + 1];
3433 oe3 = ops[opindex + 2];
3435 if ((oe1->rank == oe2->rank
3436 && oe2->rank != oe3->rank)
3437 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3438 && !is_phi_for_stmt (stmt, oe1->op)
3439 && !is_phi_for_stmt (stmt, oe2->op)))
3441 struct operand_entry temp = *oe3;
3443 oe3->rank = oe1->rank;
3445 oe1->rank = temp.rank;
3447 else if ((oe1->rank == oe3->rank
3448 && oe2->rank != oe3->rank)
3449 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3450 && !is_phi_for_stmt (stmt, oe1->op)
3451 && !is_phi_for_stmt (stmt, oe3->op)))
3453 struct operand_entry temp = *oe2;
3455 oe2->rank = oe1->rank;
3457 oe1->rank = temp.rank;
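/* For example (illustrative, made-up ranks): with OPS[OPINDEX .. OPINDEX + 2]
   holding { a (rank 3), b (rank 3), c (rank 5) }, the first case above
   swaps a and c, giving { c (5), b (3), a (3) }, so that the two rank-3
   operands are consumed by the same binary statement and the redundancy
   eliminator has a better chance of matching that pairing elsewhere.  */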
3461 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3462 two definitions, otherwise return STMT. */
3464 static inline gimple
3465 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3467 if (TREE_CODE (rhs1) == SSA_NAME
3468 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3469 stmt = SSA_NAME_DEF_STMT (rhs1);
3470 if (TREE_CODE (rhs2) == SSA_NAME
3471 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3472 stmt = SSA_NAME_DEF_STMT (rhs2);
3476 /* Recursively rewrite our linearized statements so that the operators
3477 match those in OPS[OPINDEX], putting the computation in rank
3478 order. Return new lhs. */
3481 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3482 vec<operand_entry_t> ops, bool changed)
3484 tree rhs1 = gimple_assign_rhs1 (stmt);
3485 tree rhs2 = gimple_assign_rhs2 (stmt);
3486 tree lhs = gimple_assign_lhs (stmt);
3489 /* The final recursion case for this function is that you have
3490 exactly two operations left.
3491 If we had exactly one op in the entire list to start with, we
3492 would have never called this function, and the tail recursion
3493 rewrites them one at a time. */
3494 if (opindex + 2 == ops.length ())
3496 operand_entry_t oe1, oe2;
3499 oe2 = ops[opindex + 1];
3501 if (rhs1 != oe1->op || rhs2 != oe2->op)
3503 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3504 unsigned int uid = gimple_uid (stmt);
3506 if (dump_file && (dump_flags & TDF_DETAILS))
3508 fprintf (dump_file, "Transforming ");
3509 print_gimple_stmt (dump_file, stmt, 0, 0);
3512 /* Even when changed is false, reassociation could have e.g. removed
3513 some redundant operations, so unless we are just swapping the
3514 arguments or unless there is no change at all (then we just
3515 return lhs), force creation of a new SSA_NAME. */
3516 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3518 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3519 lhs = make_ssa_name (TREE_TYPE (lhs));
3521 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3523 gimple_set_uid (stmt, uid);
3524 gimple_set_visited (stmt, true);
3525 if (insert_point == gsi_stmt (gsi))
3526 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3528 insert_stmt_after (stmt, insert_point);
3532 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3534 gimple_assign_set_rhs1 (stmt, oe1->op);
3535 gimple_assign_set_rhs2 (stmt, oe2->op);
3539 if (rhs1 != oe1->op && rhs1 != oe2->op)
3540 remove_visited_stmt_chain (rhs1);
3542 if (dump_file && (dump_flags & TDF_DETAILS))
3544 fprintf (dump_file, " into ");
3545 print_gimple_stmt (dump_file, stmt, 0, 0);
3551 /* If we hit here, we should have 3 or more ops left. */
3552 gcc_assert (opindex + 2 < ops.length ());
3554 /* Rewrite the next operator. */
3557 /* Recurse on the LHS of the binary operator, which is guaranteed to
3558 be the non-leaf side. */
3560 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3561 changed || oe->op != rhs2);
3563 if (oe->op != rhs2 || new_rhs1 != rhs1)
3565 if (dump_file && (dump_flags & TDF_DETAILS))
3567 fprintf (dump_file, "Transforming ");
3568 print_gimple_stmt (dump_file, stmt, 0, 0);
3571 /* If changed is false, this is either opindex == 0
3572 or all outer rhs2's were equal to corresponding oe->op,
3573 and powi_result is NULL.
3574 That means lhs is equivalent before and after reassociation.
3575 Otherwise ensure the old lhs SSA_NAME is not reused and
3576 create a new stmt as well, so that any debug stmts will be
3577 properly adjusted. */
3580 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3581 unsigned int uid = gimple_uid (stmt);
3582 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3584 lhs = make_ssa_name (TREE_TYPE (lhs));
3585 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3587 gimple_set_uid (stmt, uid);
3588 gimple_set_visited (stmt, true);
3589 if (insert_point == gsi_stmt (gsi))
3590 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3592 insert_stmt_after (stmt, insert_point);
3596 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3598 gimple_assign_set_rhs1 (stmt, new_rhs1);
3599 gimple_assign_set_rhs2 (stmt, oe->op);
3603 if (dump_file && (dump_flags & TDF_DETAILS))
3605 fprintf (dump_file, " into ");
3606 print_gimple_stmt (dump_file, stmt, 0, 0);
3612 /* Find out how many cycles we need to compute a statement chain.
3613 OPS_NUM holds the number of statements in a chain. CPU_WIDTH is the
3614 maximum number of independent statements we may execute per cycle. */
3617 get_required_cycles (int ops_num, int cpu_width)
3623 /* While we have more than 2 * cpu_width operands
3624 we may reduce the number of operands by cpu_width
3626 res = ops_num / (2 * cpu_width);
3628 /* The remaining operand count may be halved each cycle
3629 until we have only one operand. */
3630 rest = (unsigned)(ops_num - res * cpu_width);
3631 elog = exact_log2 (rest);
3635 res += floor_log2 (rest) + 1;
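/* Illustrative numbers (assuming the elided test above falls through to
   the floor_log2 branch when REST is not an exact power of two): for
   ops_num == 10 and cpu_width == 2, res starts as 10 / 4 == 2, the
   remaining 10 - 2 * 2 == 6 operands are then roughly halved each
   cycle, adding floor_log2 (6) + 1 == 3, for 5 cycles in total.  */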
3640 /* Returns an optimal number of registers to use for the computation of
3641 the given statements. */
3644 get_reassociation_width (int ops_num, enum tree_code opc,
3647 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3652 if (param_width > 0)
3653 width = param_width;
3655 width = targetm.sched.reassociation_width (opc, mode);
3660 /* Get the minimal time required for sequence computation. */
3661 cycles_best = get_required_cycles (ops_num, width);
3663 /* Check if we may use a smaller width and still compute the sequence
3664 in the same time. That allows us to reduce register usage.
3665 get_required_cycles is monotonically increasing with lower width
3666 so we can perform a binary search for the minimal width that still
3667 results in the optimal cycle count. */
3669 while (width > width_min)
3671 int width_mid = (width + width_min) / 2;
3673 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3675 else if (width_min < width_mid)
3676 width_min = width_mid;
3684 /* Recursively rewrite our linearized statements so that the operators
3685 match those in OPS[OPINDEX], putting the computation in rank
3686 order and trying to allow operations to be executed in
3690 rewrite_expr_tree_parallel (gassign *stmt, int width,
3691 vec<operand_entry_t> ops)
3693 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3694 int op_num = ops.length ();
3695 int stmt_num = op_num - 1;
3696 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3697 int op_index = op_num - 1;
3699 int ready_stmts_end = 0;
3701 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3703 /* We start expression rewriting from the top statements.
3704 So, in this loop we create a full list of statements
3705 we will work with. */
3706 stmts[stmt_num - 1] = stmt;
3707 for (i = stmt_num - 2; i >= 0; i--)
3708 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3710 for (i = 0; i < stmt_num; i++)
3714 /* Determine whether we should use results of
3715 already handled statements or not. */
3716 if (ready_stmts_end == 0
3717 && (i - stmt_index >= width || op_index < 1))
3718 ready_stmts_end = i;
3720 /* Now we choose operands for the next statement. A non-zero
3721 value in ready_stmts_end here means that we should use
3722 the results of already generated statements as new operands. */
3723 if (ready_stmts_end > 0)
3725 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3726 if (ready_stmts_end > stmt_index)
3727 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3728 else if (op_index >= 0)
3729 op2 = ops[op_index--]->op;
3732 gcc_assert (stmt_index < i);
3733 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3736 if (stmt_index >= ready_stmts_end)
3737 ready_stmts_end = 0;
3742 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3743 op2 = ops[op_index--]->op;
3744 op1 = ops[op_index--]->op;
3747 /* If we emit the last statement then we should put
3748 operands into the last statement. It will also
3750 if (op_index < 0 && stmt_index == i)
3753 if (dump_file && (dump_flags & TDF_DETAILS))
3755 fprintf (dump_file, "Transforming ");
3756 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3759 /* We keep the original statement only for the last one. All
3760 others are recreated. */
3761 if (i == stmt_num - 1)
3763 gimple_assign_set_rhs1 (stmts[i], op1);
3764 gimple_assign_set_rhs2 (stmts[i], op2);
3765 update_stmt (stmts[i]);
3768 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3770 if (dump_file && (dump_flags & TDF_DETAILS))
3772 fprintf (dump_file, " into ");
3773 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3777 remove_visited_stmt_chain (last_rhs1);
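/* A source-level sketch of the intent (illustrative only, not the exact
   emission order of the loop above; the helper name is made up): with
   width 2, a chain of three dependent additions can finish in two
   cycles because the first two additions are independent of each
   other.  */

static inline unsigned int
reassoc_width2_example (unsigned int a, unsigned int b,
			unsigned int c, unsigned int d)
{
  unsigned int t1 = a + b;	/* Can issue in the same cycle as t2.  */
  unsigned int t2 = c + d;
  return t1 + t2;		/* Only this addition depends on both.  */
}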
3780 /* Transform STMT, which is really (A + B) + (C + D) into the left
3781 linear form, ((A+B)+C)+D.
3782 Recurse on D if necessary. */
3785 linearize_expr (gimple stmt)
3787 gimple_stmt_iterator gsi;
3788 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3789 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3790 gimple oldbinrhs = binrhs;
3791 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3792 gimple newbinrhs = NULL;
3793 struct loop *loop = loop_containing_stmt (stmt);
3794 tree lhs = gimple_assign_lhs (stmt);
3796 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3797 && is_reassociable_op (binrhs, rhscode, loop));
3799 gsi = gsi_for_stmt (stmt);
3801 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3802 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
3803 gimple_assign_rhs_code (binrhs),
3804 gimple_assign_lhs (binlhs),
3805 gimple_assign_rhs2 (binrhs));
3806 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3807 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3808 gimple_set_uid (binrhs, gimple_uid (stmt));
3810 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3811 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3813 if (dump_file && (dump_flags & TDF_DETAILS))
3815 fprintf (dump_file, "Linearized: ");
3816 print_gimple_stmt (dump_file, stmt, 0, 0);
3819 reassociate_stats.linearized++;
3822 gsi = gsi_for_stmt (oldbinrhs);
3823 reassoc_remove_stmt (&gsi);
3824 release_defs (oldbinrhs);
3826 gimple_set_visited (stmt, true);
3827 gimple_set_visited (binlhs, true);
3828 gimple_set_visited (binrhs, true);
3830 /* Tail recurse on the new rhs if it still needs reassociation. */
3831 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3832 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3833 want to change the algorithm while converting to tuples. */
3834 linearize_expr (stmt);
3837 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3838 it. Otherwise, return NULL. */
3841 get_single_immediate_use (tree lhs)
3843 use_operand_p immuse;
3846 if (TREE_CODE (lhs) == SSA_NAME
3847 && single_imm_use (lhs, &immuse, &immusestmt)
3848 && is_gimple_assign (immusestmt))
3854 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3855 representing the negated value. Insertions of any necessary
3856 instructions go before GSI.
3857 This function is recursive in that, if you hand it "a_5" as the
3858 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3859 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3862 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3864 gimple negatedefstmt = NULL;
3865 tree resultofnegate;
3866 gimple_stmt_iterator gsi;
3869 /* If we are trying to negate a name, defined by an add, negate the
3870 add operands instead. */
3871 if (TREE_CODE (tonegate) == SSA_NAME)
3872 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3873 if (TREE_CODE (tonegate) == SSA_NAME
3874 && is_gimple_assign (negatedefstmt)
3875 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3876 && has_single_use (gimple_assign_lhs (negatedefstmt))
3877 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3879 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3880 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3881 tree lhs = gimple_assign_lhs (negatedefstmt);
3884 gsi = gsi_for_stmt (negatedefstmt);
3885 rhs1 = negate_value (rhs1, &gsi);
3887 gsi = gsi_for_stmt (negatedefstmt);
3888 rhs2 = negate_value (rhs2, &gsi);
3890 gsi = gsi_for_stmt (negatedefstmt);
3891 lhs = make_ssa_name (TREE_TYPE (lhs));
3892 gimple_set_visited (negatedefstmt, true);
3893 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
3894 gimple_set_uid (g, gimple_uid (negatedefstmt));
3895 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3899 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3900 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3901 NULL_TREE, true, GSI_SAME_STMT);
3903 uid = gimple_uid (gsi_stmt (gsi));
3904 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3906 gimple stmt = gsi_stmt (gsi);
3907 if (gimple_uid (stmt) != 0)
3909 gimple_set_uid (stmt, uid);
3911 return resultofnegate;
3914 /* Return true if we should break up the subtract in STMT into an add
3915 with negate. This is true when the subtract operands are really
3916 adds, or the subtract itself is used in an add expression. In
3917 either case, breaking up the subtract into an add with negate
3918 exposes the adds to reassociation. */
3921 should_break_up_subtract (gimple stmt)
3923 tree lhs = gimple_assign_lhs (stmt);
3924 tree binlhs = gimple_assign_rhs1 (stmt);
3925 tree binrhs = gimple_assign_rhs2 (stmt);
3927 struct loop *loop = loop_containing_stmt (stmt);
3929 if (TREE_CODE (binlhs) == SSA_NAME
3930 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3933 if (TREE_CODE (binrhs) == SSA_NAME
3934 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3937 if (TREE_CODE (lhs) == SSA_NAME
3938 && (immusestmt = get_single_immediate_use (lhs))
3939 && is_gimple_assign (immusestmt)
3940 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3941 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3946 /* Transform STMT from A - B into A + -B. */
3949 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3951 tree rhs1 = gimple_assign_rhs1 (stmt);
3952 tree rhs2 = gimple_assign_rhs2 (stmt);
3954 if (dump_file && (dump_flags & TDF_DETAILS))
3956 fprintf (dump_file, "Breaking up subtract ");
3957 print_gimple_stmt (dump_file, stmt, 0, 0);
3960 rhs2 = negate_value (rhs2, gsip);
3961 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3965 /* Determine whether STMT is a builtin call that raises an SSA name
3966 to an integer power and has only one use. If so, and this is early
3967 reassociation and unsafe math optimizations are permitted, place
3968 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3969 If any of these conditions does not hold, return FALSE. */
3972 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3975 REAL_VALUE_TYPE c, cint;
3977 if (!first_pass_instance
3978 || !flag_unsafe_math_optimizations
3979 || !is_gimple_call (stmt)
3980 || !has_single_use (gimple_call_lhs (stmt)))
3983 fndecl = gimple_call_fndecl (stmt);
3986 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3989 switch (DECL_FUNCTION_CODE (fndecl))
3991 CASE_FLT_FN (BUILT_IN_POW):
3992 if (flag_errno_math)
3995 *base = gimple_call_arg (stmt, 0);
3996 arg1 = gimple_call_arg (stmt, 1);
3998 if (TREE_CODE (arg1) != REAL_CST)
4001 c = TREE_REAL_CST (arg1);
4003 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4006 *exponent = real_to_integer (&c);
4007 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4008 if (!real_identical (&c, &cint))
4013 CASE_FLT_FN (BUILT_IN_POWI):
4014 *base = gimple_call_arg (stmt, 0);
4015 arg1 = gimple_call_arg (stmt, 1);
4017 if (!tree_fits_shwi_p (arg1))
4020 *exponent = tree_to_shwi (arg1);
4027 /* Expanding negative exponents is generally unproductive, so we don't
4028 complicate matters with those. Exponents of zero and one should
4029 have been handled by expression folding. */
4030 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
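/* For example (illustrative SSA names): with -funsafe-math-optimizations
   and -fno-math-errno during the first reassociation pass, a single-use
   call such as t_1 = __builtin_pow (x_2, 3.0) is accepted with
   *BASE = x_2 and *EXPONENT = 3, whereas __builtin_pow (x_2, 2.5) is
   rejected because 2.5 is not an exact integer.  */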
4036 /* Recursively linearize a binary expression that is the RHS of STMT.
4037 Place the operands of the expression tree in the vector named OPS. */
4040 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
4041 bool is_associative, bool set_visited)
4043 tree binlhs = gimple_assign_rhs1 (stmt);
4044 tree binrhs = gimple_assign_rhs2 (stmt);
4045 gimple binlhsdef = NULL, binrhsdef = NULL;
4046 bool binlhsisreassoc = false;
4047 bool binrhsisreassoc = false;
4048 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4049 struct loop *loop = loop_containing_stmt (stmt);
4050 tree base = NULL_TREE;
4051 HOST_WIDE_INT exponent = 0;
4054 gimple_set_visited (stmt, true);
4056 if (TREE_CODE (binlhs) == SSA_NAME)
4058 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4059 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4060 && !stmt_could_throw_p (binlhsdef));
4063 if (TREE_CODE (binrhs) == SSA_NAME)
4065 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4066 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4067 && !stmt_could_throw_p (binrhsdef));
4070 /* If the LHS is not reassociable, but the RHS is, we need to swap
4071 them. If neither is reassociable, there is nothing we can do, so
4072 just put them in the ops vector. If the LHS is reassociable,
4073 linearize it. If both are reassociable, then linearize the RHS
4076 if (!binlhsisreassoc)
4078 /* If this is not an associative operation (e.g. a division), give up. */
4079 if (!is_associative)
4081 add_to_ops_vec (ops, binrhs);
4085 if (!binrhsisreassoc)
4087 if (rhscode == MULT_EXPR
4088 && TREE_CODE (binrhs) == SSA_NAME
4089 && acceptable_pow_call (binrhsdef, &base, &exponent))
4091 add_repeat_to_ops_vec (ops, base, exponent);
4092 gimple_set_visited (binrhsdef, true);
4095 add_to_ops_vec (ops, binrhs);
4097 if (rhscode == MULT_EXPR
4098 && TREE_CODE (binlhs) == SSA_NAME
4099 && acceptable_pow_call (binlhsdef, &base, &exponent))
4101 add_repeat_to_ops_vec (ops, base, exponent);
4102 gimple_set_visited (binlhsdef, true);
4105 add_to_ops_vec (ops, binlhs);
4110 if (dump_file && (dump_flags & TDF_DETAILS))
4112 fprintf (dump_file, "swapping operands of ");
4113 print_gimple_stmt (dump_file, stmt, 0, 0);
4116 swap_ssa_operands (stmt,
4117 gimple_assign_rhs1_ptr (stmt),
4118 gimple_assign_rhs2_ptr (stmt));
4121 if (dump_file && (dump_flags & TDF_DETAILS))
4123 fprintf (dump_file, " is now ");
4124 print_gimple_stmt (dump_file, stmt, 0, 0);
4127 /* We want to make it so the lhs is always the reassociative op,
4129 std::swap (binlhs, binrhs);
4131 else if (binrhsisreassoc)
4133 linearize_expr (stmt);
4134 binlhs = gimple_assign_rhs1 (stmt);
4135 binrhs = gimple_assign_rhs2 (stmt);
4138 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4139 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4141 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4142 is_associative, set_visited);
4144 if (rhscode == MULT_EXPR
4145 && TREE_CODE (binrhs) == SSA_NAME
4146 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4148 add_repeat_to_ops_vec (ops, base, exponent);
4149 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4152 add_to_ops_vec (ops, binrhs);
4155 /* Repropagate the negates back into subtracts, since no other pass
4156 currently does it. */
4159 repropagate_negates (void)
4164 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4166 gimple user = get_single_immediate_use (negate);
4168 if (!user || !is_gimple_assign (user))
4171 /* The negate operand can be either operand of a PLUS_EXPR
4172 (it can be the LHS if the RHS is a constant for example).
4174 Force the negate operand to the RHS of the PLUS_EXPR, then
4175 transform the PLUS_EXPR into a MINUS_EXPR. */
4176 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4178 /* If the negated operand appears on the LHS of the
4179 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4180 to force the negated operand to the RHS of the PLUS_EXPR. */
4181 if (gimple_assign_rhs1 (user) == negate)
4183 swap_ssa_operands (user,
4184 gimple_assign_rhs1_ptr (user),
4185 gimple_assign_rhs2_ptr (user));
4188 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4189 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4190 if (gimple_assign_rhs2 (user) == negate)
4192 tree rhs1 = gimple_assign_rhs1 (user);
4193 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
4194 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4195 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4199 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4201 if (gimple_assign_rhs1 (user) == negate)
4206 which we transform into
4209 This pushes down the negate which we possibly can merge
4210 into some other operation, hence insert it into the
4211 plus_negates vector. */
4212 gimple feed = SSA_NAME_DEF_STMT (negate);
4213 tree a = gimple_assign_rhs1 (feed);
4214 tree b = gimple_assign_rhs2 (user);
4215 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4216 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4217 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4218 gimple g = gimple_build_assign (x, PLUS_EXPR, a, b);
4219 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4220 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4221 user = gsi_stmt (gsi2);
4223 reassoc_remove_stmt (&gsi);
4224 release_defs (feed);
4225 plus_negates.safe_push (gimple_assign_lhs (user));
4229 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4230 rid of one operation. */
4231 gimple feed = SSA_NAME_DEF_STMT (negate);
4232 tree a = gimple_assign_rhs1 (feed);
4233 tree rhs1 = gimple_assign_rhs1 (user);
4234 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4235 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4236 update_stmt (gsi_stmt (gsi));
4230 rid of one operation. */
4231 gimple feed
= SSA_NAME_DEF_STMT (negate
);
4232 tree a
= gimple_assign_rhs1 (feed
);
4233 tree rhs1
= gimple_assign_rhs1 (user
);
4234 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
4235 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, a
);
4236 update_stmt (gsi_stmt (gsi
));
4242 /* Returns true if OP is of a type for which we can do reassociation.
4243 That is for integral or non-saturating fixed-point types, and for
4244 floating point type when associative-math is enabled. */
4247 can_reassociate_p (tree op
)
4249 tree type
= TREE_TYPE (op
);
4250 if ((INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_WRAPS (type
))
4251 || NON_SAT_FIXED_POINT_TYPE_P (type
)
4252 || (flag_associative_math
&& FLOAT_TYPE_P (type
)))
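/* For example (illustrative): with 32-bit signed int operands
   a == INT_MAX, b == 1 and c == -1, (a + b) + c overflows while
   a + (b + c) does not, so signed integral types are only handled here
   when TYPE_OVERFLOW_WRAPS holds; unsigned (wrapping) arithmetic gives
   the same value for either grouping.  */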
4257 /* Break up subtract operations in block BB.
4259 We do this top down because we don't know whether the subtract is
4260 part of a possible chain of reassociation except at the top.
4269 we want to break up k = t - q, but we won't until we've transformed q
4270 = b - r, which won't be broken up until we transform b = c - d.
4272 En passant, clear the GIMPLE visited flag on every statement
4273 and set UIDs within each basic block. */
4276 break_up_subtract_bb (basic_block bb)
4278 gimple_stmt_iterator gsi;
4280 unsigned int uid = 1;
4282 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4284 gimple stmt = gsi_stmt (gsi);
4285 gimple_set_visited (stmt, false);
4286 gimple_set_uid (stmt, uid++);
4288 if (!is_gimple_assign (stmt)
4289 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4292 /* Look for simple gimple subtract operations. */
4293 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4295 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4296 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4299 /* Check for a subtract used only in an addition. If this
4300 is the case, transform it into add of a negate for better
4301 reassociation. IE transform C = A-B into C = A + -B if C
4302 is only used in an addition. */
4303 if (should_break_up_subtract (stmt))
4304 break_up_subtract (stmt, &gsi);
4306 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4307 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4308 plus_negates.safe_push (gimple_assign_lhs (stmt));
4310 for (son = first_dom_son (CDI_DOMINATORS, bb);
4312 son = next_dom_son (CDI_DOMINATORS, son))
4313 break_up_subtract_bb (son);
4316 /* Used for repeated factor analysis. */
4317 struct repeat_factor_d
4319 /* An SSA name that occurs in a multiply chain. */
4322 /* Cached rank of the factor. */
4325 /* Number of occurrences of the factor in the chain. */
4326 HOST_WIDE_INT count;
4328 /* An SSA name representing the product of this factor and
4329 all factors appearing later in the repeated factor vector. */
4333 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4334 typedef const struct repeat_factor_d *const_repeat_factor_t;
4337 static vec<repeat_factor> repeat_factor_vec;
4339 /* Used for sorting the repeat factor vector. Sort primarily by
4340 ascending occurrence count, secondarily by descending rank. */
4343 compare_repeat_factors (const void *x1, const void *x2)
4345 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4346 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4348 if (rf1->count != rf2->count)
4349 return rf1->count - rf2->count;
4351 return rf2->rank - rf1->rank;

/* Look for repeated operands in OPS in the multiply tree rooted at
   STMT.  Replace them with an optimal sequence of multiplies and powi
   builtin calls, and remove the used operands from OPS.  Return an
   SSA name representing the value of the replacement sequence.  */

static tree
attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
{
  unsigned i, j, vec_len;
  int ii;
  operand_entry_t oe;
  repeat_factor_t rf1, rf2;
  repeat_factor rfnew;
  tree result = NULL_TREE;
  tree target_ssa, iter_result;
  tree type = TREE_TYPE (gimple_get_lhs (stmt));
  tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gimple mul_stmt, pow_stmt;

  /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
     target.  */
  if (!powi_fndecl)
    return NULL_TREE;

  /* Allocate the repeated factor vector.  */
  repeat_factor_vec.create (10);

  /* Scan the OPS vector for all SSA names in the product and build
     up a vector of occurrence counts for each factor.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
        {
          FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
            {
              if (rf1->factor == oe->op)
                {
                  rf1->count += oe->count;
                  break;
                }
            }

          if (j >= repeat_factor_vec.length ())
            {
              rfnew.factor = oe->op;
              rfnew.rank = oe->rank;
              rfnew.count = oe->count;
              rfnew.repr = NULL_TREE;
              repeat_factor_vec.safe_push (rfnew);
            }
        }
    }

  /* Sort the repeated factor vector by (a) increasing occurrence count,
     and (b) decreasing rank.  */
  repeat_factor_vec.qsort (compare_repeat_factors);

  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_pow opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions:

        t1 = y * z
        t2 = t1 * x
        t3 = t2 * t2
        t4 = t1 * t3
        result = t4 * z

     If we instead ignored the cached y * z and first multiplied by
     the __builtin_pow opportunity z * z, we would get the inferior:

        t1 = y * z
        t2 = t1 * x
        t3 = t2 * t2
        t4 = z * z
        t5 = t3 * t4
        result = t5 * y  */

  vec_len = repeat_factor_vec.length ();

  /* Repeatedly look for opportunities to create a builtin_powi call.  */
  while (true)
    {
      HOST_WIDE_INT power;

      /* First look for the largest cached product of factors from
         preceding iterations.  If found, create a builtin_powi for
         it if the minimum occurrence count for its factors is at
         least 2, or just use this cached product as our next
         multiplicand if the minimum occurrence count is 1.  */
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
        {
          if (rf1->repr && rf1->count > 0)
            break;
        }

      if (j < vec_len)
        {
          power = rf1->count;

          if (power == 1)
            {
              iter_result = rf1->repr;

              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  unsigned elt;
                  repeat_factor_t rf;
                  fputs ("Multiplying by cached product ", dump_file);
                  for (elt = j; elt < vec_len; elt++)
                    {
                      rf = &repeat_factor_vec[elt];
                      print_generic_expr (dump_file, rf->factor, 0);
                      if (elt < vec_len - 1)
                        fputs (" * ", dump_file);
                    }
                  fputs ("\n", dump_file);
                }
            }
          else
            {
              iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
              pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                            build_int_cst (integer_type_node,
                                                           power));
              gimple_call_set_lhs (pow_stmt, iter_result);
              gimple_set_location (pow_stmt, gimple_location (stmt));
              gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);

              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  unsigned elt;
                  repeat_factor_t rf;
                  fputs ("Building __builtin_pow call for cached product (",
                         dump_file);
                  for (elt = j; elt < vec_len; elt++)
                    {
                      rf = &repeat_factor_vec[elt];
                      print_generic_expr (dump_file, rf->factor, 0);
                      if (elt < vec_len - 1)
                        fputs (" * ", dump_file);
                    }
                  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n",
                           power);
                }
            }
        }
      else
        {
          /* Otherwise, find the first factor in the repeated factor
             vector whose occurrence count is at least 2.  If no such
             factor exists, there are no builtin_powi opportunities
             remaining.  */
          FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
            {
              if (rf1->count >= 2)
                break;
            }

          if (j >= vec_len)
            break;

          power = rf1->count;

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              unsigned elt;
              repeat_factor_t rf;
              fputs ("Building __builtin_pow call for (", dump_file);
              for (elt = j; elt < vec_len; elt++)
                {
                  rf = &repeat_factor_vec[elt];
                  print_generic_expr (dump_file, rf->factor, 0);
                  if (elt < vec_len - 1)
                    fputs (" * ", dump_file);
                }
              fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n", power);
            }

          reassociate_stats.pows_created++;

          /* Visit each element of the vector in reverse order (so that
             high-occurrence elements are visited first, and within the
             same occurrence count, lower-ranked elements are visited
             first).  Form a linear product of all elements in this order
             whose occurrence count is at least that of element J.
             Record the SSA name representing the product of each element
             with all subsequent elements in the vector.  */
          if (j == vec_len - 1)
            rf1->repr = rf1->factor;
          else
            {
              for (ii = vec_len - 2; ii >= (int)j; ii--)
                {
                  tree op1, op2;

                  rf1 = &repeat_factor_vec[ii];
                  rf2 = &repeat_factor_vec[ii + 1];

                  /* Init the last factor's representative to be itself.  */
                  if (!rf2->repr)
                    rf2->repr = rf2->factor;

                  op1 = rf1->factor;
                  op2 = rf2->repr;

                  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
                  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
                                                  op1, op2);
                  gimple_set_location (mul_stmt, gimple_location (stmt));
                  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
                  rf1->repr = target_ssa;

                  /* Don't reprocess the multiply we just introduced.  */
                  gimple_set_visited (mul_stmt, true);
                }
            }

          /* Form a call to __builtin_powi for the maximum product
             just formed, raised to the power obtained earlier.  */
          rf1 = &repeat_factor_vec[j];
          iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
          pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                        build_int_cst (integer_type_node,
                                                       power));
          gimple_call_set_lhs (pow_stmt, iter_result);
          gimple_set_location (pow_stmt, gimple_location (stmt));
          gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
        }

      /* If we previously formed at least one other builtin_powi call,
         form the product of this one and those others.  */
      if (result)
        {
          tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
          mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
                                          result, iter_result);
          gimple_set_location (mul_stmt, gimple_location (stmt));
          gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
          gimple_set_visited (mul_stmt, true);
          result = new_result;
        }
      else
        result = iter_result;

      /* Decrement the occurrence count of each element in the product
         by the count found above, and remove this many copies of each
         factor from OPS.  */
      for (i = j; i < vec_len; i++)
        {
          unsigned k = power;
          unsigned n;

          rf1 = &repeat_factor_vec[i];
          rf1->count -= power;

          FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
            {
              if (oe->op == rf1->factor)
                {
                  if (oe->count <= k)
                    {
                      ops->ordered_remove (n);
                      k -= oe->count;

                      if (k == 0)
                        break;
                    }
                  else
                    {
                      oe->count -= k;
                      break;
                    }
                }
            }
        }
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}
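
/* A compact way to see what the loop above computes (editorial sketch,
   not taken verbatim from the algorithm's own comments): once the
   repeated factors are sorted by ascending count c[0] <= ... <= c[n-1],
   the emitted sequence corresponds to the identity

     f[0]^c[0] * f[1]^c[1] * ... * f[n-1]^c[n-1]
       = prod_j (f[j] * f[j+1] * ... * f[n-1]) ^ (c[j] - c[j-1]),

   with c[-1] taken to be 0, where each suffix product f[j] * ... * f[n-1]
   is exactly the cached representative stored in REPR.  For the example
   x^2 * y^3 * z^4 this yields (x*y*z)^2 * (y*z)^1 * z^1: one
   __builtin_powi call plus two multiplies by cached products, matching
   the five-multiply sequence shown before the main loop.  Factors whose
   count is 1 from the start never enter a powi call and are simply left
   in OPS.  */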

/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */

static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
{
  tree rhs1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}

/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
                            tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}

/* Reassociate expressions in basic block BB and its post-dominator as
   children.  */

static void
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple stmt = last_stmt (bb);

  if (stmt && !gimple_visited_p (stmt))
    maybe_optimize_range_tests (stmt);

  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
          && !stmt_could_throw_p (stmt))
        {
          tree lhs, rhs1, rhs2;
          enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

          /* If this is not a gimple binary expression, there is
             nothing for us to do with it.  */
          if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
            continue;

          /* If this was part of an already processed statement,
             we don't need to touch it again.  */
          if (gimple_visited_p (stmt))
            {
              /* This statement might have become dead because of previous
                 reassociations.  */
              if (has_zero_uses (gimple_get_lhs (stmt)))
                {
                  reassoc_remove_stmt (&gsi);
                  release_defs (stmt);
                  /* We might end up removing the last stmt above which
                     places the iterator to the end of the sequence.
                     Reset it to the last stmt in this case which might
                     be the end of the sequence as well if we removed
                     the last statement of the sequence.  In which case
                     we need to bail out.  */
                  if (gsi_end_p (gsi))
                    {
                      gsi = gsi_last_bb (bb);
                      if (gsi_end_p (gsi))
                        break;
                    }
                }
              continue;
            }

          lhs = gimple_assign_lhs (stmt);
          rhs1 = gimple_assign_rhs1 (stmt);
          rhs2 = gimple_assign_rhs2 (stmt);

          /* For non-bit or min/max operations we can't associate
             all types.  Verify that here.  */
          if (rhs_code != BIT_IOR_EXPR
              && rhs_code != BIT_AND_EXPR
              && rhs_code != BIT_XOR_EXPR
              && rhs_code != MIN_EXPR
              && rhs_code != MAX_EXPR
              && (!can_reassociate_p (lhs)
                  || !can_reassociate_p (rhs1)
                  || !can_reassociate_p (rhs2)))
            continue;

          if (associative_tree_code (rhs_code))
            {
              auto_vec<operand_entry_t> ops;
              tree powi_result = NULL_TREE;

              /* There may be no immediate uses left by the time we
                 get here because we may have eliminated them all.  */
              if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
                continue;

              gimple_set_visited (stmt, true);
              linearize_expr_tree (&ops, stmt, true, true);
              ops.qsort (sort_by_operand_rank);
              optimize_ops_list (rhs_code, &ops);
              if (undistribute_ops_list (rhs_code, &ops,
                                         loop_containing_stmt (stmt)))
                {
                  ops.qsort (sort_by_operand_rank);
                  optimize_ops_list (rhs_code, &ops);
                }

              if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
                optimize_range_tests (rhs_code, &ops);

              if (first_pass_instance
                  && rhs_code == MULT_EXPR
                  && flag_unsafe_math_optimizations)
                powi_result = attempt_builtin_powi (stmt, &ops);

              /* If the operand vector is now empty, all operands were
                 consumed by the __builtin_powi optimization.  */
              if (ops.length () == 0)
                transform_stmt_to_copy (&gsi, stmt, powi_result);
              else if (ops.length () == 1)
                {
                  tree last_op = ops.last ()->op;

                  if (powi_result)
                    transform_stmt_to_multiply (&gsi, stmt, last_op,
                                                powi_result);
                  else
                    transform_stmt_to_copy (&gsi, stmt, last_op);
                }
              else
                {
                  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
                  int ops_num = ops.length ();
                  int width = get_reassociation_width (ops_num, rhs_code, mode);
                  tree new_lhs = lhs;

                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file,
                             "Width = %d was chosen for reassociation\n", width);

                  if (width > 1
                      && ops.length () > 3)
                    rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
                                                width, ops);
                  else
                    {
                      /* When there are three operands left, we want
                         to make sure the ones that get the double
                         binary op are chosen wisely.  */
                      int len = ops.length ();
                      if (len >= 3)
                        swap_ops_for_binary_stmt (ops, len - 3, stmt);

                      new_lhs = rewrite_expr_tree (stmt, 0, ops,
                                                   powi_result != NULL);
                    }

                  /* If we combined some repeated factors into a
                     __builtin_powi call, multiply that result by the
                     reassociated operands.  */
                  if (powi_result)
                    {
                      gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
                      tree type = TREE_TYPE (lhs);
                      tree target_ssa = make_temp_ssa_name (type, NULL,
                                                            "reassocmul");
                      gimple_set_lhs (lhs_stmt, target_ssa);
                      update_stmt (lhs_stmt);
                      if (lhs != new_lhs)
                        target_ssa = new_lhs;
                      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
                                                      powi_result, target_ssa);
                      gimple_set_location (mul_stmt, gimple_location (stmt));
                      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
                    }
                }
            }
        }
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    reassociate_bb (son);
}

/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...; // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...; // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI<1(l1), OTHERVAR(l2)>;  */

static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (var);
      gimple use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      gcc_assert (ok
                  && is_gimple_assign (use_stmt)
                  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
                  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple g = gimple_build_cond (NE_EXPR, var,
                                    build_zero_cst (TREE_TYPE (var)),
                                    NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = REG_BR_PROB_BASE / 2;
      etrue->count = cond_bb->count / 2;
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      efalse->count -= etrue->count;
      then_bb->count -= etrue->count;

      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
        othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
        othervar = gimple_assign_rhs1 (use_stmt);
      else
        gcc_unreachable ();
      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}
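
/* Note on the edge bookkeeping above (descriptive comment added for
   clarity): the new true edge from COND_BB to MERGE_BB is given an even
   50% probability (REG_BR_PROB_BASE / 2) and half of COND_BB's execution
   count, and the same amounts are subtracted from the fall-through edge
   and from THEN_BB so that the profile stays consistent after the
   split.  */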

void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
void debug_ops_vector (vec<operand_entry_t> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
{
  operand_entry_t oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op, 0);
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry_t> ops)
{
  dump_ops_vector (stderr, ops);
}

static void
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}

/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, long>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
        insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}

/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
                            reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
                            reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
                            reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
                            reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
                            reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
                            reassociate_stats.pows_created);

  delete operand_rank;
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}

/* Gate and execute functions for Reassociation.  */

static unsigned int
execute_reassoc (void)
{
  init_reassoc ();

  do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return 0;
}

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
  virtual unsigned int execute (function *) { return execute_reassoc (); }

}; // class pass_reassoc

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}