/* Reassociation for trees.
   Copyright (C) 2005-2015 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hash-table.h"
#include "double-int.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "dominance.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-inline.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "gimple-expr.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "statistics.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "alloc-pool.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "insn-codes.h"
/*  This is a simple global reassociation pass.  It is, in part, based
    on the LLVM pass of the same name (they do some things more/less
    than we do, in different orders, etc).

    It consists of five steps:

    1. Breaking up subtract operations into addition + negate, where
    it would promote the reassociation of adds.

    2. Left linearization of the expression trees, so that (A+B)+(C+D)
    becomes (((A+B)+C)+D), which is easier for us to rewrite later.
    During linearization, we place the operands of the binary
    expressions into a vector of operand_entry_t.

    3. Optimization of the operand lists, eliminating things like a +
    -a, a & a, etc.

    3a. Combine repeated factors with the same occurrence counts
    into a __builtin_powi call that will later be optimized into
    an optimal number of multiplies.

    4. Rewrite the expression trees we linearized and optimized so
    they are in proper rank order.

    5. Repropagate negates, as nothing else will clean it up ATM.

    A bit of theory on #4, since nobody seems to write anything down
    about why it makes sense to do it the way they do it:

    We could do this much nicer theoretically, but don't (for reasons
    explained after how to do it theoretically nice :P).

    In order to promote the most redundancy elimination, you want
    binary expressions whose operands are the same rank (or
    preferably, the same value) exposed to the redundancy eliminator,
    for possible elimination.

    So the way to do this if we really cared, is to build the new op
    tree from the leaves to the roots, merging as you go, and putting the
    new op on the end of the worklist, until you are left with one
    thing on the worklist.

    IE if you have to rewrite the following set of operands (listed with
    rank in parentheses), with opcode PLUS_EXPR:

    a (1),  b (1),  c (1),  d (2), e (2)

    We start with our merge worklist empty, and the ops list with all of
    those on it.

    You want to first merge all leaves of the same rank, as much as
    possible.

    So first build a binary op of

    mergetmp = a + b, and put "mergetmp" on the merge worklist.

    Because there is no three operand form of PLUS_EXPR, c is not going to
    be exposed to redundancy elimination as a rank 1 operand.

    So you might as well throw it on the merge worklist (you could also
    consider it to now be a rank two operand, and merge it with d and e,
    but in this case, you then have evicted e from a binary op.  So at
    least in this situation, you can't win.)

    Then build a binary op of d + e
    mergetmp2 = d + e

    and put mergetmp2 on the merge worklist.

    so merge worklist = {mergetmp, c, mergetmp2}

    Continue building binary ops of these operations until you have only
    one operation left on the worklist.

    mergetmp3 = mergetmp + c

    worklist = {mergetmp2, mergetmp3}

    mergetmp4 = mergetmp2 + mergetmp3

    worklist = {mergetmp4}

    because we have one operation left, we can now just set the original
    statement equal to the result of that operation.

    This will at least expose a + b and d + e to redundancy elimination
    as binary operations.

    For extra points, you can reuse the old statements to build the
    mergetmps, since you shouldn't run out.

    So why don't we do this?

    Because it's expensive, and rarely will help.  Most trees we are
    reassociating have 3 or fewer ops.  If they have 2 ops, they already
    will be written into a nice single binary op.  If you have 3 ops, a
    single simple check suffices to tell you whether the first two are of the
    same rank.  If so, you know to order it

    mergetmp = op1 + op2
    newstmt = mergetmp + op3

    instead of
    mergetmp = op2 + op3
    newstmt = mergetmp + op1

    If all three are of the same rank, you can't expose them all in a
    single binary operator anyway, so the above is *still* the best you
    can do.

    Thus, this is what we do.  When we have three ops left, we check to see
    what order to put them in, and call it a day.  As a nod to vector sum
    reduction, we check if any of the ops are really a phi node that is a
    destructive update for the associating op, and keep the destructive
    update together for vector sum reduction recognition.  */
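
/* Illustrative example (not part of this pass; the variable names are
   made up): the kind of redundancy the rank-ordered rewrite in step 4
   exposes.  Given

       e = (a + b) + (c + d);
       f = (c + d) + (a + b);

   both sums are linearized to the operand list {a, b, c, d} and rebuilt
   in the same rank order, e.g. as

       t1 = a + b;  t2 = t1 + c;  e = t2 + d;
       t3 = a + b;  t4 = t3 + c;  f = t4 + d;

   so a later redundancy eliminator (FRE/DOM) can CSE the two chains.  */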
/* Statistics */
static struct
{
  int constants_eliminated;
  int ops_eliminated;
  int pows_encountered;
} reassociate_stats;

/* Operator, rank pair.  */
typedef struct operand_entry
{
  unsigned int rank;
  int id;
  tree op;
  unsigned int count;
} *operand_entry_t;

static alloc_pool operand_entry_pool;
/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;

/* Operand->rank hashtable.  */
static hash_map<tree, long> *operand_rank;

/* Vector of SSA_NAMEs on which after reassociate_bb is done with
   all basic blocks the CFG should be adjusted - basic blocks
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;
static long get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

static bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}
/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)

/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
static long
phi_rank (gimple stmt)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple use_stmt;

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (arg))
        {
          gimple def_stmt = SSA_NAME_DEF_STMT (arg);
          if (gimple_bb (def_stmt)->loop_father == father)
            return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
        }
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}
/* If EXP is an SSA_NAME defined by a PHI statement that represents a
   loop-carried dependence of an innermost loop, return TRUE; else
   return FALSE.  */

static bool
loop_carried_phi (tree exp)
{
  gimple phi_stmt;
  long block_rank;

  if (TREE_CODE (exp) != SSA_NAME
      || SSA_NAME_IS_DEFAULT_DEF (exp))
    return false;

  phi_stmt = SSA_NAME_DEF_STMT (exp);

  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
    return false;

  /* Non-loop-carried phis have block rank.  Loop-carried phis have
     an additional bias added in.  If this phi doesn't have block rank,
     it's biased and should not be propagated.  */
  block_rank = bb_rank[gimple_bb (phi_stmt)->index];

  if (phi_rank (phi_stmt) != block_rank)
    return true;

  return false;
}
/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */

static long
propagate_rank (long rank, tree op)
{
  long op_rank;

  if (loop_carried_phi (op))
    return rank;

  op_rank = get_rank (op);

  return MAX (rank, op_rank);
}
/* Look up the operand rank structure for expression E.  */

static long
find_operand_rank (tree e)
{
  long *slot = operand_rank->get (e);
  return slot ? *slot : -1;
}
/* Insert {E,RANK} into the operand rank hashtable.  */

static inline void
insert_operand_rank (tree e, long rank)
{
  gcc_assert (rank > 0);
  gcc_assert (!operand_rank->put (e, rank));
}
/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* Constants have rank 0.  */
  if (is_gimple_min_invariant (e))
    return 0;

  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      gimple stmt;
      long rank;
      int i, n;
      tree op;

      if (SSA_NAME_IS_DEFAULT_DEF (e))
        return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
        return phi_rank (stmt);

      if (!is_gimple_assign (stmt)
          || gimple_vdef (stmt))
        return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
        return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
         exception, remove the bias from loop-carried phis when propagating
         the rank so that dependent operations are not also biased.  */
      rank = 0;
      if (gimple_assign_single_p (stmt))
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          n = TREE_OPERAND_LENGTH (rhs);
          if (n == 0)
            rank = propagate_rank (rank, rhs);
          else
            {
              for (i = 0; i < n; i++)
                {
                  op = TREE_OPERAND (rhs, i);

                  if (op != NULL_TREE)
                    rank = propagate_rank (rank, op);
                }
            }
        }
      else
        {
          n = gimple_num_ops (stmt);
          for (i = 1; i < n; i++)
            {
              op = gimple_op (stmt, i);
              gcc_assert (op);
              rank = propagate_rank (rank, op);
            }
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Rank for ");
          print_generic_expr (dump_file, e, 0);
          fprintf (dump_file, " is %ld\n", (rank + 1));
        }

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Globals, etc, are rank 0.  */
  return 0;
}
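
/* Illustrative sketch of the ranking (not from this file): constants get
   rank 0, default definitions such as parameters get their pre-assigned
   rank, and a simple assignment gets one more than the maximum rank of
   its operands.  For example, assuming parameters a and b start with
   rank 1:

       c = a + b;      rank 2
       d = c + a;      rank 3
       e = d + 5;      rank 4 (the constant 5 contributes rank 0)

   Sorting by decreasing rank therefore lists later-computed values first
   and constants last.  */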
/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */

static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    return FLOAT_CONST_TYPE;
  else
    return OTHER_CONST_TYPE;
}
/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */

static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry_t oea = *(const operand_entry_t *)pa;
  const operand_entry_t oeb = *(const operand_entry_t *)pb;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oeb->rank == 0 && oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
        return constant_type (oeb->op) - constant_type (oea->op);
      else
        /* To make sorting result stable, we use unique IDs to determine
           order.  */
        return oeb->id - oea->id;
    }

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if ((oeb->rank - oea->rank == 0)
      && TREE_CODE (oea->op) == SSA_NAME
      && TREE_CODE (oeb->op) == SSA_NAME)
    {
      /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
         versions of removed SSA_NAMEs, prefer if possible to sort based
         on basic block and gimple_uid of the SSA_NAME_DEF_STMT.  */
      if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
          && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
          && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
        {
          gimple stmta = SSA_NAME_DEF_STMT (oea->op);
          gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
          basic_block bba = gimple_bb (stmta);
          basic_block bbb = gimple_bb (stmtb);
          if (bba && bbb)
            {
              if (bb_rank[bbb->index] != bb_rank[bba->index])
                return bb_rank[bbb->index] - bb_rank[bba->index];
              bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
              bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
              if (da != db)
                return da ? 1 : -1;
            }
        }

      if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
        return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
      else
        return oeb->id - oea->id;
    }

  if (oeb->rank != oea->rank)
    return oeb->rank - oea->rank;
  else
    return oeb->id - oea->id;
}
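
/* Illustrative ordering (not from this file): with the comparator above,
   an operand vector for  t = x + y + z + 4  with ranks x=1, y=1, z=2
   sorts to { z, x, y, 4 }: decreasing rank, so the rank-0 constant ends
   up at the tail where optimize_ops_list looks for constants to merge,
   while the equal-rank pair x, y stays adjacent, tie-broken by the
   stable operand-entry id.  */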
/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
{
  operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  ops->safe_push (oe);
}
/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
                       HOST_WIDE_INT repeat)
{
  operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}
/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    return true;

  return false;
}
/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of that operation.  Otherwise, return NULL_TREE.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple stmt = SSA_NAME_DEF_STMT (name);

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}
/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
                          vec<operand_entry_t> *ops,
                          bool *all_done,
                          unsigned int i,
                          operand_entry_t curr,
                          operand_entry_t last)
{
  /* If we have two of the same op, and the opcode is & |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
        {
        case MAX_EXPR:
        case MIN_EXPR:
        case BIT_IOR_EXPR:
        case BIT_AND_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op, 0);
              fprintf (dump_file, " [&|minmax] ");
              print_generic_expr (dump_file, last->op, 0);
              fprintf (dump_file, " -> ");
              print_generic_stmt (dump_file, last->op, 0);
            }

          ops->ordered_remove (i);
          reassociate_stats.ops_eliminated++;

          return true;

        case BIT_XOR_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op, 0);
              fprintf (dump_file, " ^ ");
              print_generic_expr (dump_file, last->op, 0);
              fprintf (dump_file, " -> nothing\n");
            }

          reassociate_stats.ops_eliminated += 2;

          if (ops->length () == 2)
            {
              add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
              *all_done = true;
            }
          else
            {
              ops->ordered_remove (i-1);
              ops->ordered_remove (i-1);
            }

          return true;

        default:
          break;
        }
    }
  return false;
}
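
/* Illustrative example (not from this file) of the duplicate-pair
   equivalences above, at the source level:

       (x & x) == x      (x | x) == x      MIN (x, x) == x
       (x ^ x) == 0

   so for &, |, MIN and MAX one copy of the repeated operand is dropped
   from the operand list, while for ^ both copies are dropped and a zero
   of the right type is pushed instead.  */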
static vec<tree> plus_negates;
/* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
   not expression, look in OPS for a corresponding positive operation to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
                           vec<operand_entry_t> *ops,
                           unsigned int currindex,
                           operand_entry_t curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1 ;
       i++)
    {
      if (oe->op == negateop)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, negateop, 0);
              fprintf (dump_file, " + -");
              print_generic_expr (dump_file, oe->op, 0);
              fprintf (dump_file, " -> 0\n");
            }

          ops->ordered_remove (i);
          add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
          ops->ordered_remove (currindex);
          reassociate_stats.ops_eliminated++;

          return true;
        }
      else if (oe->op == notop)
        {
          tree op_type = TREE_TYPE (oe->op);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, notop, 0);
              fprintf (dump_file, " + ~");
              print_generic_expr (dump_file, oe->op, 0);
              fprintf (dump_file, " -> -1\n");
            }

          ops->ordered_remove (i);
          add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
          ops->ordered_remove (currindex);
          reassociate_stats.ops_eliminated++;

          return true;
        }
    }

  /* CURR->OP is a negate expr in a plus expr: save it for later
     inspection in repropagate_negates().  */
  if (negateop != NULL_TREE)
    plus_negates.safe_push (curr->op);

  return false;
}
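
/* Illustrative example (not from this file) of the plus/minus and
   plus/not cancellations handled above:

       a + (-a)  ->  0
       a + (~a)  ->  -1      (since ~a == -a - 1 in two's complement)

   A negate that does not find a matching positive operand is recorded
   in plus_negates for repropagate_negates to inspect later.  */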
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0, and return true.  Otherwise, return
   false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
                     vec<operand_entry_t> *ops,
                     unsigned int currindex,
                     operand_entry_t curr)
{
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, notop, 0);
              if (opcode == BIT_AND_EXPR)
                fprintf (dump_file, " & ~");
              else if (opcode == BIT_IOR_EXPR)
                fprintf (dump_file, " | ~");
              print_generic_expr (dump_file, oe->op, 0);
              if (opcode == BIT_AND_EXPR)
                fprintf (dump_file, " -> 0\n");
              else if (opcode == BIT_IOR_EXPR)
                fprintf (dump_file, " -> -1\n");
            }

          if (opcode == BIT_AND_EXPR)
            oe->op = build_zero_cst (TREE_TYPE (oe->op));
          else if (opcode == BIT_IOR_EXPR)
            oe->op = build_all_ones_cst (TREE_TYPE (oe->op));

          reassociate_stats.ops_eliminated += ops->length () - 1;
          ops->truncate (0);
          ops->quick_push (oe);
          return true;
        }
    }

  return false;
}
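
/* Illustrative example (not from this file) of the not-pair
   eliminations handled above:

       a & (~a)  ->  0
       a | (~a)  ->  -1  (all ones)

   Because the result absorbs every other operand of the & or | chain,
   the whole operand list collapses to that single constant.  */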
/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */

static void
eliminate_using_constants (enum tree_code opcode,
                           vec<operand_entry_t> *ops)
{
  operand_entry_t oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
        {
        case BIT_AND_EXPR:
          if (integer_zerop (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found & 0, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;

                  ops->truncate (0);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_all_onesp (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found & -1, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        case BIT_IOR_EXPR:
          if (integer_all_onesp (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found | -1, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;

                  ops->truncate (0);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_zerop (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found | 0, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        case MULT_EXPR:
          if (integer_zerop (oelast->op)
              || (FLOAT_TYPE_P (type)
                  && !HONOR_NANS (type)
                  && !HONOR_SIGNED_ZEROS (type)
                  && real_zerop (oelast->op)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found * 0, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;
                  ops->truncate (0);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_onep (oelast->op)
                   || (FLOAT_TYPE_P (type)
                       && !HONOR_SNANS (type)
                       && real_onep (oelast->op)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found * 1, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                  return;
                }
            }
          break;
        case BIT_XOR_EXPR:
        case PLUS_EXPR:
        case MINUS_EXPR:
          if (integer_zerop (oelast->op)
              || (FLOAT_TYPE_P (type)
                  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
                  && fold_real_zero_addition_p (type, oelast->op,
                                                opcode == MINUS_EXPR)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found [|^+] 0, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                  return;
                }
            }
          break;
        default:
          break;
        }
    }
}
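
/* Illustrative examples (not from this file) of the constant identities
   handled above, written at the source level:

       x & 0   ->  0          x & -1  ->  x
       x | -1  ->  -1         x | 0   ->  x
       x * 0   ->  0          x * 1   ->  x
       x + 0   ->  x          x ^ 0   ->  x

   The floating-point variants are only applied when the checks named
   above (HONOR_NANS, HONOR_SIGNED_ZEROS, HONOR_SNANS,
   fold_real_zero_addition_p) say the transformation is safe.  */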
static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
                                 bool, bool);

/* Structure for tracking and counting operands.  */
typedef struct oecount_s {
  int cnt;
  int id;
  enum tree_code oecode;
  tree op;
} oecount;

/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;
/* Oecount hashtable helpers.  */

struct oecount_hasher
{
  typedef int value_type;
  typedef int compare_type;
  typedef int store_values_directly;
  static inline hashval_t hash (const value_type &);
  static inline bool equal (const value_type &, const compare_type &);
  static bool is_deleted (int &v) { return v == 1; }
  static void mark_deleted (int &e) { e = 1; }
  static bool is_empty (int &v) { return v == 0; }
  static void mark_empty (int &e) { e = 0; }
  static void remove (int &) {}
};
/* Hash function for oecount.  */

inline hashval_t
oecount_hasher::hash (const value_type &p)
{
  const oecount *c = &cvec[p - 42];
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}

/* Comparison function for oecount.  */

inline bool
oecount_hasher::equal (const value_type &p1, const compare_type &p2)
{
  const oecount *c1 = &cvec[p1 - 42];
  const oecount *c2 = &cvec[p2 - 42];
  return (c1->oecode == c2->oecode
          && c1->op == c2->op);
}
/* Comparison function for qsort sorting oecount elements by count.  */

static int
oecount_cmp (const void *p1, const void *p2)
{
  const oecount *c1 = (const oecount *)p1;
  const oecount *c2 = (const oecount *)p2;
  if (c1->cnt != c2->cnt)
    return c1->cnt - c2->cnt;
  else
    /* If counts are identical, use unique IDs to stabilize qsort.  */
    return c1->id - c2->id;
}
/* Return TRUE iff STMT represents a builtin call that raises OP
   to some exponent.  */

static bool
stmt_is_power_of_op (gimple stmt, tree op)
{
  tree fndecl;

  if (!is_gimple_call (stmt))
    return false;

  fndecl = gimple_call_fndecl (stmt);

  if (!fndecl
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return false;

  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
    {
    CASE_FLT_FN (BUILT_IN_POW):
    CASE_FLT_FN (BUILT_IN_POWI):
      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));

    default:
      return false;
    }
}
/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */

static HOST_WIDE_INT
decrement_power (gimple stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
    {
    CASE_FLT_FN (BUILT_IN_POW):
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, SIGNED);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_FLT_FN (BUILT_IN_POWI):
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      gcc_unreachable ();
    }
}
/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple stmt, tree *def)
{
  tree lhs;
  gimple use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  reassoc_remove_stmt (&gsi);
  release_defs (stmt);
}
/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  gimple stmt = SSA_NAME_DEF_STMT (*def);

  do
    {
      tree name;

      if (opcode == MULT_EXPR
          && stmt_is_power_of_op (stmt, op))
        {
          if (decrement_power (stmt) == 1)
            propagate_op_to_single_use (op, stmt, def);
          return;
        }

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
         is ours simply propagate the other operand into the stmts
         single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
          && (name == op
              || gimple_assign_rhs2 (stmt) == op))
        {
          if (name == op)
            name = gimple_assign_rhs2 (stmt);
          propagate_op_to_single_use (name, stmt, def);
          return;
        }

      /* We might have a multiply of two __builtin_pow* calls, and
         the operand might be hiding in the rightmost one.  */
      if (opcode == MULT_EXPR
          && gimple_assign_rhs_code (stmt) == opcode
          && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
          && has_single_use (gimple_assign_rhs2 (stmt)))
        {
          gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
          if (stmt_is_power_of_op (stmt2, op))
            {
              if (decrement_power (stmt2) == 1)
                propagate_op_to_single_use (op, stmt2, def);
              return;
            }
        }

      /* Continue walking the chain.  */
      gcc_assert (name != op
                  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
    }
  while (1);
}
/* Returns true if statement S1 dominates statement S2.  Like
   stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */

static bool
reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     SSA_NAME.  Assume it lives at the beginning of function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 == bb2)
    {
      /* PHIs in the same basic block are assumed to be
         executed all in parallel, if only one stmt is a PHI,
         it dominates the other stmt in the same basic block.  */
      if (gimple_code (s1) == GIMPLE_PHI)
        return true;

      if (gimple_code (s2) == GIMPLE_PHI)
        return false;

      gcc_assert (gimple_uid (s1) && gimple_uid (s2));

      if (gimple_uid (s1) < gimple_uid (s2))
        return true;

      if (gimple_uid (s1) > gimple_uid (s2))
        return false;

      gimple_stmt_iterator gsi = gsi_for_stmt (s1);
      unsigned int uid = gimple_uid (s1);
      for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple s = gsi_stmt (gsi);
          if (gimple_uid (s) != uid)
            break;
          if (s == s2)
            return true;
        }

      return false;
    }

  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}
/* Insert STMT after INSERT_POINT.  */

static void
insert_stmt_after (gimple stmt, gimple insert_point)
{
  gimple_stmt_iterator gsi;
  basic_block bb;

  if (gimple_code (insert_point) == GIMPLE_PHI)
    bb = gimple_bb (insert_point);
  else if (!stmt_ends_bb_p (insert_point))
    {
      gsi = gsi_for_stmt (insert_point);
      gimple_set_uid (stmt, gimple_uid (insert_point));
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
      return;
    }
  else
    /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
       thus if it must end a basic block, it should be a call that can
       throw, or some assignment that can throw.  If it throws, the LHS
       of it will not be initialized though, so only valid places using
       the SSA_NAME should be dominated by the fallthru edge.  */
    bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
  gsi = gsi_after_labels (bb);
  if (gsi_end_p (gsi))
    {
      gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
      gimple_set_uid (stmt,
                      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
    }
  else
    gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}
/* Builds one statement performing OP1 OPCODE OP2 using a new SSA name
   for the result.  Places the statement after the definition of either
   OP1 or OP2.  Returns the new statement.  */

static gimple
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
{
  gimple op1def = NULL, op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gimple sum;

  /* Create the addition statement.  */
  op = make_ssa_name (type);
  sum = gimple_build_assign (op, opcode, op1, op2);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
      if (gsi_end_p (gsi))
        {
          gimple_stmt_iterator gsi2
            = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
          gimple_set_uid (sum,
                          gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
        }
      else
        gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else
    {
      gimple insert_point;
      if ((!op1def || gimple_nop_p (op1def))
          || (op2def && !gimple_nop_p (op2def)
              && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
        insert_point = op2def;
      else
        insert_point = op1def;
      insert_stmt_after (sum, insert_point);
    }

  return sum;
}
/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
        second candidate bitmap noting all multiplication/division chains
        that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
        candidates with one (operand, code) operation removed from their
        multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
        multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
        processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b) * x * y in one invocation of the reassociation pass.  */
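
/* Illustrative example (not from this file) of un-distribution at the
   source level, assuming arithmetic where the transform is valid:

       a * x + b * x + c * x    ->  (a + b + c) * x
       a / x + b / x            ->  (a + b) / x

   The rebuilt addition chain (a + b + c) is itself fed back through the
   reassociation machinery, so a * x * y + b * y * x also becomes
   (a + b) * x * y in a single pass invocation.  */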
1436 undistribute_ops_list (enum tree_code opcode
,
1437 vec
<operand_entry_t
> *ops
, struct loop
*loop
)
1439 unsigned int length
= ops
->length ();
1440 operand_entry_t oe1
;
1442 sbitmap candidates
, candidates2
;
1443 unsigned nr_candidates
, nr_candidates2
;
1444 sbitmap_iterator sbi0
;
1445 vec
<operand_entry_t
> *subops
;
1446 bool changed
= false;
1447 int next_oecount_id
= 0;
1450 || opcode
!= PLUS_EXPR
)
1453 /* Build a list of candidates to process. */
1454 candidates
= sbitmap_alloc (length
);
1455 bitmap_clear (candidates
);
1457 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1459 enum tree_code dcode
;
1462 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1464 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1465 if (!is_gimple_assign (oe1def
))
1467 dcode
= gimple_assign_rhs_code (oe1def
);
1468 if ((dcode
!= MULT_EXPR
1469 && dcode
!= RDIV_EXPR
)
1470 || !is_reassociable_op (oe1def
, dcode
, loop
))
1473 bitmap_set_bit (candidates
, i
);
1477 if (nr_candidates
< 2)
1479 sbitmap_free (candidates
);
1483 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1485 fprintf (dump_file
, "searching for un-distribute opportunities ");
1486 print_generic_expr (dump_file
,
1487 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, 0);
1488 fprintf (dump_file
, " %d\n", nr_candidates
);
1491 /* Build linearized sub-operand lists and the counting table. */
1494 hash_table
<oecount_hasher
> ctable (15);
1496 /* ??? Macro arguments cannot have multi-argument template types in
1497 them. This typedef is needed to workaround that limitation. */
1498 typedef vec
<operand_entry_t
> vec_operand_entry_t_heap
;
1499 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1500 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1503 enum tree_code oecode
;
1506 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1507 oecode
= gimple_assign_rhs_code (oedef
);
1508 linearize_expr_tree (&subops
[i
], oedef
,
1509 associative_tree_code (oecode
), false);
1511 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1518 c
.id
= next_oecount_id
++;
1521 idx
= cvec
.length () + 41;
1522 slot
= ctable
.find_slot (idx
, INSERT
);
1530 cvec
[*slot
- 42].cnt
++;
1535 /* Sort the counting table. */
1536 cvec
.qsort (oecount_cmp
);
1538 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1541 fprintf (dump_file
, "Candidates:\n");
1542 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1544 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1545 c
->oecode
== MULT_EXPR
1546 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1547 print_generic_expr (dump_file
, c
->op
, 0);
1548 fprintf (dump_file
, "\n");
1552 /* Process the (operand, code) pairs in order of most occurrence. */
1553 candidates2
= sbitmap_alloc (length
);
1554 while (!cvec
.is_empty ())
1556 oecount
*c
= &cvec
.last ();
1560 /* Now collect the operands in the outer chain that contain
1561 the common operand in their inner chain. */
1562 bitmap_clear (candidates2
);
1564 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1567 enum tree_code oecode
;
1569 tree op
= (*ops
)[i
]->op
;
1571 /* If we undistributed in this chain already this may be
1573 if (TREE_CODE (op
) != SSA_NAME
)
1576 oedef
= SSA_NAME_DEF_STMT (op
);
1577 oecode
= gimple_assign_rhs_code (oedef
);
1578 if (oecode
!= c
->oecode
)
1581 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1583 if (oe1
->op
== c
->op
)
1585 bitmap_set_bit (candidates2
, i
);
1592 if (nr_candidates2
>= 2)
1594 operand_entry_t oe1
, oe2
;
1596 int first
= bitmap_first_set_bit (candidates2
);
1598 /* Build the new addition chain. */
1599 oe1
= (*ops
)[first
];
1600 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1602 fprintf (dump_file
, "Building (");
1603 print_generic_expr (dump_file
, oe1
->op
, 0);
1605 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1606 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1610 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1612 fprintf (dump_file
, " + ");
1613 print_generic_expr (dump_file
, oe2
->op
, 0);
1615 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1616 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1617 oe1
->op
, oe2
->op
, opcode
);
1618 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1620 oe1
->op
= gimple_get_lhs (sum
);
1623 /* Apply the multiplication/division. */
1624 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1625 oe1
->op
, c
->op
, c
->oecode
);
1626 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1628 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1629 print_generic_expr (dump_file
, c
->op
, 0);
1630 fprintf (dump_file
, "\n");
1633 /* Record it in the addition chain and disable further
1634 undistribution with this op. */
1635 oe1
->op
= gimple_assign_lhs (prod
);
1636 oe1
->rank
= get_rank (oe1
->op
);
1637 subops
[first
].release ();
1645 for (i
= 0; i
< ops
->length (); ++i
)
1646 subops
[i
].release ();
1649 sbitmap_free (candidates
);
1650 sbitmap_free (candidates2
);
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
   expression, examine the other OPS to see if any of them are comparisons
   of the same values, which we may be able to combine or eliminate.
   For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
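
/* Illustrative examples (not from this file) of comparison combining:

       (a < b) | (a == b)   ->  a <= b
       (a < b) & (a > b)    ->  0
       (a <= b) & (a >= b)  ->  a == b

   maybe_fold_or_comparisons / maybe_fold_and_comparisons do the actual
   folding; this function only rewrites the operand list with the
   combined result.  */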
1661 eliminate_redundant_comparison (enum tree_code opcode
,
1662 vec
<operand_entry_t
> *ops
,
1663 unsigned int currindex
,
1664 operand_entry_t curr
)
1667 enum tree_code lcode
, rcode
;
1672 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
1675 /* Check that CURR is a comparison. */
1676 if (TREE_CODE (curr
->op
) != SSA_NAME
)
1678 def1
= SSA_NAME_DEF_STMT (curr
->op
);
1679 if (!is_gimple_assign (def1
))
1681 lcode
= gimple_assign_rhs_code (def1
);
1682 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
1684 op1
= gimple_assign_rhs1 (def1
);
1685 op2
= gimple_assign_rhs2 (def1
);
1687 /* Now look for a similar comparison in the remaining OPS. */
1688 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
1692 if (TREE_CODE (oe
->op
) != SSA_NAME
)
1694 def2
= SSA_NAME_DEF_STMT (oe
->op
);
1695 if (!is_gimple_assign (def2
))
1697 rcode
= gimple_assign_rhs_code (def2
);
1698 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
1701 /* If we got here, we have a match. See if we can combine the
1703 if (opcode
== BIT_IOR_EXPR
)
1704 t
= maybe_fold_or_comparisons (lcode
, op1
, op2
,
1705 rcode
, gimple_assign_rhs1 (def2
),
1706 gimple_assign_rhs2 (def2
));
1708 t
= maybe_fold_and_comparisons (lcode
, op1
, op2
,
1709 rcode
, gimple_assign_rhs1 (def2
),
1710 gimple_assign_rhs2 (def2
));
1714 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1715 always give us a boolean_type_node value back. If the original
1716 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1717 we need to convert. */
1718 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
1719 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
1721 if (TREE_CODE (t
) != INTEGER_CST
1722 && !operand_equal_p (t
, curr
->op
, 0))
1724 enum tree_code subcode
;
1725 tree newop1
, newop2
;
1726 if (!COMPARISON_CLASS_P (t
))
1728 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1729 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1730 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1731 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
1735 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1737 fprintf (dump_file
, "Equivalence: ");
1738 print_generic_expr (dump_file
, curr
->op
, 0);
1739 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
1740 print_generic_expr (dump_file
, oe
->op
, 0);
1741 fprintf (dump_file
, " -> ");
1742 print_generic_expr (dump_file
, t
, 0);
1743 fprintf (dump_file
, "\n");
1746 /* Now we can delete oe, as it has been subsumed by the new combined
1748 ops
->ordered_remove (i
);
1749 reassociate_stats
.ops_eliminated
++;
1751 /* If t is the same as curr->op, we're done. Otherwise we must
1752 replace curr->op with t. Special case is if we got a constant
1753 back, in which case we add it to the end instead of in place of
1754 the current entry. */
1755 if (TREE_CODE (t
) == INTEGER_CST
)
1757 ops
->ordered_remove (currindex
);
1758 add_to_ops_vec (ops
, t
);
1760 else if (!operand_equal_p (t
, curr
->op
, 0))
1763 enum tree_code subcode
;
1766 gcc_assert (COMPARISON_CLASS_P (t
));
1767 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1768 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1769 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1770 gcc_checking_assert (is_gimple_val (newop1
)
1771 && is_gimple_val (newop2
));
1772 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
1773 curr
->op
= gimple_get_lhs (sum
);
/* Perform various identities and other optimizations on the list of
   operand entries, stored in OPS.  The tree code for the binary
   operation between all the operands is OPCODE.  */

static void
optimize_ops_list (enum tree_code opcode,
                   vec<operand_entry_t> *ops)
{
  unsigned int length = ops->length ();
  unsigned int i;
  operand_entry_t oe;
  operand_entry_t oelast = NULL;
  bool iterate = false;

  if (length == 1)
    return;

  oelast = ops->last ();

  /* If the last two are constants, pop the constants off, merge them
     and try the next two.  */
  if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
    {
      operand_entry_t oelm1 = (*ops)[length - 2];

      if (oelm1->rank == 0
          && is_gimple_min_invariant (oelm1->op)
          && useless_type_conversion_p (TREE_TYPE (oelm1->op),
                                        TREE_TYPE (oelast->op)))
        {
          tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
                                     oelm1->op, oelast->op);

          if (folded && is_gimple_min_invariant (folded))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "Merging constants\n");

              ops->pop ();
              ops->pop ();

              add_to_ops_vec (ops, folded);
              reassociate_stats.constants_eliminated++;

              optimize_ops_list (opcode, ops);
              return;
            }
        }
    }

  eliminate_using_constants (opcode, ops);
  oelast = NULL;

  for (i = 0; ops->iterate (i, &oe);)
    {
      bool done = false;

      if (eliminate_not_pairs (opcode, ops, i, oe))
        return;
      if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
          || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
          || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
        {
          if (done)
            return;
          iterate = true;
          oelast = NULL;
          continue;
        }
      oelast = oe;
      i++;
    }

  length = ops->length ();
  oelast = ops->last ();

  if (iterate)
    optimize_ops_list (opcode, ops);
}
/* The following functions are subroutines to optimize_range_tests and allow
   it to try to change a logical combination of comparisons into a range
   test.

   For example, both
        X == 2 || X == 5 || X == 3 || X == 4
   and
        X >= 2 && X <= 5
   are converted to
        (unsigned) (X - 2) <= 3

   For more information see comments above fold_test_range in fold-const.c,
   this implementation is for GIMPLE.  */

struct range_entry
{
  tree exp;
  tree low;
  tree high;
  bool in_p;
  bool strict_overflow_p;
  unsigned int idx, next;
};
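
/* Illustrative example (not from this file) of a range test at the
   source level:

       x == 2 || x == 3 || x == 4 || x == 5
   ->  (unsigned) (x - 2) <= 3

   i.e. a chain of equality tests against consecutive constants collapses
   into one subtraction and one unsigned comparison.  */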
/* This is similar to make_range in fold-const.c, but on top of
   GIMPLE instead of trees.  If EXP is non-NULL, it should be
   an SSA_NAME and STMT argument is ignored, otherwise STMT
   argument should be a GIMPLE_COND.  */
1891 init_range_entry (struct range_entry
*r
, tree exp
, gimple stmt
)
1895 bool is_bool
, strict_overflow_p
;
1899 r
->strict_overflow_p
= false;
1901 r
->high
= NULL_TREE
;
1902 if (exp
!= NULL_TREE
1903 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
1906 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1907 and see if we can refine the range. Some of the cases below may not
1908 happen, but it doesn't seem worth worrying about this. We "continue"
1909 the outer loop when we've changed something; otherwise we "break"
1910 the switch, which will "break" the while. */
1911 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
1914 strict_overflow_p
= false;
1916 if (exp
== NULL_TREE
)
1918 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
1920 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
1925 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
1930 enum tree_code code
;
1931 tree arg0
, arg1
, exp_type
;
1935 if (exp
!= NULL_TREE
)
1937 if (TREE_CODE (exp
) != SSA_NAME
1938 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
))
1941 stmt
= SSA_NAME_DEF_STMT (exp
);
1942 if (!is_gimple_assign (stmt
))
1945 code
= gimple_assign_rhs_code (stmt
);
1946 arg0
= gimple_assign_rhs1 (stmt
);
1947 arg1
= gimple_assign_rhs2 (stmt
);
1948 exp_type
= TREE_TYPE (exp
);
1952 code
= gimple_cond_code (stmt
);
1953 arg0
= gimple_cond_lhs (stmt
);
1954 arg1
= gimple_cond_rhs (stmt
);
1955 exp_type
= boolean_type_node
;
1958 if (TREE_CODE (arg0
) != SSA_NAME
)
1960 loc
= gimple_location (stmt
);
1964 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
1965 /* Ensure the range is either +[-,0], +[0,0],
1966 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1967 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1968 or similar expression of unconditional true or
1969 false, it should not be negated. */
1970 && ((high
&& integer_zerop (high
))
1971 || (low
&& integer_onep (low
))))
1984 if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
1986 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
1991 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
2006 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
2008 &strict_overflow_p
);
2009 if (nexp
!= NULL_TREE
)
2012 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2025 r
->strict_overflow_p
= strict_overflow_p
;
/* Comparison function for qsort.  Sort entries
   without SSA_NAME exp first, then with SSA_NAMEs sorted
   by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
   by increasing ->low and if ->low is the same, by increasing
   ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
   means maximum.  */
2037 range_entry_cmp (const void *a
, const void *b
)
2039 const struct range_entry
*p
= (const struct range_entry
*) a
;
2040 const struct range_entry
*q
= (const struct range_entry
*) b
;
2042 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
2044 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2046 /* Group range_entries for the same SSA_NAME together. */
2047 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
2049 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
2051 /* If ->low is different, NULL low goes first, then by
2053 if (p
->low
!= NULL_TREE
)
2055 if (q
->low
!= NULL_TREE
)
2057 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2059 if (tem
&& integer_onep (tem
))
2061 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2063 if (tem
&& integer_onep (tem
))
2069 else if (q
->low
!= NULL_TREE
)
2071 /* If ->high is different, NULL high goes last, before that by
2073 if (p
->high
!= NULL_TREE
)
2075 if (q
->high
!= NULL_TREE
)
2077 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2079 if (tem
&& integer_onep (tem
))
2081 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2083 if (tem
&& integer_onep (tem
))
2089 else if (q
->high
!= NULL_TREE
)
2091 /* If both ranges are the same, sort below by ascending idx. */
2096 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2099 if (p
->idx
< q
->idx
)
2103 gcc_checking_assert (p
->idx
> q
->idx
);
/* Helper routine of optimize_range_tests.
   [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
   RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
   OPCODE and OPS are arguments of optimize_range_tests.  If OTHERRANGE
   is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
   an array of COUNT pointers to other ranges.  Return
   true if the range merge has been successful.
   If OPCODE is ERROR_MARK, this is called from within
   maybe_optimize_range_tests and is performing inter-bb range optimization.
   In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
   oe->rank.  */
2121 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2122 struct range_entry
**otherrangep
,
2123 unsigned int count
, enum tree_code opcode
,
2124 vec
<operand_entry_t
> *ops
, tree exp
, gimple_seq seq
,
2125 bool in_p
, tree low
, tree high
, bool strict_overflow_p
)
2127 operand_entry_t oe
= (*ops
)[range
->idx
];
2129 gimple stmt
= op
? SSA_NAME_DEF_STMT (op
) :
2130 last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2131 location_t loc
= gimple_location (stmt
);
2132 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2133 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2135 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2136 gimple_stmt_iterator gsi
;
2139 if (tem
== NULL_TREE
)
2142 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2143 warning_at (loc
, OPT_Wstrict_overflow
,
2144 "assuming signed overflow does not occur "
2145 "when simplifying range test");
2147 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2149 struct range_entry
*r
;
2150 fprintf (dump_file
, "Optimizing range tests ");
2151 print_generic_expr (dump_file
, range
->exp
, 0);
2152 fprintf (dump_file
, " %c[", range
->in_p
? '+' : '-');
2153 print_generic_expr (dump_file
, range
->low
, 0);
2154 fprintf (dump_file
, ", ");
2155 print_generic_expr (dump_file
, range
->high
, 0);
2156 fprintf (dump_file
, "]");
2157 for (i
= 0; i
< count
; i
++)
2163 fprintf (dump_file
, " and %c[", r
->in_p
? '+' : '-');
2164 print_generic_expr (dump_file
, r
->low
, 0);
2165 fprintf (dump_file
, ", ");
2166 print_generic_expr (dump_file
, r
->high
, 0);
2167 fprintf (dump_file
, "]");
2169 fprintf (dump_file
, "\n into ");
2170 print_generic_expr (dump_file
, tem
, 0);
2171 fprintf (dump_file
, "\n");
2174 if (opcode
== BIT_IOR_EXPR
2175 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2176 tem
= invert_truthvalue_loc (loc
, tem
);
2178 tem
= fold_convert_loc (loc
, optype
, tem
);
2179 gsi
= gsi_for_stmt (stmt
);
2180 unsigned int uid
= gimple_uid (stmt
);
2181 /* In rare cases range->exp can be equal to lhs of stmt.
2182 In that case we have to insert after the stmt rather then before
2183 it. If stmt is a PHI, insert it at the start of the basic block. */
2184 if (op
!= range
->exp
)
2186 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2187 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
2191 else if (gimple_code (stmt
) != GIMPLE_PHI
)
2193 gsi_insert_seq_after (&gsi
, seq
, GSI_CONTINUE_LINKING
);
2194 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, false,
2195 GSI_CONTINUE_LINKING
);
2199 gsi
= gsi_after_labels (gimple_bb (stmt
));
2200 if (!gsi_end_p (gsi
))
2201 uid
= gimple_uid (gsi_stmt (gsi
));
2204 gsi
= gsi_start_bb (gimple_bb (stmt
));
2206 while (!gsi_end_p (gsi
))
2208 uid
= gimple_uid (gsi_stmt (gsi
));
2212 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2213 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
2215 if (gsi_end_p (gsi
))
2216 gsi
= gsi_last_bb (gimple_bb (stmt
));
2220 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2221 if (gimple_uid (gsi_stmt (gsi
)))
2224 gimple_set_uid (gsi_stmt (gsi
), uid
);
2231 range
->strict_overflow_p
= false;
2233 for (i
= 0; i
< count
; i
++)
2236 range
= otherrange
+ i
;
2238 range
= otherrangep
[i
];
2239 oe
= (*ops
)[range
->idx
];
2240 /* Now change all the other range test immediate uses, so that
2241 those tests will be optimized away. */
2242 if (opcode
== ERROR_MARK
)
2245 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
2246 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
2248 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
2249 ? boolean_false_node
: boolean_true_node
);
2252 oe
->op
= error_mark_node
;
2253 range
->exp
= NULL_TREE
;
/* Optimize X == CST1 || X == CST2
   if popcount (CST1 ^ CST2) == 1 into
   (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
   Similarly for ranges.  E.g.
   X != 2 && X != 3 && X != 10 && X != 11
   will be transformed by the previous optimization into
   !((X - 2U) <= 1U || (X - 10U) <= 1U)
   and this loop can transform that into
   !(((X & ~8) - 2U) <= 1U).  */

static bool
optimize_range_tests_xor (enum tree_code opcode, tree type,
			  tree lowi, tree lowj, tree highi, tree highj,
			  vec<operand_entry_t> *ops,
			  struct range_entry *rangei,
			  struct range_entry *rangej)
{
  tree lowxor, highxor, tem, exp;
  /* Check lowi ^ lowj == highi ^ highj and
     popcount (lowi ^ lowj) == 1.  */
  lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
  if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
    return false;
  if (!integer_pow2p (lowxor))
    return false;
  highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
  if (!tree_int_cst_equal (lowxor, highxor))
    return false;

  tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
  exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
  lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
  highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
  if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
			 NULL, rangei->in_p, lowj, highj,
			 rangei->strict_overflow_p
			 || rangej->strict_overflow_p))
    return true;
  return false;
}
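/* Illustrative sketch, not part of the original pass: at the source
   level, the XOR trick above merges two equality tests whose constants
   differ in exactly one bit into one masked compare.  The helper below
   is a hypothetical stand-alone example in plain C, independent of the
   GCC internals used by the function above.  */

static inline int
xor_range_example (unsigned int x)
{
  /* x == 2 || x == 3:  2 ^ 3 == 1 is a power of two, so clearing that
     bit from X makes both accepted values compare equal to 2.  */
  return (x & ~1U) == 2;
}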
/* Optimize X == CST1 || X == CST2
   if popcount (CST2 - CST1) == 1 into
   ((X - CST1) & ~(CST2 - CST1)) == 0.
   Similarly for ranges.  E.g.
   X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
   || X == 75 || X == 45
   will be transformed by the previous optimization into
   (X - 43U) <= 3U || (X - 75U) <= 3U
   and this loop can transform that into
   ((X - 43U) & ~(75U - 43U)) <= 3U.  */

static bool
optimize_range_tests_diff (enum tree_code opcode, tree type,
			   tree lowi, tree lowj, tree highi, tree highj,
			   vec<operand_entry_t> *ops,
			   struct range_entry *rangei,
			   struct range_entry *rangej)
{
  tree tem1, tem2, mask;
  /* Check highi - lowi == highj - lowj.  */
  tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
    return false;
  tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
  if (!tree_int_cst_equal (tem1, tem2))
    return false;
  /* Check popcount (lowj - lowi) == 1.  */
  tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
    return false;
  if (!integer_pow2p (tem1))
    return false;

  type = unsigned_type_for (type);
  tem1 = fold_convert (type, tem1);
  tem2 = fold_convert (type, tem2);
  lowi = fold_convert (type, lowi);
  mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
  tem1 = fold_binary (MINUS_EXPR, type,
		      fold_convert (type, rangei->exp), lowi);
  tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
  lowj = build_int_cst (type, 0);
  if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
			 NULL, rangei->in_p, lowj, tem2,
			 rangei->strict_overflow_p
			 || rangej->strict_overflow_p))
    return true;
  return false;
}
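/* Illustrative sketch, not part of the original pass: the difference
   variant above handles constants whose difference is a power of two.
   Hypothetical plain-C example using the constants from the comment.  */

static inline int
diff_range_example (unsigned int x)
{
  /* x == 43 || x == 75:  75 - 43 == 32 is a power of two, so
     ((x - 43) & ~32) == 0 holds for exactly those two values.  */
  return ((x - 43U) & ~32U) == 0;
}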
/* Perform the checks common to optimize_range_tests_xor and
   optimize_range_tests_diff on each candidate pair of ranges.
   If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
   otherwise call optimize_range_tests_diff.  */

static bool
optimize_range_tests_1 (enum tree_code opcode, int first, int length,
			bool optimize_xor, vec<operand_entry_t> *ops,
			struct range_entry *ranges)
{
  int i, j;
  bool any_changes = false;
  for (i = first; i < length; i++)
    {
      tree lowi, highi, lowj, highj, type, tem;

      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
	continue;
      type = TREE_TYPE (ranges[i].exp);
      if (!INTEGRAL_TYPE_P (type))
	continue;
      lowi = ranges[i].low;
      if (lowi == NULL_TREE)
	lowi = TYPE_MIN_VALUE (type);
      highi = ranges[i].high;
      if (highi == NULL_TREE)
	continue;
      for (j = i + 1; j < length && j < i + 64; j++)
	{
	  bool changes;
	  if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
	    continue;
	  lowj = ranges[j].low;
	  if (lowj == NULL_TREE)
	    continue;
	  highj = ranges[j].high;
	  if (highj == NULL_TREE)
	    highj = TYPE_MAX_VALUE (type);
	  /* Check lowj > highi.  */
	  tem = fold_binary (GT_EXPR, boolean_type_node,
			     lowj, highi);
	  if (tem == NULL_TREE || !integer_onep (tem))
	    continue;
	  if (optimize_xor)
	    changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
						highi, highj, ops,
						ranges + i, ranges + j);
	  else
	    changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
						 highi, highj, ops,
						 ranges + i, ranges + j);
	  if (changes)
	    {
	      any_changes = true;
	      break;
	    }
	}
    }
  return any_changes;
}
2409 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2410 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2411 EXP on success, NULL otherwise. */
2414 extract_bit_test_mask (tree exp
, int prec
, tree totallow
, tree low
, tree high
,
2415 wide_int
*mask
, tree
*totallowp
)
2417 tree tem
= int_const_binop (MINUS_EXPR
, high
, low
);
2418 if (tem
== NULL_TREE
2419 || TREE_CODE (tem
) != INTEGER_CST
2420 || TREE_OVERFLOW (tem
)
2421 || tree_int_cst_sgn (tem
) == -1
2422 || compare_tree_int (tem
, prec
) != -1)
2425 unsigned HOST_WIDE_INT max
= tree_to_uhwi (tem
) + 1;
2426 *mask
= wi::shifted_mask (0, max
, false, prec
);
2427 if (TREE_CODE (exp
) == BIT_AND_EXPR
2428 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
2430 widest_int msk
= wi::to_widest (TREE_OPERAND (exp
, 1));
2431 msk
= wi::zext (~msk
, TYPE_PRECISION (TREE_TYPE (exp
)));
2432 if (wi::popcount (msk
) == 1
2433 && wi::ltu_p (msk
, prec
- max
))
2435 *mask
|= wi::shifted_mask (msk
.to_uhwi (), max
, false, prec
);
2436 max
+= msk
.to_uhwi ();
2437 exp
= TREE_OPERAND (exp
, 0);
2438 if (integer_zerop (low
)
2439 && TREE_CODE (exp
) == PLUS_EXPR
2440 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
2443 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp
, 1)),
2444 TYPE_PRECISION (TREE_TYPE (low
))));
2445 tree tbias
= wide_int_to_tree (TREE_TYPE (low
), bias
);
2449 exp
= TREE_OPERAND (exp
, 0);
2453 else if (!tree_int_cst_lt (totallow
, tbias
))
2455 bias
-= wi::to_widest (totallow
);
2456 if (wi::ges_p (bias
, 0) && wi::lts_p (bias
, prec
- max
))
2458 *mask
= wi::lshift (*mask
, bias
);
2459 exp
= TREE_OPERAND (exp
, 0);
2468 if (!tree_int_cst_lt (totallow
, low
))
2470 tem
= int_const_binop (MINUS_EXPR
, low
, totallow
);
2471 if (tem
== NULL_TREE
2472 || TREE_CODE (tem
) != INTEGER_CST
2473 || TREE_OVERFLOW (tem
)
2474 || compare_tree_int (tem
, prec
- max
) == 1)
2477 *mask
= wi::lshift (*mask
, wi::to_widest (tem
));
/* Attempt to optimize small range tests using bit test.
   E.g.
   X != 43 && X != 76 && X != 44 && X != 78 && X != 49
   && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
   has been optimized by earlier optimizations into:
   ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
   As the whole range from 43 to 82 spans fewer than 64 values,
   for 64-bit word targets optimize that into:
   (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */
2492 optimize_range_tests_to_bit_test (enum tree_code opcode
, int first
, int length
,
2493 vec
<operand_entry_t
> *ops
,
2494 struct range_entry
*ranges
)
2497 bool any_changes
= false;
2498 int prec
= GET_MODE_BITSIZE (word_mode
);
2499 auto_vec
<struct range_entry
*, 64> candidates
;
2501 for (i
= first
; i
< length
- 2; i
++)
2503 tree lowi
, highi
, lowj
, highj
, type
;
2505 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2507 type
= TREE_TYPE (ranges
[i
].exp
);
2508 if (!INTEGRAL_TYPE_P (type
))
2510 lowi
= ranges
[i
].low
;
2511 if (lowi
== NULL_TREE
)
2512 lowi
= TYPE_MIN_VALUE (type
);
2513 highi
= ranges
[i
].high
;
2514 if (highi
== NULL_TREE
)
2517 tree exp
= extract_bit_test_mask (ranges
[i
].exp
, prec
, lowi
, lowi
,
2518 highi
, &mask
, &lowi
);
2519 if (exp
== NULL_TREE
)
2521 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2522 candidates
.truncate (0);
2523 int end
= MIN (i
+ 64, length
);
2524 for (j
= i
+ 1; j
< end
; j
++)
2527 if (ranges
[j
].exp
== NULL_TREE
|| ranges
[j
].in_p
)
2529 if (ranges
[j
].exp
== exp
)
2531 else if (TREE_CODE (ranges
[j
].exp
) == BIT_AND_EXPR
)
2533 exp2
= TREE_OPERAND (ranges
[j
].exp
, 0);
2536 else if (TREE_CODE (exp2
) == PLUS_EXPR
)
2538 exp2
= TREE_OPERAND (exp2
, 0);
2548 lowj
= ranges
[j
].low
;
2549 if (lowj
== NULL_TREE
)
2551 highj
= ranges
[j
].high
;
2552 if (highj
== NULL_TREE
)
2553 highj
= TYPE_MAX_VALUE (type
);
2555 exp2
= extract_bit_test_mask (ranges
[j
].exp
, prec
, lowi
, lowj
,
2556 highj
, &mask2
, NULL
);
2560 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2561 candidates
.safe_push (&ranges
[j
]);
2564 /* If we need otherwise 3 or more comparisons, use a bit test. */
2565 if (candidates
.length () >= 2)
2567 tree high
= wide_int_to_tree (TREE_TYPE (lowi
),
2568 wi::to_widest (lowi
)
2569 + prec
- 1 - wi::clz (mask
));
2570 operand_entry_t oe
= (*ops
)[ranges
[i
].idx
];
2572 gimple stmt
= op
? SSA_NAME_DEF_STMT (op
)
2573 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2574 location_t loc
= gimple_location (stmt
);
2575 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
	  /* See if it isn't cheaper to pretend the minimum value of the
	     range is 0, if the maximum value is small enough.
	     We can then avoid subtracting the minimum value, but the
	     mask constant could perhaps be more expensive.  */
2581 if (compare_tree_int (lowi
, 0) > 0
2582 && compare_tree_int (high
, prec
) < 0)
2585 HOST_WIDE_INT m
= tree_to_uhwi (lowi
);
2586 rtx reg
= gen_raw_REG (word_mode
, 10000);
2587 bool speed_p
= optimize_bb_for_speed_p (gimple_bb (stmt
));
2588 cost_diff
= set_rtx_cost (gen_rtx_PLUS (word_mode
, reg
,
2589 GEN_INT (-m
)), speed_p
);
2590 rtx r
= immed_wide_int_const (mask
, word_mode
);
2591 cost_diff
+= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
2593 r
= immed_wide_int_const (wi::lshift (mask
, m
), word_mode
);
2594 cost_diff
-= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
2598 mask
= wi::lshift (mask
, m
);
2599 lowi
= build_zero_cst (TREE_TYPE (lowi
));
2603 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2605 if (tem
== NULL_TREE
|| is_gimple_val (tem
))
2607 tree etype
= unsigned_type_for (TREE_TYPE (exp
));
2608 exp
= fold_build2_loc (loc
, MINUS_EXPR
, etype
,
2609 fold_convert_loc (loc
, etype
, exp
),
2610 fold_convert_loc (loc
, etype
, lowi
));
2611 exp
= fold_convert_loc (loc
, integer_type_node
, exp
);
2612 tree word_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
2613 exp
= fold_build2_loc (loc
, LSHIFT_EXPR
, word_type
,
2614 build_int_cst (word_type
, 1), exp
);
2615 exp
= fold_build2_loc (loc
, BIT_AND_EXPR
, word_type
, exp
,
2616 wide_int_to_tree (word_type
, mask
));
2617 exp
= fold_build2_loc (loc
, EQ_EXPR
, optype
, exp
,
2618 build_zero_cst (word_type
));
2619 if (is_gimple_val (exp
))
	      /* The shift might have undefined behavior if TEM is true,
		 but reassociate_bb isn't prepared to have basic blocks
		 split when it is running.  So, temporarily emit a code
		 with BIT_IOR_EXPR instead of &&, and fix it up later
		 through the reassoc_branch_fixups vector.  */
2628 tem
= force_gimple_operand (tem
, &seq
, true, NULL_TREE
);
2629 gcc_assert (TREE_CODE (tem
) == SSA_NAME
);
2630 gimple_set_visited (SSA_NAME_DEF_STMT (tem
), true);
2632 exp
= force_gimple_operand (exp
, &seq2
, true, NULL_TREE
);
2633 gimple_seq_add_seq_without_update (&seq
, seq2
);
2634 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2635 gimple_set_visited (SSA_NAME_DEF_STMT (exp
), true);
2636 gimple g
= gimple_build_assign (make_ssa_name (optype
),
2637 BIT_IOR_EXPR
, tem
, exp
);
2638 gimple_set_location (g
, loc
);
2639 gimple_seq_add_stmt_without_update (&seq
, g
);
2640 exp
= gimple_assign_lhs (g
);
2641 tree val
= build_zero_cst (optype
);
2642 if (update_range_test (&ranges
[i
], NULL
, candidates
.address (),
2643 candidates
.length (), opcode
, ops
, exp
,
2644 seq
, false, val
, val
, strict_overflow_p
))
2647 reassoc_branch_fixups
.safe_push (tem
);
2650 gimple_seq_discard (seq
);
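/* Illustrative sketch, not part of the original pass: the bit test
   built above amounts to a range check followed by a word-sized mask
   lookup.  Hypothetical plain-C example for the small set {2, 3, 5, 7},
   independent of the GCC internals used above.  */

static inline int
bit_test_example (unsigned int x)
{
  /* x == 2 || x == 3 || x == 5 || x == 7 becomes one shift-and-mask
     once all values fit in a word: 0xAC has bits 2, 3, 5 and 7 set.
     The x < 8 guard keeps the shift amount in range.  */
  return x < 8 && ((1U << x) & 0xACU) != 0;
}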
/* Optimize range tests, similarly to how fold_range_test optimizes
   it on trees.  The tree code for the binary
   operation between all the operands is OPCODE.
   If OPCODE is ERROR_MARK, optimize_range_tests is called from within
   maybe_optimize_range_tests for inter-bb range optimization.
   In that case, if oe->op is NULL, oe->id is the index of the bb whose
   GIMPLE_COND is && or ||ed into the test, and oe->rank says
   the actual opcode.  */
2666 optimize_range_tests (enum tree_code opcode
,
2667 vec
<operand_entry_t
> *ops
)
2669 unsigned int length
= ops
->length (), i
, j
, first
;
2671 struct range_entry
*ranges
;
2672 bool any_changes
= false;
2677 ranges
= XNEWVEC (struct range_entry
, length
);
2678 for (i
= 0; i
< length
; i
++)
2682 init_range_entry (ranges
+ i
, oe
->op
,
2684 last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
)));
2685 /* For | invert it now, we will invert it again before emitting
2686 the optimized expression. */
2687 if (opcode
== BIT_IOR_EXPR
2688 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2689 ranges
[i
].in_p
= !ranges
[i
].in_p
;
2692 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
2693 for (i
= 0; i
< length
; i
++)
2694 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
2697 /* Try to merge ranges. */
2698 for (first
= i
; i
< length
; i
++)
2700 tree low
= ranges
[i
].low
;
2701 tree high
= ranges
[i
].high
;
2702 int in_p
= ranges
[i
].in_p
;
2703 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
2704 int update_fail_count
= 0;
2706 for (j
= i
+ 1; j
< length
; j
++)
2708 if (ranges
[i
].exp
!= ranges
[j
].exp
)
2710 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
2711 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
2713 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
2719 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, NULL
, j
- i
- 1,
2720 opcode
, ops
, ranges
[i
].exp
, NULL
, in_p
,
2721 low
, high
, strict_overflow_p
))
2726 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2727 while update_range_test would fail. */
2728 else if (update_fail_count
== 64)
2731 ++update_fail_count
;
2734 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
2737 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
2738 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
2740 if (lshift_cheap_p (optimize_function_for_speed_p (cfun
)))
2741 any_changes
|= optimize_range_tests_to_bit_test (opcode
, first
, length
,
2744 if (any_changes
&& opcode
!= ERROR_MARK
)
2747 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
2749 if (oe
->op
== error_mark_node
)
2758 XDELETEVEC (ranges
);
/* Return true if STMT is a cast like:

   <bb N>:
   ...
   _123 = (int) _234;

   <bb M>:
   # _345 = PHI <_123(N), 1(...), 1(...)>

   where _234 has bool type, _123 has single use and
   bb N has a single successor M.  This is commonly used in
   the last block of a range test.  */
2774 final_range_test_p (gimple stmt
)
2776 basic_block bb
, rhs_bb
;
2779 use_operand_p use_p
;
2782 if (!gimple_assign_cast_p (stmt
))
2784 bb
= gimple_bb (stmt
);
2785 if (!single_succ_p (bb
))
2787 e
= single_succ_edge (bb
);
2788 if (e
->flags
& EDGE_COMPLEX
)
2791 lhs
= gimple_assign_lhs (stmt
);
2792 rhs
= gimple_assign_rhs1 (stmt
);
2793 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2794 || TREE_CODE (rhs
) != SSA_NAME
2795 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
)
2798 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2799 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
2802 if (gimple_code (use_stmt
) != GIMPLE_PHI
2803 || gimple_bb (use_stmt
) != e
->dest
)
2806 /* And that the rhs is defined in the same loop. */
2807 rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
));
2809 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
2815 /* Return true if BB is suitable basic block for inter-bb range test
2816 optimization. If BACKWARD is true, BB should be the only predecessor
2817 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2818 or compared with to find a common basic block to which all conditions
2819 branch to if true resp. false. If BACKWARD is false, TEST_BB should
2820 be the only predecessor of BB. */
2823 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
2826 edge_iterator ei
, ei2
;
2830 bool other_edge_seen
= false;
2835 /* Check last stmt first. */
2836 stmt
= last_stmt (bb
);
2838 || (gimple_code (stmt
) != GIMPLE_COND
2839 && (backward
|| !final_range_test_p (stmt
)))
2840 || gimple_visited_p (stmt
)
2841 || stmt_could_throw_p (stmt
)
2844 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
2847 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2848 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2849 to *OTHER_BB (if not set yet, try to find it out). */
2850 if (EDGE_COUNT (bb
->succs
) != 2)
2852 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2854 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2856 if (e
->dest
== test_bb
)
2865 if (*other_bb
== NULL
)
2867 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
2868 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
2870 else if (e
->dest
== e2
->dest
)
2871 *other_bb
= e
->dest
;
2872 if (*other_bb
== NULL
)
2875 if (e
->dest
== *other_bb
)
2876 other_edge_seen
= true;
2880 if (*other_bb
== NULL
|| !other_edge_seen
)
2883 else if (single_succ (bb
) != *other_bb
)
2886 /* Now check all PHIs of *OTHER_BB. */
2887 e
= find_edge (bb
, *other_bb
);
2888 e2
= find_edge (test_bb
, *other_bb
);
2889 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2891 gphi
*phi
= gsi
.phi ();
2892 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2893 corresponding to BB and TEST_BB predecessor must be the same. */
2894 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
2895 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
2897 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2898 one of the PHIs should have the lhs of the last stmt in
2899 that block as PHI arg and that PHI should have 0 or 1
2900 corresponding to it in all other range test basic blocks
2904 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
2905 == gimple_assign_lhs (stmt
)
2906 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
2907 || integer_onep (gimple_phi_arg_def (phi
,
2913 gimple test_last
= last_stmt (test_bb
);
2914 if (gimple_code (test_last
) != GIMPLE_COND
2915 && gimple_phi_arg_def (phi
, e2
->dest_idx
)
2916 == gimple_assign_lhs (test_last
)
2917 && (integer_zerop (gimple_phi_arg_def (phi
, e
->dest_idx
))
2918 || integer_onep (gimple_phi_arg_def (phi
, e
->dest_idx
))))
2928 /* Return true if BB doesn't have side-effects that would disallow
2929 range test optimization, all SSA_NAMEs set in the bb are consumed
2930 in the bb and there are no PHIs. */
2933 no_side_effect_bb (basic_block bb
)
2935 gimple_stmt_iterator gsi
;
2938 if (!gimple_seq_empty_p (phi_nodes (bb
)))
2940 last
= last_stmt (bb
);
2941 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2943 gimple stmt
= gsi_stmt (gsi
);
2945 imm_use_iterator imm_iter
;
2946 use_operand_p use_p
;
2948 if (is_gimple_debug (stmt
))
2950 if (gimple_has_side_effects (stmt
))
2954 if (!is_gimple_assign (stmt
))
2956 lhs
= gimple_assign_lhs (stmt
);
2957 if (TREE_CODE (lhs
) != SSA_NAME
)
2959 if (gimple_assign_rhs_could_trap_p (stmt
))
2961 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
2963 gimple use_stmt
= USE_STMT (use_p
);
2964 if (is_gimple_debug (use_stmt
))
2966 if (gimple_bb (use_stmt
) != bb
)
2973 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2974 return true and fill in *OPS recursively. */
2977 get_ops (tree var
, enum tree_code code
, vec
<operand_entry_t
> *ops
,
2980 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2984 if (!is_reassociable_op (stmt
, code
, loop
))
2987 rhs
[0] = gimple_assign_rhs1 (stmt
);
2988 rhs
[1] = gimple_assign_rhs2 (stmt
);
2989 gimple_set_visited (stmt
, true);
2990 for (i
= 0; i
< 2; i
++)
2991 if (TREE_CODE (rhs
[i
]) == SSA_NAME
2992 && !get_ops (rhs
[i
], code
, ops
, loop
)
2993 && has_single_use (rhs
[i
]))
2995 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
3001 ops
->safe_push (oe
);
/* Find the ops that were added by get_ops starting from VAR, see if
   they were changed during update_range_test and if yes, create new
   stmts.  */
3011 update_ops (tree var
, enum tree_code code
, vec
<operand_entry_t
> ops
,
3012 unsigned int *pidx
, struct loop
*loop
)
3014 gimple stmt
= SSA_NAME_DEF_STMT (var
);
3018 if (!is_reassociable_op (stmt
, code
, loop
))
3021 rhs
[0] = gimple_assign_rhs1 (stmt
);
3022 rhs
[1] = gimple_assign_rhs2 (stmt
);
3025 for (i
= 0; i
< 2; i
++)
3026 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
3028 rhs
[2 + i
] = update_ops (rhs
[i
], code
, ops
, pidx
, loop
);
3029 if (rhs
[2 + i
] == NULL_TREE
)
3031 if (has_single_use (rhs
[i
]))
3032 rhs
[2 + i
] = ops
[(*pidx
)++]->op
;
3034 rhs
[2 + i
] = rhs
[i
];
3037 if ((rhs
[2] != rhs
[0] || rhs
[3] != rhs
[1])
3038 && (rhs
[2] != rhs
[1] || rhs
[3] != rhs
[0]))
3040 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3041 var
= make_ssa_name (TREE_TYPE (var
));
3042 gassign
*g
= gimple_build_assign (var
, gimple_assign_rhs_code (stmt
),
3044 gimple_set_uid (g
, gimple_uid (stmt
));
3045 gimple_set_visited (g
, true);
3046 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3051 /* Structure to track the initial value passed to get_ops and
3052 the range in the ops vector for each basic block. */
3054 struct inter_bb_range_test_entry
3057 unsigned int first_idx
, last_idx
;
3060 /* Inter-bb range test optimization. */
3063 maybe_optimize_range_tests (gimple stmt
)
3065 basic_block first_bb
= gimple_bb (stmt
);
3066 basic_block last_bb
= first_bb
;
3067 basic_block other_bb
= NULL
;
3071 auto_vec
<operand_entry_t
> ops
;
3072 auto_vec
<inter_bb_range_test_entry
> bbinfo
;
3073 bool any_changes
= false;
3075 /* Consider only basic blocks that end with GIMPLE_COND or
3076 a cast statement satisfying final_range_test_p. All
3077 but the last bb in the first_bb .. last_bb range
3078 should end with GIMPLE_COND. */
3079 if (gimple_code (stmt
) == GIMPLE_COND
)
3081 if (EDGE_COUNT (first_bb
->succs
) != 2)
3084 else if (final_range_test_p (stmt
))
3085 other_bb
= single_succ (first_bb
);
3089 if (stmt_could_throw_p (stmt
))
3092 /* As relative ordering of post-dominator sons isn't fixed,
3093 maybe_optimize_range_tests can be called first on any
3094 bb in the range we want to optimize. So, start searching
3095 backwards, if first_bb can be set to a predecessor. */
3096 while (single_pred_p (first_bb
))
3098 basic_block pred_bb
= single_pred (first_bb
);
3099 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, true))
3101 if (!no_side_effect_bb (first_bb
))
3105 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3106 Before starting forward search in last_bb successors, find
3107 out the other_bb. */
3108 if (first_bb
== last_bb
)
3111 /* As non-GIMPLE_COND last stmt always terminates the range,
3112 if forward search didn't discover anything, just give up. */
3113 if (gimple_code (stmt
) != GIMPLE_COND
)
3115 /* Look at both successors. Either it ends with a GIMPLE_COND
3116 and satisfies suitable_cond_bb, or ends with a cast and
3117 other_bb is that cast's successor. */
3118 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
3119 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
3120 || e
->dest
== first_bb
)
3122 else if (single_pred_p (e
->dest
))
3124 stmt
= last_stmt (e
->dest
);
3126 && gimple_code (stmt
) == GIMPLE_COND
3127 && EDGE_COUNT (e
->dest
->succs
) == 2)
3129 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
, true))
3135 && final_range_test_p (stmt
)
3136 && find_edge (first_bb
, single_succ (e
->dest
)))
3138 other_bb
= single_succ (e
->dest
);
3139 if (other_bb
== first_bb
)
3143 if (other_bb
== NULL
)
3146 /* Now do the forward search, moving last_bb to successor bbs
3147 that aren't other_bb. */
3148 while (EDGE_COUNT (last_bb
->succs
) == 2)
3150 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
3151 if (e
->dest
!= other_bb
)
3155 if (!single_pred_p (e
->dest
))
3157 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, false))
3159 if (!no_side_effect_bb (e
->dest
))
3163 if (first_bb
== last_bb
)
3165 /* Here basic blocks first_bb through last_bb's predecessor
3166 end with GIMPLE_COND, all of them have one of the edges to
3167 other_bb and another to another block in the range,
3168 all blocks except first_bb don't have side-effects and
3169 last_bb ends with either GIMPLE_COND, or cast satisfying
3170 final_range_test_p. */
3171 for (bb
= last_bb
; ; bb
= single_pred (bb
))
3173 enum tree_code code
;
3175 inter_bb_range_test_entry bb_ent
;
3177 bb_ent
.op
= NULL_TREE
;
3178 bb_ent
.first_idx
= ops
.length ();
3179 bb_ent
.last_idx
= bb_ent
.first_idx
;
3180 e
= find_edge (bb
, other_bb
);
3181 stmt
= last_stmt (bb
);
3182 gimple_set_visited (stmt
, true);
3183 if (gimple_code (stmt
) != GIMPLE_COND
)
3185 use_operand_p use_p
;
3190 lhs
= gimple_assign_lhs (stmt
);
3191 rhs
= gimple_assign_rhs1 (stmt
);
3192 gcc_assert (bb
== last_bb
);
3199 # _345 = PHI <_123(N), 1(...), 1(...)>
3201 or 0 instead of 1. If it is 0, the _234
3202 range test is anded together with all the
3203 other range tests, if it is 1, it is ored with
3205 single_imm_use (lhs
, &use_p
, &phi
);
3206 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
3207 e2
= find_edge (first_bb
, other_bb
);
3209 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
3210 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
3211 code
= BIT_AND_EXPR
;
3214 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
3215 code
= BIT_IOR_EXPR
;
3218 /* If _234 SSA_NAME_DEF_STMT is
3220 (or &, corresponding to 1/0 in the phi arguments,
3221 push into ops the individual range test arguments
3222 of the bitwise or resp. and, recursively. */
3223 if (!get_ops (rhs
, code
, &ops
,
3224 loop_containing_stmt (stmt
))
3225 && has_single_use (rhs
))
3227 /* Otherwise, push the _234 range test itself. */
3229 = (operand_entry_t
) pool_alloc (operand_entry_pool
);
3239 bb_ent
.last_idx
= ops
.length ();
3241 bbinfo
.safe_push (bb_ent
);
3244 /* Otherwise stmt is GIMPLE_COND. */
3245 code
= gimple_cond_code (stmt
);
3246 lhs
= gimple_cond_lhs (stmt
);
3247 rhs
= gimple_cond_rhs (stmt
);
3248 if (TREE_CODE (lhs
) == SSA_NAME
3249 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3250 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
3251 || rhs
!= boolean_false_node
3252 /* Either push into ops the individual bitwise
3253 or resp. and operands, depending on which
3254 edge is other_bb. */
3255 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
3256 ^ (code
== EQ_EXPR
))
3257 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
3258 loop_containing_stmt (stmt
))))
3260 /* Or push the GIMPLE_COND stmt itself. */
3262 = (operand_entry_t
) pool_alloc (operand_entry_pool
);
3265 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
3266 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
	  /* oe->op = NULL signals that there is no SSA_NAME
	     for the range test, and oe->id instead is the
	     basic block number at whose end the GIMPLE_COND
	     is.  */
3277 else if (ops
.length () > bb_ent
.first_idx
)
3280 bb_ent
.last_idx
= ops
.length ();
3282 bbinfo
.safe_push (bb_ent
);
3286 if (ops
.length () > 1)
3287 any_changes
= optimize_range_tests (ERROR_MARK
, &ops
);
3291 /* update_ops relies on has_single_use predicates returning the
3292 same values as it did during get_ops earlier. Additionally it
3293 never removes statements, only adds new ones and it should walk
3294 from the single imm use and check the predicate already before
3295 making those changes.
3296 On the other side, the handling of GIMPLE_COND directly can turn
3297 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3298 it needs to be done in a separate loop afterwards. */
3299 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
3301 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
3302 && bbinfo
[idx
].op
!= NULL_TREE
)
3306 stmt
= last_stmt (bb
);
3307 new_op
= update_ops (bbinfo
[idx
].op
,
3309 ops
[bbinfo
[idx
].first_idx
]->rank
,
3310 ops
, &bbinfo
[idx
].first_idx
,
3311 loop_containing_stmt (stmt
));
3312 if (new_op
== NULL_TREE
)
3314 gcc_assert (bb
== last_bb
);
3315 new_op
= ops
[bbinfo
[idx
].first_idx
++]->op
;
3317 if (bbinfo
[idx
].op
!= new_op
)
3319 imm_use_iterator iter
;
3320 use_operand_p use_p
;
3321 gimple use_stmt
, cast_stmt
= NULL
;
3323 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, bbinfo
[idx
].op
)
3324 if (is_gimple_debug (use_stmt
))
3326 else if (gimple_code (use_stmt
) == GIMPLE_COND
3327 || gimple_code (use_stmt
) == GIMPLE_PHI
)
3328 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
3329 SET_USE (use_p
, new_op
);
3330 else if (gimple_assign_cast_p (use_stmt
))
3331 cast_stmt
= use_stmt
;
3336 gcc_assert (bb
== last_bb
);
3337 tree lhs
= gimple_assign_lhs (cast_stmt
);
3338 tree new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
3339 enum tree_code rhs_code
3340 = gimple_assign_rhs_code (cast_stmt
);
3342 if (is_gimple_min_invariant (new_op
))
3344 new_op
= fold_convert (TREE_TYPE (lhs
), new_op
);
3345 g
= gimple_build_assign (new_lhs
, new_op
);
3348 g
= gimple_build_assign (new_lhs
, rhs_code
, new_op
);
3349 gimple_stmt_iterator gsi
= gsi_for_stmt (cast_stmt
);
3350 gimple_set_uid (g
, gimple_uid (cast_stmt
));
3351 gimple_set_visited (g
, true);
3352 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3353 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
3354 if (is_gimple_debug (use_stmt
))
3356 else if (gimple_code (use_stmt
) == GIMPLE_COND
3357 || gimple_code (use_stmt
) == GIMPLE_PHI
)
3358 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
3359 SET_USE (use_p
, new_lhs
);
3368 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
3370 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
3371 && bbinfo
[idx
].op
== NULL_TREE
3372 && ops
[bbinfo
[idx
].first_idx
]->op
!= NULL_TREE
)
3374 gcond
*cond_stmt
= as_a
<gcond
*> (last_stmt (bb
));
3375 if (integer_zerop (ops
[bbinfo
[idx
].first_idx
]->op
))
3376 gimple_cond_make_false (cond_stmt
);
3377 else if (integer_onep (ops
[bbinfo
[idx
].first_idx
]->op
))
3378 gimple_cond_make_true (cond_stmt
);
3381 gimple_cond_set_code (cond_stmt
, NE_EXPR
);
3382 gimple_cond_set_lhs (cond_stmt
,
3383 ops
[bbinfo
[idx
].first_idx
]->op
);
3384 gimple_cond_set_rhs (cond_stmt
, boolean_false_node
);
3386 update_stmt (cond_stmt
);
/* Return true if OPERAND is defined by a PHI node which uses the LHS
   of STMT in its operands.  This is also known as a "destructive
   update" operation.  */

static bool
is_phi_for_stmt (gimple stmt, tree operand)
{
  gimple def_stmt;
  gphi *def_phi;
  tree lhs;
  use_operand_p arg_p;
  ssa_op_iter i;

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  lhs = gimple_assign_lhs (stmt);

  def_stmt = SSA_NAME_DEF_STMT (operand);
  def_phi = dyn_cast <gphi *> (def_stmt);
  if (!def_phi)
    return false;

  FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
    if (lhs == USE_FROM_PTR (arg_p))
      return true;
  return false;
}
/* Remove def stmt of VAR if VAR has zero uses and recurse
   on rhs1 operand if so.  */

static void
remove_visited_stmt_chain (tree var)
{
  gimple stmt;
  gimple_stmt_iterator gsi;

  while (1)
    {
      if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
	return;
      stmt = SSA_NAME_DEF_STMT (var);
      if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
	{
	  var = gimple_assign_rhs1 (stmt);
	  gsi = gsi_for_stmt (stmt);
	  reassoc_remove_stmt (&gsi);
	  release_defs (stmt);
	}
      else
	return;
    }
}
/* This function checks three consecutive operands in
   passed operands vector OPS starting from OPINDEX and
   swaps two operands if it is profitable for binary operation
   consuming OPINDEX + 1 and OPINDEX + 2 operands.

   We pair ops with the same rank if possible.

   The alternative we try is to see if STMT is a destructive
   update style statement, which is like:
   b_1 = phi (b_0, b_2)
   b_2 = operation (b_1)
   In that case, we want to use the destructive update form to
   expose the possible vectorizer sum reduction opportunity.
   In that case, the third operand will be the phi node.  This
   check is not performed if STMT is null.

   We could, of course, try to be better as noted above, and do a
   lot of work to try to find these opportunities in >3 operand
   cases, but it is unlikely to be worth it.  */
3470 swap_ops_for_binary_stmt (vec
<operand_entry_t
> ops
,
3471 unsigned int opindex
, gimple stmt
)
3473 operand_entry_t oe1
, oe2
, oe3
;
3476 oe2
= ops
[opindex
+ 1];
3477 oe3
= ops
[opindex
+ 2];
3479 if ((oe1
->rank
== oe2
->rank
3480 && oe2
->rank
!= oe3
->rank
)
3481 || (stmt
&& is_phi_for_stmt (stmt
, oe3
->op
)
3482 && !is_phi_for_stmt (stmt
, oe1
->op
)
3483 && !is_phi_for_stmt (stmt
, oe2
->op
)))
3485 struct operand_entry temp
= *oe3
;
3487 oe3
->rank
= oe1
->rank
;
3489 oe1
->rank
= temp
.rank
;
3491 else if ((oe1
->rank
== oe3
->rank
3492 && oe2
->rank
!= oe3
->rank
)
3493 || (stmt
&& is_phi_for_stmt (stmt
, oe2
->op
)
3494 && !is_phi_for_stmt (stmt
, oe1
->op
)
3495 && !is_phi_for_stmt (stmt
, oe3
->op
)))
3497 struct operand_entry temp
= *oe2
;
3499 oe2
->rank
= oe1
->rank
;
3501 oe1
->rank
= temp
.rank
;
/* If definition of RHS1 or RHS2 dominates STMT, return the later of those
   two definitions, otherwise return STMT.  */

static inline gimple
find_insert_point (gimple stmt, tree rhs1, tree rhs2)
{
  if (TREE_CODE (rhs1) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
    stmt = SSA_NAME_DEF_STMT (rhs1);
  if (TREE_CODE (rhs2) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
    stmt = SSA_NAME_DEF_STMT (rhs2);
  return stmt;
}
3520 /* Recursively rewrite our linearized statements so that the operators
3521 match those in OPS[OPINDEX], putting the computation in rank
3522 order. Return new lhs. */
3525 rewrite_expr_tree (gimple stmt
, unsigned int opindex
,
3526 vec
<operand_entry_t
> ops
, bool changed
)
3528 tree rhs1
= gimple_assign_rhs1 (stmt
);
3529 tree rhs2
= gimple_assign_rhs2 (stmt
);
3530 tree lhs
= gimple_assign_lhs (stmt
);
  /* The final recursion case for this function is that you have
     exactly two operations left.
     If we had exactly one op in the entire list to start with, we
     would have never called this function, and the tail recursion
     rewrites them one at a time.  */
3538 if (opindex
+ 2 == ops
.length ())
3540 operand_entry_t oe1
, oe2
;
3543 oe2
= ops
[opindex
+ 1];
3545 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
3547 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3548 unsigned int uid
= gimple_uid (stmt
);
3550 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3552 fprintf (dump_file
, "Transforming ");
3553 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3558 gimple insert_point
= find_insert_point (stmt
, oe1
->op
, oe2
->op
);
3559 lhs
= make_ssa_name (TREE_TYPE (lhs
));
3561 = gimple_build_assign (lhs
, gimple_assign_rhs_code (stmt
),
3563 gimple_set_uid (stmt
, uid
);
3564 gimple_set_visited (stmt
, true);
3565 if (insert_point
== gsi_stmt (gsi
))
3566 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3568 insert_stmt_after (stmt
, insert_point
);
3572 gcc_checking_assert (find_insert_point (stmt
, oe1
->op
, oe2
->op
)
3574 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
3575 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
3579 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
3580 remove_visited_stmt_chain (rhs1
);
3582 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3584 fprintf (dump_file
, " into ");
3585 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3591 /* If we hit here, we should have 3 or more ops left. */
3592 gcc_assert (opindex
+ 2 < ops
.length ());
3594 /* Rewrite the next operator. */
3597 /* Recurse on the LHS of the binary operator, which is guaranteed to
3598 be the non-leaf side. */
3600 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), opindex
+ 1, ops
,
3601 changed
|| oe
->op
!= rhs2
);
3603 if (oe
->op
!= rhs2
|| new_rhs1
!= rhs1
)
3605 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3607 fprintf (dump_file
, "Transforming ");
3608 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3611 /* If changed is false, this is either opindex == 0
3612 or all outer rhs2's were equal to corresponding oe->op,
3613 and powi_result is NULL.
3614 That means lhs is equivalent before and after reassociation.
3615 Otherwise ensure the old lhs SSA_NAME is not reused and
3616 create a new stmt as well, so that any debug stmts will be
3617 properly adjusted. */
3620 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3621 unsigned int uid
= gimple_uid (stmt
);
3622 gimple insert_point
= find_insert_point (stmt
, new_rhs1
, oe
->op
);
3624 lhs
= make_ssa_name (TREE_TYPE (lhs
));
3625 stmt
= gimple_build_assign (lhs
, gimple_assign_rhs_code (stmt
),
3627 gimple_set_uid (stmt
, uid
);
3628 gimple_set_visited (stmt
, true);
3629 if (insert_point
== gsi_stmt (gsi
))
3630 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
3632 insert_stmt_after (stmt
, insert_point
);
3636 gcc_checking_assert (find_insert_point (stmt
, new_rhs1
, oe
->op
)
3638 gimple_assign_set_rhs1 (stmt
, new_rhs1
);
3639 gimple_assign_set_rhs2 (stmt
, oe
->op
);
3643 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3645 fprintf (dump_file
, " into ");
3646 print_gimple_stmt (dump_file
, stmt
, 0, 0);
/* Find out how many cycles we need to compute statements chain.
   OPS_NUM holds number of statements in a chain.  CPU_WIDTH is a
   maximum number of independent statements we may execute per cycle.  */

static int
get_required_cycles (int ops_num, int cpu_width)
{
  int res;
  int elog;
  unsigned int rest;

  /* While we have more than 2 * cpu_width operands
     we may reduce number of operands by cpu_width
     per cycle.  */
  res = ops_num / (2 * cpu_width);

  /* Remained operands count may be reduced twice per cycle
     until we have only one operand.  */
  rest = (unsigned)(ops_num - res * cpu_width);
  elog = exact_log2 (rest);
  if (elog >= 0)
    res += elog;
  else
    res += floor_log2 (rest) + 1;

  return res;
}
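/* Worked example (editorial note, not from the original source):
   with ops_num == 12 and cpu_width == 2 the function computes
   res = 12 / 4 = 3 cycles in which the chain shrinks by cpu_width per
   cycle, leaving rest = 12 - 3 * 2 = 6 operands; 6 is not a power of
   two, so it adds floor_log2 (6) + 1 = 3 more halving cycles,
   i.e. 6 cycles in total.  */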
/* Returns an optimal number of registers to use for computation of
   given statements.  */

static int
get_reassociation_width (int ops_num, enum tree_code opc,
			 machine_mode mode)
{
  int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
  int width;
  int width_min;
  int cycles_best;

  if (param_width > 0)
    width = param_width;
  else
    width = targetm.sched.reassociation_width (opc, mode);

  if (width == 1)
    return width;

  /* Get the minimal time required for sequence computation.  */
  cycles_best = get_required_cycles (ops_num, width);

  /* Check if we may use less width and still compute sequence for
     the same time.  It will allow us to reduce registers usage.
     get_required_cycles is monotonically increasing with lower width
     so we can perform a binary search for the minimal width that still
     results in the optimal cycle count.  */
  width_min = 1;
  while (width > width_min)
    {
      int width_mid = (width + width_min) / 2;

      if (get_required_cycles (ops_num, width_mid) == cycles_best)
	width = width_mid;
      else if (width_min < width_mid)
	width_min = width_mid;
      else
	break;
    }

  return width;
}
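/* Worked example (editorial note, not from the original source):
   for ops_num == 12 and a target-reported width of 4, cycles_best is
   get_required_cycles (12, 4) == 4.  The binary search then probes
   width 2 (6 cycles) and width 3 (5 cycles); neither matches the best
   cycle count, so the full width of 4 is kept.  */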
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order and trying to allow operations to be executed in
   parallel.  */
3730 rewrite_expr_tree_parallel (gassign
*stmt
, int width
,
3731 vec
<operand_entry_t
> ops
)
3733 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
3734 int op_num
= ops
.length ();
3735 int stmt_num
= op_num
- 1;
3736 gimple
*stmts
= XALLOCAVEC (gimple
, stmt_num
);
3737 int op_index
= op_num
- 1;
3739 int ready_stmts_end
= 0;
3741 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
3743 /* We start expression rewriting from the top statements.
3744 So, in this loop we create a full list of statements
3745 we will work with. */
3746 stmts
[stmt_num
- 1] = stmt
;
3747 for (i
= stmt_num
- 2; i
>= 0; i
--)
3748 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
3750 for (i
= 0; i
< stmt_num
; i
++)
3754 /* Determine whether we should use results of
3755 already handled statements or not. */
3756 if (ready_stmts_end
== 0
3757 && (i
- stmt_index
>= width
|| op_index
< 1))
3758 ready_stmts_end
= i
;
3760 /* Now we choose operands for the next statement. Non zero
3761 value in ready_stmts_end means here that we should use
3762 the result of already generated statements as new operand. */
3763 if (ready_stmts_end
> 0)
3765 op1
= gimple_assign_lhs (stmts
[stmt_index
++]);
3766 if (ready_stmts_end
> stmt_index
)
3767 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3768 else if (op_index
>= 0)
3769 op2
= ops
[op_index
--]->op
;
3772 gcc_assert (stmt_index
< i
);
3773 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
3776 if (stmt_index
>= ready_stmts_end
)
3777 ready_stmts_end
= 0;
3782 swap_ops_for_binary_stmt (ops
, op_index
- 2, NULL
);
3783 op2
= ops
[op_index
--]->op
;
3784 op1
= ops
[op_index
--]->op
;
3787 /* If we emit the last statement then we should put
3788 operands into the last statement. It will also
3790 if (op_index
< 0 && stmt_index
== i
)
3793 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3795 fprintf (dump_file
, "Transforming ");
3796 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3799 /* We keep original statement only for the last one. All
3800 others are recreated. */
3801 if (i
== stmt_num
- 1)
3803 gimple_assign_set_rhs1 (stmts
[i
], op1
);
3804 gimple_assign_set_rhs2 (stmts
[i
], op2
);
3805 update_stmt (stmts
[i
]);
3808 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
), op1
, op2
, opcode
);
3810 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3812 fprintf (dump_file
, " into ");
3813 print_gimple_stmt (dump_file
, stmts
[i
], 0, 0);
3817 remove_visited_stmt_chain (last_rhs1
);
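/* Illustrative sketch, not part of the original pass: with a
   reassociation width of 2, a chain a + b + c + d is emitted as two
   independent sums followed by a combining add, instead of the serial
   left-linear form.  Hypothetical plain-C example; unsigned arithmetic
   is used so the reordering is valid regardless of overflow.  */

static inline unsigned int
parallel_rewrite_example (unsigned int a, unsigned int b,
			  unsigned int c, unsigned int d)
{
  /* Serial form: ((a + b) + c) + d -- each add depends on the previous.
     The width-2 form below exposes two adds that can execute in parallel.  */
  unsigned int t1 = a + b;
  unsigned int t2 = c + d;
  return t1 + t2;
}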
/* Transform STMT, which is really (A + B) + (C + D) into the left
   linear form, ((A + B) + C) + D.
   Recurse on D if necessary.  */
3825 linearize_expr (gimple stmt
)
3827 gimple_stmt_iterator gsi
;
3828 gimple binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
3829 gimple binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3830 gimple oldbinrhs
= binrhs
;
3831 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
3832 gimple newbinrhs
= NULL
;
3833 struct loop
*loop
= loop_containing_stmt (stmt
);
3834 tree lhs
= gimple_assign_lhs (stmt
);
3836 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
3837 && is_reassociable_op (binrhs
, rhscode
, loop
));
3839 gsi
= gsi_for_stmt (stmt
);
3841 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
3842 binrhs
= gimple_build_assign (make_ssa_name (TREE_TYPE (lhs
)),
3843 gimple_assign_rhs_code (binrhs
),
3844 gimple_assign_lhs (binlhs
),
3845 gimple_assign_rhs2 (binrhs
));
3846 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
3847 gsi_insert_before (&gsi
, binrhs
, GSI_SAME_STMT
);
3848 gimple_set_uid (binrhs
, gimple_uid (stmt
));
3850 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
3851 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
3853 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3855 fprintf (dump_file
, "Linearized: ");
3856 print_gimple_stmt (dump_file
, stmt
, 0, 0);
3859 reassociate_stats
.linearized
++;
3862 gsi
= gsi_for_stmt (oldbinrhs
);
3863 reassoc_remove_stmt (&gsi
);
3864 release_defs (oldbinrhs
);
3866 gimple_set_visited (stmt
, true);
3867 gimple_set_visited (binlhs
, true);
3868 gimple_set_visited (binrhs
, true);
3870 /* Tail recurse on the new rhs if it still needs reassociation. */
3871 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
3872 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3873 want to change the algorithm while converting to tuples. */
3874 linearize_expr (stmt
);
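/* Illustrative sketch, not part of the original pass: the effect of
   linearization on a source-level expression, written as hypothetical
   plain C.  Both forms compute the same value; the left-linear shape is
   what the rest of the pass expects to rewrite.  */

static inline unsigned int
linearize_example (unsigned int a, unsigned int b,
		   unsigned int c, unsigned int d)
{
  /* (a + b) + (c + d) is re-expressed as ((a + b) + c) + d.  */
  return ((a + b) + c) + d;
}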
/* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
   it.  Otherwise, return NULL.  */

static gimple
get_single_immediate_use (tree lhs)
{
  use_operand_p immuse;
  gimple immusestmt;

  if (TREE_CODE (lhs) == SSA_NAME
      && single_imm_use (lhs, &immuse, &immusestmt)
      && is_gimple_assign (immusestmt))
    return immusestmt;

  return NULL;
}
3894 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3895 representing the negated value. Insertions of any necessary
3896 instructions go before GSI.
3897 This function is recursive in that, if you hand it "a_5" as the
3898 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3899 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3902 negate_value (tree tonegate
, gimple_stmt_iterator
*gsip
)
3904 gimple negatedefstmt
= NULL
;
3905 tree resultofnegate
;
3906 gimple_stmt_iterator gsi
;
3909 /* If we are trying to negate a name, defined by an add, negate the
3910 add operands instead. */
3911 if (TREE_CODE (tonegate
) == SSA_NAME
)
3912 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
3913 if (TREE_CODE (tonegate
) == SSA_NAME
3914 && is_gimple_assign (negatedefstmt
)
3915 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
3916 && has_single_use (gimple_assign_lhs (negatedefstmt
))
3917 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
3919 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
3920 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
3921 tree lhs
= gimple_assign_lhs (negatedefstmt
);
3924 gsi
= gsi_for_stmt (negatedefstmt
);
3925 rhs1
= negate_value (rhs1
, &gsi
);
3927 gsi
= gsi_for_stmt (negatedefstmt
);
3928 rhs2
= negate_value (rhs2
, &gsi
);
3930 gsi
= gsi_for_stmt (negatedefstmt
);
3931 lhs
= make_ssa_name (TREE_TYPE (lhs
));
3932 gimple_set_visited (negatedefstmt
, true);
3933 g
= gimple_build_assign (lhs
, PLUS_EXPR
, rhs1
, rhs2
);
3934 gimple_set_uid (g
, gimple_uid (negatedefstmt
));
3935 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3939 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
3940 resultofnegate
= force_gimple_operand_gsi (gsip
, tonegate
, true,
3941 NULL_TREE
, true, GSI_SAME_STMT
);
3943 uid
= gimple_uid (gsi_stmt (gsi
));
3944 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
3946 gimple stmt
= gsi_stmt (gsi
);
3947 if (gimple_uid (stmt
) != 0)
3949 gimple_set_uid (stmt
, uid
);
3951 return resultofnegate
;
/* Return true if we should break up the subtract in STMT into an add
   with negate.  This is true when the subtract operands are really
   adds, or the subtract itself is used in an add expression.  In
   either case, breaking up the subtract into an add with negate
   exposes the adds to reassociation.  */

static bool
should_break_up_subtract (gimple stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple immusestmt;
  struct loop *loop = loop_containing_stmt (stmt);

  if (TREE_CODE (binlhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
    return true;

  if (TREE_CODE (binrhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
    return true;

  if (TREE_CODE (lhs) == SSA_NAME
      && (immusestmt = get_single_immediate_use (lhs))
      && is_gimple_assign (immusestmt)
      && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
	  || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
    return true;
  return false;
}
/* Transform STMT from A - B into A + -B.  */

static void
break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Breaking up subtract ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  rhs2 = negate_value (rhs2, gsip);
  gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
  update_stmt (stmt);
}
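/* Illustrative sketch, not part of the original pass: breaking up a
   subtract exposes its operands to the addition reassociation that
   follows.  Hypothetical plain-C example with unsigned arithmetic.  */

static inline unsigned int
break_up_subtract_example (unsigned int a, unsigned int b, unsigned int c)
{
  /* (a - b) + c is handled as (a + -b) + c, so A, -B and C all become
     operands of a single addition chain.  */
  unsigned int negb = -b;
  return (a + negb) + c;
}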
4005 /* Determine whether STMT is a builtin call that raises an SSA name
4006 to an integer power and has only one use. If so, and this is early
4007 reassociation and unsafe math optimizations are permitted, place
4008 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4009 If any of these conditions does not hold, return FALSE. */
4012 acceptable_pow_call (gimple stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
4015 REAL_VALUE_TYPE c
, cint
;
4017 if (!first_pass_instance
4018 || !flag_unsafe_math_optimizations
4019 || !is_gimple_call (stmt
)
4020 || !has_single_use (gimple_call_lhs (stmt
)))
4023 fndecl
= gimple_call_fndecl (stmt
);
4026 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
4029 switch (DECL_FUNCTION_CODE (fndecl
))
4031 CASE_FLT_FN (BUILT_IN_POW
):
4032 if (flag_errno_math
)
4035 *base
= gimple_call_arg (stmt
, 0);
4036 arg1
= gimple_call_arg (stmt
, 1);
4038 if (TREE_CODE (arg1
) != REAL_CST
)
4041 c
= TREE_REAL_CST (arg1
);
4043 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
4046 *exponent
= real_to_integer (&c
);
4047 real_from_integer (&cint
, VOIDmode
, *exponent
, SIGNED
);
4048 if (!real_identical (&c
, &cint
))
4053 CASE_FLT_FN (BUILT_IN_POWI
):
4054 *base
= gimple_call_arg (stmt
, 0);
4055 arg1
= gimple_call_arg (stmt
, 1);
4057 if (!tree_fits_shwi_p (arg1
))
4060 *exponent
= tree_to_shwi (arg1
);
4067 /* Expanding negative exponents is generally unproductive, so we don't
4068 complicate matters with those. Exponents of zero and one should
4069 have been handled by expression folding. */
4070 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
4076 /* Recursively linearize a binary expression that is the RHS of STMT.
4077 Place the operands of the expression tree in the vector named OPS. */
4080 linearize_expr_tree (vec
<operand_entry_t
> *ops
, gimple stmt
,
4081 bool is_associative
, bool set_visited
)
4083 tree binlhs
= gimple_assign_rhs1 (stmt
);
4084 tree binrhs
= gimple_assign_rhs2 (stmt
);
4085 gimple binlhsdef
= NULL
, binrhsdef
= NULL
;
4086 bool binlhsisreassoc
= false;
4087 bool binrhsisreassoc
= false;
4088 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
4089 struct loop
*loop
= loop_containing_stmt (stmt
);
4090 tree base
= NULL_TREE
;
4091 HOST_WIDE_INT exponent
= 0;
4094 gimple_set_visited (stmt
, true);
4096 if (TREE_CODE (binlhs
) == SSA_NAME
)
4098 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
4099 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
4100 && !stmt_could_throw_p (binlhsdef
));
4103 if (TREE_CODE (binrhs
) == SSA_NAME
)
4105 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
4106 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
4107 && !stmt_could_throw_p (binrhsdef
));
4110 /* If the LHS is not reassociable, but the RHS is, we need to swap
4111 them. If neither is reassociable, there is nothing we can do, so
4112 just put them in the ops vector. If the LHS is reassociable,
4113 linearize it. If both are reassociable, then linearize the RHS
4116 if (!binlhsisreassoc
)
4120 /* If this is not a associative operation like division, give up. */
4121 if (!is_associative
)
4123 add_to_ops_vec (ops
, binrhs
);
4127 if (!binrhsisreassoc
)
4129 if (rhscode
== MULT_EXPR
4130 && TREE_CODE (binrhs
) == SSA_NAME
4131 && acceptable_pow_call (binrhsdef
, &base
, &exponent
))
4133 add_repeat_to_ops_vec (ops
, base
, exponent
);
4134 gimple_set_visited (binrhsdef
, true);
4137 add_to_ops_vec (ops
, binrhs
);
4139 if (rhscode
== MULT_EXPR
4140 && TREE_CODE (binlhs
) == SSA_NAME
4141 && acceptable_pow_call (binlhsdef
, &base
, &exponent
))
4143 add_repeat_to_ops_vec (ops
, base
, exponent
);
4144 gimple_set_visited (binlhsdef
, true);
4147 add_to_ops_vec (ops
, binlhs
);
4152 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4154 fprintf (dump_file
, "swapping operands of ");
4155 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4158 swap_ssa_operands (stmt
,
4159 gimple_assign_rhs1_ptr (stmt
),
4160 gimple_assign_rhs2_ptr (stmt
));
4163 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4165 fprintf (dump_file
, " is now ");
4166 print_gimple_stmt (dump_file
, stmt
, 0, 0);
4169 /* We want to make it so the lhs is always the reassociative op,
4175 else if (binrhsisreassoc
)
4177 linearize_expr (stmt
);
4178 binlhs
= gimple_assign_rhs1 (stmt
);
4179 binrhs
= gimple_assign_rhs2 (stmt
);
4182 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
4183 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
4185 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
4186 is_associative
, set_visited
);
4188 if (rhscode
== MULT_EXPR
4189 && TREE_CODE (binrhs
) == SSA_NAME
4190 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs
), &base
, &exponent
))
4192 add_repeat_to_ops_vec (ops
, base
, exponent
);
4193 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs
), true);
4196 add_to_ops_vec (ops
, binrhs
);
4199 /* Repropagate the negates back into subtracts, since no other pass
4200 currently does it. */
4203 repropagate_negates (void)
4208 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
4210 gimple user
= get_single_immediate_use (negate
);
4212 if (!user
|| !is_gimple_assign (user
))
4215 /* The negate operand can be either operand of a PLUS_EXPR
4216 (it can be the LHS if the RHS is a constant for example).
4218 Force the negate operand to the RHS of the PLUS_EXPR, then
4219 transform the PLUS_EXPR into a MINUS_EXPR. */
4220 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
4222 /* If the negated operand appears on the LHS of the
4223 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4224 to force the negated operand to the RHS of the PLUS_EXPR. */
4225 if (gimple_assign_rhs1 (user
) == negate
)
4227 swap_ssa_operands (user
,
4228 gimple_assign_rhs1_ptr (user
),
4229 gimple_assign_rhs2_ptr (user
));
4232 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4233 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4234 if (gimple_assign_rhs2 (user
) == negate
)
4236 tree rhs1
= gimple_assign_rhs1 (user
);
4237 tree rhs2
= get_unary_op (negate
, NEGATE_EXPR
);
4238 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
4239 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
, rhs2
);
4243 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
4245 if (gimple_assign_rhs1 (user
) == negate
)
4250 which we transform into
4253 This pushes down the negate which we possibly can merge
4254 into some other operation, hence insert it into the
4255 plus_negates vector. */
4256 gimple feed
= SSA_NAME_DEF_STMT (negate
);
4257 tree a
= gimple_assign_rhs1 (feed
);
4258 tree b
= gimple_assign_rhs2 (user
);
4259 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
);
4260 gimple_stmt_iterator gsi2
= gsi_for_stmt (user
);
4261 tree x
= make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed
)));
4262 gimple g
= gimple_build_assign (x
, PLUS_EXPR
, a
, b
);
4263 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
4264 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, x
);
4265 user
= gsi_stmt (gsi2
);
4267 reassoc_remove_stmt (&gsi
);
4268 release_defs (feed
);
4269 plus_negates
.safe_push (gimple_assign_lhs (user
));
4273 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4274 rid of one operation. */
4275 gimple feed
= SSA_NAME_DEF_STMT (negate
);
4276 tree a
= gimple_assign_rhs1 (feed
);
4277 tree rhs1
= gimple_assign_rhs1 (user
);
4278 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
4279 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, a
);
4280 update_stmt (gsi_stmt (gsi
));
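/* Illustrative sketch, not part of the original pass: the last case
   above, "x = -a; y = b - x" becoming "y = b + a", written as a
   hypothetical plain-C example.  */

static inline unsigned int
repropagate_negate_example (unsigned int a, unsigned int b)
{
  /* Before: x = -a; y = b - x.  After: y = b + a, one operation fewer.  */
  return b + a;
}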
/* Returns true if OP is of a type for which we can do reassociation.
   That is for integral or non-saturating fixed-point types, and for
   floating point type when associative-math is enabled.  */

static bool
can_reassociate_p (tree op)
{
  tree type = TREE_TYPE (op);
  if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
      || NON_SAT_FIXED_POINT_TYPE_P (type)
      || (flag_associative_math && FLOAT_TYPE_P (type)))
    return true;
  return false;
}
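/* Editorial note, not from the original source: the type restriction
   matters because reassociation can move where an overflow or rounding
   step happens.  For signed integers with undefined overflow, rewriting
   (a + b) - b as a + (b - b) can remove an overflow the original had
   (or introduce one), and for doubles (x + 1e300) - 1e300 generally
   differs from x + (1e300 - 1e300).  Hence only wrapping integers,
   non-saturating fixed-point types and -fassociative-math floats are
   reassociated.  */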
/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   IE given
   d = f + g
   c = a + e
   b = c - d
   q = b - r
   k = t - q

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement
   and set UIDs within each basic block.  */
4320 break_up_subtract_bb (basic_block bb
)
4322 gimple_stmt_iterator gsi
;
4324 unsigned int uid
= 1;
4326 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4328 gimple stmt
= gsi_stmt (gsi
);
4329 gimple_set_visited (stmt
, false);
4330 gimple_set_uid (stmt
, uid
++);
4332 if (!is_gimple_assign (stmt
)
4333 || !can_reassociate_p (gimple_assign_lhs (stmt
)))
4336 /* Look for simple gimple subtract operations. */
4337 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
4339 if (!can_reassociate_p (gimple_assign_rhs1 (stmt
))
4340 || !can_reassociate_p (gimple_assign_rhs2 (stmt
)))
4343 /* Check for a subtract used only in an addition. If this
4344 is the case, transform it into add of a negate for better
4345 reassociation. IE transform C = A-B into C = A + -B if C
4346 is only used in an addition. */
4347 if (should_break_up_subtract (stmt
))
4348 break_up_subtract (stmt
, &gsi
);
4350 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
4351 && can_reassociate_p (gimple_assign_rhs1 (stmt
)))
4352 plus_negates
.safe_push (gimple_assign_lhs (stmt
));
4354 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
4356 son
= next_dom_son (CDI_DOMINATORS
, son
))
4357 break_up_subtract_bb (son
);
/* Used for repeated factor analysis.  */
struct repeat_factor_d
{
  /* An SSA name that occurs in a multiply chain.  */
  tree factor;

  /* Cached rank of the factor.  */
  unsigned rank;

  /* Number of occurrences of the factor in the chain.  */
  HOST_WIDE_INT count;

  /* An SSA name representing the product of this factor and
     all factors appearing later in the repeated factor vector.  */
  tree repr;
};

typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
typedef const struct repeat_factor_d *const_repeat_factor_t;


static vec<repeat_factor> repeat_factor_vec;

/* Used for sorting the repeat factor vector.  Sort primarily by
   ascending occurrence count, secondarily by descending rank.  */

static int
compare_repeat_factors (const void *x1, const void *x2)
{
  const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
  const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;

  if (rf1->count != rf2->count)
    return rf1->count - rf2->count;

  return rf2->rank - rf1->rank;
}
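/* Editorial example (not from the original source): with entries
   {x: count 3, rank 7}, {y: count 3, rank 9} and {z: count 1, rank 4},
   qsort with compare_repeat_factors orders them z, y, x -- lowest
   occurrence count first, and for equal counts the higher rank first.  */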
/* Look for repeated operands in OPS in the multiply tree rooted at
   STMT.  Replace them with an optimal sequence of multiplies and powi
   builtin calls, and remove the used operands from OPS.  Return an
   SSA name representing the value of the replacement sequence.  */

static tree
attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
{
  unsigned i, j, vec_len;
  int ii;
  operand_entry_t oe;
  repeat_factor_t rf1, rf2;
  repeat_factor rfnew;
  tree result = NULL_TREE;
  tree target_ssa, iter_result;
  tree type = TREE_TYPE (gimple_get_lhs (stmt));
  tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gimple mul_stmt, pow_stmt;

  /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
     target.  */
  if (!powi_fndecl)
    return NULL_TREE;

  /* Allocate the repeated factor vector.  */
  repeat_factor_vec.create (10);
  /* Scan the OPS vector for all SSA names in the product and build
     up a vector of occurrence counts for each factor.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
        {
          FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
            {
              if (rf1->factor == oe->op)
                {
                  rf1->count += oe->count;
                  break;
                }
            }

          if (j >= repeat_factor_vec.length ())
            {
              rfnew.factor = oe->op;
              rfnew.rank = oe->rank;
              rfnew.count = oe->count;
              rfnew.repr = NULL_TREE;
              repeat_factor_vec.safe_push (rfnew);
            }
        }
    }

  /* Sort the repeated factor vector by (a) increasing occurrence count,
     and (b) decreasing rank.  */
  repeat_factor_vec.qsort (compare_repeat_factors);
  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_pow opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions:

        t1 = y * z
        t2 = t1 * x
        t3 = t2 * t2
        t4 = t1 * t3
        result = t4 * z

     If we instead ignored the cached y * z and first multiplied by
     the __builtin_pow opportunity z * z, we would end up with an
     inferior sequence requiring an extra multiply.  */

  vec_len = repeat_factor_vec.length ();
  /* Repeatedly look for opportunities to create a builtin_powi call.  */
  while (true)
    {
      HOST_WIDE_INT power;

      /* First look for the largest cached product of factors from
         preceding iterations.  If found, create a builtin_powi for
         it if the minimum occurrence count for its factors is at
         least 2, or just use this cached product as our next
         multiplicand if the minimum occurrence count is 1.  */
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
        {
          if (rf1->repr && rf1->count > 0)
            break;
        }

      if (j < vec_len)
        {
          power = rf1->count;

          if (power == 1)
            {
              iter_result = rf1->repr;

              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  unsigned elt;
                  repeat_factor_t rf;
                  fputs ("Multiplying by cached product ", dump_file);
                  for (elt = j; elt < vec_len; elt++)
                    {
                      rf = &repeat_factor_vec[elt];
                      print_generic_expr (dump_file, rf->factor, 0);
                      if (elt < vec_len - 1)
                        fputs (" * ", dump_file);
                    }
                  fputs ("\n", dump_file);
                }
            }
          else
            {
              iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
              pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                            build_int_cst (integer_type_node,
                                                           power));
              gimple_call_set_lhs (pow_stmt, iter_result);
              gimple_set_location (pow_stmt, gimple_location (stmt));
              gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);

              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  unsigned elt;
                  repeat_factor_t rf;
                  fputs ("Building __builtin_pow call for cached product (",
                         dump_file);
                  for (elt = j; elt < vec_len; elt++)
                    {
                      rf = &repeat_factor_vec[elt];
                      print_generic_expr (dump_file, rf->factor, 0);
                      if (elt < vec_len - 1)
                        fputs (" * ", dump_file);
                    }
                  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n",
                           power);
                }
            }
        }
      else
        {
          /* Otherwise, find the first factor in the repeated factor
             vector whose occurrence count is at least 2.  If no such
             factor exists, there are no builtin_powi opportunities
             remaining.  */
          FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
            {
              if (rf1->count >= 2)
                break;
            }

          if (j >= vec_len)
            break;

          power = rf1->count;

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              unsigned elt;
              repeat_factor_t rf;
              fputs ("Building __builtin_pow call for (", dump_file);
              for (elt = j; elt < vec_len; elt++)
                {
                  rf = &repeat_factor_vec[elt];
                  print_generic_expr (dump_file, rf->factor, 0);
                  if (elt < vec_len - 1)
                    fputs (" * ", dump_file);
                }
              fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n", power);
            }

          reassociate_stats.pows_created++;

          /* Visit each element of the vector in reverse order (so that
             high-occurrence elements are visited first, and within the
             same occurrence count, lower-ranked elements are visited
             first).  Form a linear product of all elements in this order
             whose occurrence count is at least that of element J.
             Record the SSA name representing the product of each element
             with all subsequent elements in the vector.  */
          if (j == vec_len - 1)
            rf1->repr = rf1->factor;
          else
            {
              for (ii = vec_len - 2; ii >= (int) j; ii--)
                {
                  rf1 = &repeat_factor_vec[ii];
                  rf2 = &repeat_factor_vec[ii + 1];

                  /* Init the last factor's representative to be itself.  */
                  if (!rf2->repr)
                    rf2->repr = rf2->factor;

                  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
                  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
                                                  rf1->factor, rf2->repr);
                  gimple_set_location (mul_stmt, gimple_location (stmt));
                  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
                  rf1->repr = target_ssa;

                  /* Don't reprocess the multiply we just introduced.  */
                  gimple_set_visited (mul_stmt, true);
                }
            }

          /* Form a call to __builtin_powi for the maximum product
             just formed, raised to the power obtained earlier.  */
          rf1 = &repeat_factor_vec[j];
          iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
          pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                        build_int_cst (integer_type_node,
                                                       power));
          gimple_call_set_lhs (pow_stmt, iter_result);
          gimple_set_location (pow_stmt, gimple_location (stmt));
          gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
        }
      /* If we previously formed at least one other builtin_powi call,
         form the product of this one and those others.  */
      if (result)
        {
          tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
          mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
                                          result, iter_result);
          gimple_set_location (mul_stmt, gimple_location (stmt));
          gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
          gimple_set_visited (mul_stmt, true);
          result = new_result;
        }
      else
        result = iter_result;

      /* Decrement the occurrence count of each element in the product
         by the count found above, and remove this many copies of each
         factor from OPS.  */
      for (i = j; i < vec_len; i++)
        {
          unsigned k = power;
          unsigned n;

          rf1 = &repeat_factor_vec[i];
          rf1->count -= power;

          FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
            {
              if (oe->op == rf1->factor)
                {
                  if (oe->count <= k)
                    {
                      ops->ordered_remove (n);
                      k -= oe->count;

                      if (k == 0)
                        break;
                    }
                  else
                    {
                      oe->count -= k;
                      break;
                    }
                }
            }
        }
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}
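/* Illustrative end-to-end sketch (not from the original sources): for the
   running example x * x * y * y * y * z * z * z * z the loop above emits
   roughly

     t1 = y * z;
     t2 = t1 * x;
     t3 = __builtin_powi (t2, 2);
     t4 = t3 * t1;
     result = t4 * z;

   (the real temporaries are "reassocpow" SSA names).  All nine operands
   are removed from OPS and the SSA name holding the final product is
   returned.  */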
/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */

static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
{
  tree rhs1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}
/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
                            tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}
/* Reassociate expressions in basic block BB and its post-dominator as
   children.  */

static void
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple stmt = last_stmt (bb);

  if (stmt && !gimple_visited_p (stmt))
    maybe_optimize_range_tests (stmt);

  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
          && !stmt_could_throw_p (stmt))
        {
          tree lhs, rhs1, rhs2;
          enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

          /* If this is not a gimple binary expression, there is
             nothing for us to do with it.  */
          if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
            continue;

          /* If this was part of an already processed statement,
             we don't need to touch it again.  */
          if (gimple_visited_p (stmt))
            {
              /* This statement might have become dead because of previous
                 reassociations.  */
              if (has_zero_uses (gimple_get_lhs (stmt)))
                {
                  reassoc_remove_stmt (&gsi);
                  release_defs (stmt);
                  /* We might end up removing the last stmt above which
                     places the iterator to the end of the sequence.
                     Reset it to the last stmt in this case which might
                     be the end of the sequence as well if we removed
                     the last statement of the sequence.  In which case
                     we need to bail out.  */
                  if (gsi_end_p (gsi))
                    {
                      gsi = gsi_last_bb (bb);
                      if (gsi_end_p (gsi))
                        break;
                    }
                }
              continue;
            }

          lhs = gimple_assign_lhs (stmt);
          rhs1 = gimple_assign_rhs1 (stmt);
          rhs2 = gimple_assign_rhs2 (stmt);
          /* For non-bit or min/max operations we can't associate
             all types.  Verify that here.  */
          if (rhs_code != BIT_IOR_EXPR
              && rhs_code != BIT_AND_EXPR
              && rhs_code != BIT_XOR_EXPR
              && rhs_code != MIN_EXPR
              && rhs_code != MAX_EXPR
              && (!can_reassociate_p (lhs)
                  || !can_reassociate_p (rhs1)
                  || !can_reassociate_p (rhs2)))
            continue;

          if (associative_tree_code (rhs_code))
            {
              auto_vec<operand_entry_t> ops;
              tree powi_result = NULL_TREE;

              /* There may be no immediate uses left by the time we
                 get here because we may have eliminated them all.  */
              if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
                continue;

              gimple_set_visited (stmt, true);
              linearize_expr_tree (&ops, stmt, true, true);
              ops.qsort (sort_by_operand_rank);
              optimize_ops_list (rhs_code, &ops);
              if (undistribute_ops_list (rhs_code, &ops,
                                         loop_containing_stmt (stmt)))
                {
                  ops.qsort (sort_by_operand_rank);
                  optimize_ops_list (rhs_code, &ops);
                }

              if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
                optimize_range_tests (rhs_code, &ops);

              if (first_pass_instance
                  && rhs_code == MULT_EXPR
                  && flag_unsafe_math_optimizations)
                powi_result = attempt_builtin_powi (stmt, &ops);

              /* If the operand vector is now empty, all operands were
                 consumed by the __builtin_powi optimization.  */
              if (ops.length () == 0)
                transform_stmt_to_copy (&gsi, stmt, powi_result);
              else if (ops.length () == 1)
                {
                  tree last_op = ops.last ()->op;

                  if (powi_result)
                    transform_stmt_to_multiply (&gsi, stmt, last_op,
                                                powi_result);
                  else
                    transform_stmt_to_copy (&gsi, stmt, last_op);
                }
              else
                {
                  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
                  int ops_num = ops.length ();
                  int width = get_reassociation_width (ops_num, rhs_code,
                                                       mode);
                  tree new_lhs = lhs;

                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file,
                             "Width = %d was chosen for reassociation\n",
                             width);

                  if (width > 1
                      && ops.length () > 3)
                    rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
                                                width, ops);
                  else
                    {
                      /* When there are three operands left, we want
                         to make sure the ones that get the double
                         binary op are chosen wisely.  */
                      int len = ops.length ();
                      if (len >= 3)
                        swap_ops_for_binary_stmt (ops, len - 3, stmt);

                      new_lhs = rewrite_expr_tree (stmt, 0, ops,
                                                   powi_result != NULL);
                    }

                  /* If we combined some repeated factors into a
                     __builtin_powi call, multiply that result by the
                     reassociated operands.  */
                  if (powi_result)
                    {
                      gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
                      tree type = TREE_TYPE (lhs);
                      tree target_ssa = make_temp_ssa_name (type, NULL,
                                                            "reassocmul");
                      gimple_set_lhs (lhs_stmt, target_ssa);
                      update_stmt (lhs_stmt);
                      if (lhs != new_lhs)
                        target_ssa = new_lhs;
                      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
                                                      powi_result, target_ssa);
                      gimple_set_location (mul_stmt, gimple_location (stmt));
                      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
                    }
                }
            }
        }
    }

  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    reassociate_bb (son);
}
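/* Illustrative sketch (not from the original sources): for an unsigned
   chain such as

     t1 = a + 1;
     t2 = t1 + b;
     t3 = t2 + 2;

   the linearized operand list for t3 is { a, 1, b, 2 }; optimize_ops_list
   folds the constants, and the rewrite step re-emits the chain in rank
   order, e.g.

     t1 = a + b;
     t3 = t1 + 3;

   (exact temporaries and operand order depend on the computed ranks).  */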
/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...;  // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...;  // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI<1(l1), OTHERVAR(l2)>;  */
static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (var);
      gimple use_stmt;
      use_operand_p use;

      bool ok = single_imm_use (var, &use, &use_stmt);
      gcc_assert (ok
                  && is_gimple_assign (use_stmt)
                  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
                  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple g = gimple_build_cond (NE_EXPR, var,
                                    build_zero_cst (TREE_TYPE (var)),
                                    NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = REG_BR_PROB_BASE / 2;
      etrue->count = cond_bb->count / 2;
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      efalse->count -= etrue->count;
      then_bb->count -= etrue->count;

      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
        othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
        othervar = gimple_assign_rhs1 (use_stmt);

      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }

  reassoc_branch_fixups.release ();
}
void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
void debug_ops_vector (vec<operand_entry_t> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
{
  operand_entry_t oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op, 0);
      fprintf (file, "\n");
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

void
debug_ops_vector (vec<operand_entry_t> ops)
{
  dump_ops_vector (stderr, ops);
}
static void
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}
/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  operand_entry_pool = create_alloc_pool ("operand entry pool",
                                          sizeof (struct operand_entry), 30);
  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, long>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
        insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB.  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}
/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
                            reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
                            reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
                            reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
                            reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
                            reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
                            reassociate_stats.pows_created);

  delete operand_rank;
  free_alloc_pool (operand_entry_pool);
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}
/* Gate and execute functions for Reassociation.  */

static unsigned int
execute_reassoc (void)
{
  init_reassoc ();

  do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return 0;
}
const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
  virtual unsigned int execute (function *) { return execute_reassoc (); }

}; // class pass_reassoc

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}