/* Reassociation for trees.
   Copyright (C) 2005-2015 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
23 #include "coretypes.h"
24 #include "hash-table.h"
31 #include "double-int.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
41 #include "hard-reg-set.h"
43 #include "dominance.h"
46 #include "basic-block.h"
47 #include "gimple-pretty-print.h"
48 #include "tree-inline.h"
50 #include "tree-ssa-alias.h"
51 #include "internal-fn.h"
52 #include "gimple-fold.h"
54 #include "gimple-expr.h"
57 #include "gimple-iterator.h"
58 #include "gimplify-me.h"
59 #include "gimple-ssa.h"
61 #include "tree-phinodes.h"
62 #include "ssa-iterators.h"
63 #include "stringpool.h"
64 #include "tree-ssanames.h"
65 #include "tree-ssa-loop-niter.h"
66 #include "tree-ssa-loop.h"
69 #include "statistics.h"
71 #include "fixed-value.h"
72 #include "insn-config.h"
83 #include "tree-iterator.h"
84 #include "tree-pass.h"
85 #include "alloc-pool.h"
86 #include "langhooks.h"
90 #include "diagnostic-core.h"
93 #include "insn-codes.h"
/*  This is a simple global reassociation pass.  It is, in part, based
    on the LLVM pass of the same name (they do some things more/less
    than we do, in different orders, etc).

    It consists of five steps:

    1. Breaking up subtract operations into addition + negate, where
    it would promote the reassociation of adds.

    2. Left linearization of the expression trees, so that (A+B)+(C+D)
    becomes (((A+B)+C)+D), which is easier for us to rewrite later.
    During linearization, we place the operands of the binary
    expressions into a vector of operand_entry_t.

    3. Optimization of the operand lists, eliminating things like a +
    -a, a & a, etc.

    3a. Combine repeated factors with the same occurrence counts
    into a __builtin_powi call that will later be optimized into
    an optimal number of multiplies.

    4. Rewrite the expression trees we linearized and optimized so
    they are in proper rank order.

    5. Repropagate negates, as nothing else will clean it up ATM.

    A bit of theory on #4, since nobody seems to write anything down
    about why it makes sense to do it the way they do it:

    We could do this much nicer theoretically, but don't (for reasons
    explained after how to do it theoretically nice :P).

    In order to promote the most redundancy elimination, you want
    binary expressions whose operands are the same rank (or
    preferably, the same value) exposed to the redundancy eliminator,
    for possible elimination.

    So the way to do this if we really cared, is to build the new op
    tree from the leaves to the roots, merging as you go, and putting the
    new op on the end of the worklist, until you are left with one
    thing on the worklist.

    IE if you have to rewrite the following set of operands (listed with
    rank in parentheses), with opcode PLUS_EXPR:

    a (1),  b (1),  c (1),  d (2), e (2)

    We start with our merge worklist empty, and the ops list with all of
    those on it.

    You want to first merge all leaves of the same rank, as much as
    possible.

    So first build a binary op of

    mergetmp = a + b, and put "mergetmp" on the merge worklist.

    Because there is no three operand form of PLUS_EXPR, c is not going to
    be exposed to redundancy elimination as a rank 1 operand.

    So you might as well throw it on the merge worklist (you could also
    consider it to now be a rank two operand, and merge it with d and e,
    but in this case, you then have evicted e from a binary op.  So at
    least in this situation, you can't win.)

    Then build a binary op of d + e

    mergetmp2 = d + e

    and put mergetmp2 on the merge worklist.

    so merge worklist = {mergetmp, c, mergetmp2}

    Continue building binary ops of these operations until you have only
    one operation left on the worklist.

    mergetmp3 = mergetmp + c

    worklist = {mergetmp2, mergetmp3}

    mergetmp4 = mergetmp2 + mergetmp3

    worklist = {mergetmp4}

    Because we have one operation left, we can now just set the original
    statement equal to the result of that operation.

    This will at least expose a + b  and d + e to redundancy elimination
    as binary operations.

    For extra points, you can reuse the old statements to build the
    mergetmps, since you shouldn't run out.

    So why don't we do this?

    Because it's expensive, and rarely will help.  Most trees we are
    reassociating have 3 or fewer ops.  If they have 2 ops, they already
    will be written into a nice single binary op.  If you have 3 ops, a
    single simple check suffices to tell you whether the first two are of
    the same rank.  If so, you know to order it

    mergetmp = op1 + op2
    newstmt = mergetmp + op3

    instead of
    mergetmp = op2 + op3
    newstmt = mergetmp + op1

    If all three are of the same rank, you can't expose them all in a
    single binary operator anyway, so the above is *still* the best you
    can do.

    Thus, this is what we do.  When we have three ops left, we check to see
    what order to put them in, and call it a day.  As a nod to vector sum
    reduction, we check if any of the ops are really a phi node that is a
    destructive update for the associating op, and keep the destructive
    update together for vector sum reduction recognition.  */
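
/* A compact sketch (illustrative only, not the code this pass runs) of
   the "theoretically nice" scheme described above.  BUILD_BINOP is a
   hypothetical helper standing in for creating (or reusing) a binary
   statement:

     worklist = ops;                     // all leaves, with their ranks
     while (worklist.length () > 1)
       {
	 pick two entries x, y of equal rank if possible,
	   otherwise the two lowest-ranked entries;
	 tmp = BUILD_BINOP (opcode, x, y);
	 remove x and y from worklist, push tmp;
       }
     // the single remaining entry becomes the result of the original stmt

   The pass itself only special-cases the three-operand situation, as
   explained above.  */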
/* Statistics */
static struct
{
  int constants_eliminated;
  int ops_eliminated;
  int pows_encountered;
} reassociate_stats;

/* Operator, rank pair.  */
typedef struct operand_entry
{
  unsigned int rank;
  int id;
  tree op;
  unsigned int count;
} *operand_entry_t;

static alloc_pool operand_entry_pool;

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;

/* Operand->rank hashtable.  */
static hash_map<tree, long> *operand_rank;

/* Vector of SSA_NAMEs on which after reassociate_bb is done with
   all basic blocks the CFG should be adjusted - basic blocks
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;
static long get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);

/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

static bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}
/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)

/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */
static long
phi_rank (gimple stmt)
{
  basic_block bb = gimple_bb (stmt);
  struct loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple use_stmt;

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (arg))
	{
	  gimple def_stmt = SSA_NAME_DEF_STMT (arg);
	  if (gimple_bb (def_stmt)->loop_father == father)
	    return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
	}
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}
/* If EXP is an SSA_NAME defined by a PHI statement that represents a
   loop-carried dependence of an innermost loop, return TRUE; else
   return FALSE.  */
static bool
loop_carried_phi (tree exp)
{
  gimple phi_stmt;
  long block_rank;

  if (TREE_CODE (exp) != SSA_NAME
      || SSA_NAME_IS_DEFAULT_DEF (exp))
    return false;

  phi_stmt = SSA_NAME_DEF_STMT (exp);

  if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
    return false;

  /* Non-loop-carried phis have block rank.  Loop-carried phis have
     an additional bias added in.  If this phi doesn't have block rank,
     it's biased and should not be propagated.  */
  block_rank = bb_rank[gimple_bb (phi_stmt)->index];

  if (phi_rank (phi_stmt) != block_rank)
    return true;

  return false;
}

/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */
static long
propagate_rank (long rank, tree op)
{
  long op_rank;

  if (loop_carried_phi (op))
    return rank;

  op_rank = get_rank (op);

  return MAX (rank, op_rank);
}
/* Look up the operand rank structure for expression E.  */

static long
find_operand_rank (tree e)
{
  long *slot = operand_rank->get (e);
  return slot ? *slot : -1;
}

/* Insert {E,RANK} into the operand rank hashtable.  */

static void
insert_operand_rank (tree e, long rank)
{
  gcc_assert (rank > 0);
  gcc_assert (!operand_rank->put (e, rank));
}
/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* Constants have rank 0.  */
  if (is_gimple_min_invariant (e))
    return 0;

  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop in which each iteration's
     calculation into x is fully dependent upon the iteration before
     it.  We would prefer a form in which the accumulator is added
     last, so that if the loop is unrolled, the calculations feeding
     it from different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */
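
  /* For illustration only (a plausible shape of the pattern, not code
     taken from anywhere): an accumulator loop body such as

	 x_1 = PHI <x_0(preheader), x_2(latch)>
	 b_4 = a_3 + x_1
	 c_5 = b_4 + d_2
	 x_2 = c_5 + e_6

     chains every iteration through x_1.  Biasing the rank of x_1 makes
     the rewrite compute the independent part first and add x_1 last:

	 t_7 = a_3 + d_2
	 t_8 = t_7 + e_6
	 x_2 = t_8 + x_1

     so an unrolled copy of the loop can start computing t_7/t_8 before
     the previous iteration's value of x is available.  */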
  if (TREE_CODE (e) == SSA_NAME)
    {
      gimple stmt;
      long rank;
      tree op;
      unsigned i, n;

      if (SSA_NAME_IS_DEFAULT_DEF (e))
	return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
	return phi_rank (stmt);

      if (!is_gimple_assign (stmt)
	  || gimple_vdef (stmt))
	return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
	return rank;

      /* Otherwise, find the maximum rank for the operands.  As an
	 exception, remove the bias from loop-carried phis when propagating
	 the rank so that dependent operations are not also biased.  */
      if (gimple_assign_single_p (stmt))
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  n = TREE_OPERAND_LENGTH (rhs);
	  if (n == 0)
	    rank = propagate_rank (rank, rhs);
	  else
	    {
	      for (i = 0; i < n; i++)
		{
		  op = TREE_OPERAND (rhs, i);

		  if (op != NULL_TREE)
		    rank = propagate_rank (rank, op);
		}
	    }
	}
      else
	{
	  n = gimple_num_ops (stmt);
	  for (i = 1; i < n; i++)
	    {
	      op = gimple_op (stmt, i);
	      rank = propagate_rank (rank, op);
	    }
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Rank for ");
	  print_generic_expr (dump_file, e, 0);
	  fprintf (dump_file, " is %ld\n", (rank + 1));
	}

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Globals, etc, are rank 0 */
  return 0;
}
/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */
static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    return FLOAT_CONST_TYPE;
  else
    return OTHER_CONST_TYPE;
}
/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry_t oea = *(const operand_entry_t *)pa;
  const operand_entry_t oeb = *(const operand_entry_t *)pb;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oeb->rank == 0 && oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
	return constant_type (oeb->op) - constant_type (oea->op);
      else
	/* To make sorting result stable, we use unique IDs to determine
	   order.  */
	return oeb->id - oea->id;
    }

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if ((oeb->rank - oea->rank == 0)
      && TREE_CODE (oea->op) == SSA_NAME
      && TREE_CODE (oeb->op) == SSA_NAME)
    {
      /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
	 versions of removed SSA_NAMEs, so if possible, prefer to sort
	 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.  */
      if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
	  && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
	  && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
	{
	  gimple stmta = SSA_NAME_DEF_STMT (oea->op);
	  gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
	  basic_block bba = gimple_bb (stmta);
	  basic_block bbb = gimple_bb (stmtb);

	  if (bb_rank[bbb->index] != bb_rank[bba->index])
	    return bb_rank[bbb->index] - bb_rank[bba->index];

	  bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
	  bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
	}

      if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
	return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
      else
	return oeb->id - oea->id;
    }

  if (oeb->rank != oea->rank)
    return oeb->rank - oea->rank;
  else
    return oeb->id - oea->id;
}
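
/* For example (illustrative only): operand entries for
   { b_7 (rank 3), 5, a_4 (rank 2) } sort to { b_7, a_4, 5 }, i.e. in
   decreasing rank with the rank-0 constant at the end of the vector,
   where optimize_ops_list can merge it with other constants.  */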
/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
{
  operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  ops->safe_push (oe);
}

/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
		       HOST_WIDE_INT repeat)
{
  operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}
/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    return true;

  return false;
}

/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of the negate operation.  Otherwise, return NULL.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple stmt = SSA_NAME_DEF_STMT (name);

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}
/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
			  vec<operand_entry_t> *ops,
			  bool *all_done,
			  unsigned int i,
			  operand_entry_t curr,
			  operand_entry_t last)
{
  /* If we have two of the same op, and the opcode is & |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
	{
	case MAX_EXPR:
	case MIN_EXPR:
	case BIT_IOR_EXPR:
	case BIT_AND_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op, 0);
	      fprintf (dump_file, " [&|minmax] ");
	      print_generic_expr (dump_file, last->op, 0);
	      fprintf (dump_file, " -> ");
	      print_generic_stmt (dump_file, last->op, 0);
	    }

	  ops->ordered_remove (i);
	  reassociate_stats.ops_eliminated++;

	  return true;

	case BIT_XOR_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op, 0);
	      fprintf (dump_file, " ^ ");
	      print_generic_expr (dump_file, last->op, 0);
	      fprintf (dump_file, " -> nothing\n");
	    }

	  reassociate_stats.ops_eliminated += 2;

	  if (ops->length () == 2)
	    {
	      add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
	    }
	  else
	    {
	      ops->ordered_remove (i-1);
	      ops->ordered_remove (i-1);
	    }

	  return true;

	default:
	  break;
	}
    }
  return false;
}
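
/* For example (illustrative only): for OPS = { a, b, b } the pair of
   b's collapses to a single b under BIT_AND_EXPR, BIT_IOR_EXPR, MIN or
   MAX (b & b == b), while under BIT_XOR_EXPR both are dropped and a
   zero constant is pushed instead, since b ^ b == 0.  */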
static vec<tree> plus_negates;

/* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
   expression, look in OPS for a corresponding positive operation to cancel
   it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
			   vec<operand_entry_t> *ops,
			   unsigned int currindex,
			   operand_entry_t curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1 ;
       i++)
    {
      if (oe->op == negateop)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, negateop, 0);
	      fprintf (dump_file, " + -");
	      print_generic_expr (dump_file, oe->op, 0);
	      fprintf (dump_file, " -> 0\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated++;

	  return true;
	}
      else if (oe->op == notop)
	{
	  tree op_type = TREE_TYPE (oe->op);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop, 0);
	      fprintf (dump_file, " + ~");
	      print_generic_expr (dump_file, oe->op, 0);
	      fprintf (dump_file, " -> -1\n");
	    }

	  ops->ordered_remove (i);
	  add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
	  ops->ordered_remove (currindex);
	  reassociate_stats.ops_eliminated++;

	  return true;
	}
    }

  /* CURR->OP is a negate expr in a plus expr: save it for later
     inspection in repropagate_negates().  */
  if (negateop != NULL_TREE)
    plus_negates.safe_push (curr->op);
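
/* For example (illustrative only): with OPS = { a, -b, b } for a
   PLUS_EXPR chain, the -b entry cancels b and the list becomes
   { a, 0 }; with OPS = { a, ~b, b } it becomes { a, -1 }, since
   b + ~b is all ones in two's complement.  */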
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0, and return true.  Otherwise, return
   false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
		     vec<operand_entry_t> *ops,
		     unsigned int currindex,
		     operand_entry_t curr)
{
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop, 0);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " & ~");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " | ~");
	      print_generic_expr (dump_file, oe->op, 0);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " -> 0\n");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " -> -1\n");
	    }

	  if (opcode == BIT_AND_EXPR)
	    oe->op = build_zero_cst (TREE_TYPE (oe->op));
	  else if (opcode == BIT_IOR_EXPR)
	    oe->op = build_all_ones_cst (TREE_TYPE (oe->op));

	  reassociate_stats.ops_eliminated += ops->length () - 1;
	  ops->quick_push (oe);
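
/* For example (illustrative only): for OPS = { a, b, ~a } under
   BIT_AND_EXPR the whole conjunction is a & b & ~a == 0, so the list
   collapses to a single 0; under BIT_IOR_EXPR it collapses to -1.  */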
/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */

static void
eliminate_using_constants (enum tree_code opcode,
			   vec<operand_entry_t> *ops)
{
  operand_entry_t oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
	{
	case BIT_AND_EXPR:
	  if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & -1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case BIT_IOR_EXPR:
	  if (integer_all_onesp (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | -1, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;

		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_zerop (oelast->op))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case MULT_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && !HONOR_NANS (type)
		  && !HONOR_SIGNED_ZEROS (type)
		  && real_zerop (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated += ops->length () - 1;
		  ops->truncate (0);
		  ops->quick_push (oelast);
		  return;
		}
	    }
	  else if (integer_onep (oelast->op)
		   || (FLOAT_TYPE_P (type)
		       && !HONOR_SNANS (type)
		       && real_onep (oelast->op)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 1, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case BIT_XOR_EXPR:
	case PLUS_EXPR:
	case MINUS_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
		  && fold_real_zero_addition_p (type, oelast->op,
						opcode == MINUS_EXPR)))
	    {
	      if (ops->length () != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found [|^+] 0, removing\n");
		  ops->pop ();
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	default:
	  break;
	}
    }
}
static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
				 bool, bool);

/* Structure for tracking and counting operands.  */
typedef struct oecount_s {
  int cnt;
  int id;
  enum tree_code oecode;
  tree op;
} oecount;

/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;

/* Oecount hashtable helpers.  */

struct oecount_hasher
{
  typedef int value_type;
  typedef int compare_type;
  static inline hashval_t hash (const value_type &);
  static inline bool equal (const value_type &, const compare_type &);
  static bool is_deleted (int &v) { return v == 1; }
  static void mark_deleted (int &e) { e = 1; }
  static bool is_empty (int &v) { return v == 0; }
  static void mark_empty (int &e) { e = 0; }
  static void remove (int &) {}
};

/* Hash function for oecount.  */

inline hashval_t
oecount_hasher::hash (const value_type &p)
{
  const oecount *c = &cvec[p - 42];
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}

/* Comparison function for oecount.  */

inline bool
oecount_hasher::equal (const value_type &p1, const compare_type &p2)
{
  const oecount *c1 = &cvec[p1 - 42];
  const oecount *c2 = &cvec[p2 - 42];
  return (c1->oecode == c2->oecode
	  && c1->op == c2->op);
}

/* Comparison function for qsort sorting oecount elements by count.  */

static int
oecount_cmp (const void *p1, const void *p2)
{
  const oecount *c1 = (const oecount *)p1;
  const oecount *c2 = (const oecount *)p2;
  if (c1->cnt != c2->cnt)
    return c1->cnt - c2->cnt;
  else
    /* If counts are identical, use unique IDs to stabilize qsort.  */
    return c1->id - c2->id;
}
/* Return TRUE iff STMT represents a builtin call that raises OP
   to some exponent.  */

static bool
stmt_is_power_of_op (gimple stmt, tree op)
{
  tree fndecl;

  if (!is_gimple_call (stmt))
    return false;

  fndecl = gimple_call_fndecl (stmt);

  if (!fndecl
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return false;

  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
    {
    CASE_FLT_FN (BUILT_IN_POW):
    CASE_FLT_FN (BUILT_IN_POWI):
      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));

    default:
      return false;
    }
}

/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */

static HOST_WIDE_INT
decrement_power (gimple stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
    {
    CASE_FLT_FN (BUILT_IN_POW):
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, SIGNED);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_FLT_FN (BUILT_IN_POWI):
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple stmt, tree *def)
{
  tree lhs;
  gimple use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  reassoc_remove_stmt (&gsi);
  release_defs (stmt);
}

/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  gimple stmt = SSA_NAME_DEF_STMT (*def);

  do
    {
      tree name;

      if (opcode == MULT_EXPR
	  && stmt_is_power_of_op (stmt, op))
	{
	  if (decrement_power (stmt) == 1)
	    propagate_op_to_single_use (op, stmt, def);
	  return;
	}

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
	 is ours simply propagate the other operand into the stmts
	 single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
	  && (name == op
	      || gimple_assign_rhs2 (stmt) == op))
	{
	  if (name == op)
	    name = gimple_assign_rhs2 (stmt);
	  propagate_op_to_single_use (name, stmt, def);
	  return;
	}

      /* We might have a multiply of two __builtin_pow* calls, and
	 the operand might be hiding in the rightmost one.  */
      if (opcode == MULT_EXPR
	  && gimple_assign_rhs_code (stmt) == opcode
	  && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
	  && has_single_use (gimple_assign_rhs2 (stmt)))
	{
	  gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
	  if (stmt_is_power_of_op (stmt2, op))
	    {
	      if (decrement_power (stmt2) == 1)
		propagate_op_to_single_use (op, stmt2, def);
	      return;
	    }
	}

      /* Continue walking the chain.  */
      gcc_assert (name != op
		  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
    }
  while (1);
}
/* Returns true if statement S1 dominates statement S2.  Like
   stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */

static bool
reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     SSA_NAME.  Assume it lives at the beginning of function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 == bb2)
    {
      /* PHIs in the same basic block are assumed to be
	 executed all in parallel, if only one stmt is a PHI,
	 it dominates the other stmt in the same basic block.  */
      if (gimple_code (s1) == GIMPLE_PHI)
	return true;

      if (gimple_code (s2) == GIMPLE_PHI)
	return false;

      gcc_assert (gimple_uid (s1) && gimple_uid (s2));

      if (gimple_uid (s1) < gimple_uid (s2))
	return true;

      if (gimple_uid (s1) > gimple_uid (s2))
	return false;

      gimple_stmt_iterator gsi = gsi_for_stmt (s1);
      unsigned int uid = gimple_uid (s1);
      for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple s = gsi_stmt (gsi);
	  if (gimple_uid (s) != uid)
	    break;
	  if (s == s2)
	    return true;
	}

      return false;
    }

  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}
/* Insert STMT after INSERT_POINT.  */

static void
insert_stmt_after (gimple stmt, gimple insert_point)
{
  gimple_stmt_iterator gsi;
  basic_block bb;

  if (gimple_code (insert_point) == GIMPLE_PHI)
    bb = gimple_bb (insert_point);
  else if (!stmt_ends_bb_p (insert_point))
    {
      gsi = gsi_for_stmt (insert_point);
      gimple_set_uid (stmt, gimple_uid (insert_point));
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
      return;
    }
  else
    /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
       thus if it must end a basic block, it should be a call that can
       throw, or some assignment that can throw.  If it throws, the LHS
       of it will not be initialized though, so only valid places using
       the SSA_NAME should be dominated by the fallthru edge.  */
    bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
  gsi = gsi_after_labels (bb);
  if (gsi_end_p (gsi))
    {
      gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
      gimple_set_uid (stmt,
		      gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
    }
  else
    gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
  gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
}
/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
   the result.  Places the statement after the definition of either
   OP1 or OP2.  Returns the new statement.  */

static gimple
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
{
  gimple op1def = NULL, op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gimple sum;

  /* Create the addition statement.  */
  op = make_ssa_name (type);
  sum = gimple_build_assign (op, opcode, op1, op2);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
      if (gsi_end_p (gsi))
	{
	  gimple_stmt_iterator gsi2
	    = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	  gimple_set_uid (sum,
			  gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
	}
      else
	gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else
    {
      gimple insert_point;
      if ((!op1def || gimple_nop_p (op1def))
	  || (op2def && !gimple_nop_p (op2def)
	      && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
	insert_point = op2def;
      else
	insert_point = op1def;
      insert_stmt_after (sum, insert_point);
    }
  return sum;
}
/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
	second candidate bitmap noting all multiplication/division chains
	that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
	candidates with one (operand, code) operation removed from their
	multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
	multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
	processing of (operand, code) pairs continues.

  The alternate addition chains built are re-processed by the main
  reassociation algorithm which allows optimizing a * x * y + b * y * x
  to (a + b) * x * y in one invocation of the reassociation pass.  */
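
/* A small worked instance of the counting step (illustrative only):
   for the chain a*x + b*x + c*y the (operand, code) counts are
   (x, MULT_EXPR) -> 2, (y, MULT_EXPR) -> 1 and one each for a, b, c.
   Processing the most frequent pair first rewrites the chain to
   (a + b) * x + c*y, which the main pass then reassociates further.  */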
static bool
undistribute_ops_list (enum tree_code opcode,
		       vec<operand_entry_t> *ops, struct loop *loop)
{
  unsigned int length = ops->length ();
  operand_entry_t oe1;
  unsigned i, j;
  sbitmap candidates, candidates2;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  vec<operand_entry_t> *subops;
  bool changed = false;
  int next_oecount_id = 0;

  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process.  */
  candidates = sbitmap_alloc (length);
  bitmap_clear (candidates);
  nr_candidates = 0;
  FOR_EACH_VEC_ELT (*ops, i, oe1)
    {
      enum tree_code dcode;
      gimple oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
	continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
	continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
	   && dcode != RDIV_EXPR)
	  || !is_reassociable_op (oe1def, dcode, loop))
	continue;

      bitmap_set_bit (candidates, i);
      nr_candidates++;
    }

  if (nr_candidates < 2)
    {
      sbitmap_free (candidates);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
	(*ops)[bitmap_first_set_bit (candidates)]->op, 0);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.  */
  hash_table<oecount_hasher> ctable (15);

  /* ??? Macro arguments cannot have multi-argument template types in
     them.  This typedef is needed to workaround that limitation.  */
  typedef vec<operand_entry_t> vec_operand_entry_t_heap;
  subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
  EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
    {
      gimple oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
      oecode = gimple_assign_rhs_code (oedef);
      linearize_expr_tree (&subops[i], oedef,
			   associative_tree_code (oecode), false);

      FOR_EACH_VEC_ELT (subops[i], j, oe1)
	{
	  oecount c;
	  int *slot;
	  int idx;
	  c.oecode = oecode;
	  c.cnt = 1;
	  c.id = next_oecount_id++;
	  c.op = oe1->op;
	  idx = cvec.length () + 41;
	  slot = ctable.find_slot (idx, INSERT);
	  if (!*slot)
	    *slot = idx;
	  else
	    cvec[*slot - 42].cnt++;
	}
    }

  /* Sort the counting table.  */
  cvec.qsort (oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      FOR_EACH_VEC_ELT (cvec, j, c)
	{
	  fprintf (dump_file, "  %u %s: ", c->cnt,
		   c->oecode == MULT_EXPR
		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
	  print_generic_expr (dump_file, c->op, 0);
	  fprintf (dump_file, "\n");
	}
    }

  /* Process the (operand, code) pairs in order of most occurrence.  */
  candidates2 = sbitmap_alloc (length);
  while (!cvec.is_empty ())
    {
      oecount *c = &cvec.last ();

      /* Now collect the operands in the outer chain that contain
	 the common operand in their inner chain.  */
      bitmap_clear (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
	{
	  gimple oedef;
	  enum tree_code oecode;
	  unsigned j;
	  tree op = (*ops)[i]->op;

	  /* If we undistributed in this chain already this may be
	     a constant.  */
	  if (TREE_CODE (op) != SSA_NAME)
	    continue;

	  oedef = SSA_NAME_DEF_STMT (op);
	  oecode = gimple_assign_rhs_code (oedef);
	  if (oecode != c->oecode)
	    continue;

	  FOR_EACH_VEC_ELT (subops[i], j, oe1)
	    {
	      if (oe1->op == c->op)
		{
		  bitmap_set_bit (candidates2, i);
		  ++nr_candidates2;
		  break;
		}
	    }
	}

      if (nr_candidates2 >= 2)
	{
	  operand_entry_t oe1, oe2;
	  gimple prod;
	  int first = bitmap_first_set_bit (candidates2);

	  /* Build the new addition chain.  */
	  oe1 = (*ops)[first];
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Building (");
	      print_generic_expr (dump_file, oe1->op, 0);
	    }
	  zero_one_operation (&oe1->op, c->oecode, c->op);
	  EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
	    {
	      gimple sum;
	      oe2 = (*ops)[i];
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " + ");
		  print_generic_expr (dump_file, oe2->op, 0);
		}
	      zero_one_operation (&oe2->op, c->oecode, c->op);
	      sum = build_and_add_sum (TREE_TYPE (oe1->op),
				       oe1->op, oe2->op, opcode);
	      oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
	      oe2->rank = 0;
	      oe1->op = gimple_get_lhs (sum);
	    }

	  /* Apply the multiplication/division.  */
	  prod = build_and_add_sum (TREE_TYPE (oe1->op),
				    oe1->op, c->op, c->oecode);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
	      print_generic_expr (dump_file, c->op, 0);
	      fprintf (dump_file, "\n");
	    }

	  /* Record it in the addition chain and disable further
	     undistribution with this op.  */
	  oe1->op = gimple_assign_lhs (prod);
	  oe1->rank = get_rank (oe1->op);
	  subops[first].release ();

	  changed = true;
	}

      cvec.pop ();
    }

  for (i = 0; i < ops->length (); ++i)
    subops[i].release ();
  free (subops);
  cvec.release ();
  sbitmap_free (candidates);
  sbitmap_free (candidates2);

  return changed;
}
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
   expression, examine the other OPS to see if any of them are comparisons
   of the same values, which we may be able to combine or eliminate.
   For example, we can rewrite (a < b) | (a == b) as (a <= b).  */

static bool
eliminate_redundant_comparison (enum tree_code opcode,
				vec<operand_entry_t> *ops,
				unsigned int currindex,
				operand_entry_t curr)
{
  tree op1, op2;
  enum tree_code lcode, rcode;
  gimple def1, def2;
  unsigned int i;
  operand_entry_t oe;

  if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
    return false;

  /* Check that CURR is a comparison.  */
  if (TREE_CODE (curr->op) != SSA_NAME)
    return false;
  def1 = SSA_NAME_DEF_STMT (curr->op);
  if (!is_gimple_assign (def1))
    return false;
  lcode = gimple_assign_rhs_code (def1);
  if (TREE_CODE_CLASS (lcode) != tcc_comparison)
    return false;
  op1 = gimple_assign_rhs1 (def1);
  op2 = gimple_assign_rhs2 (def1);

  /* Now look for a similar comparison in the remaining OPS.  */
  for (i = currindex + 1; ops->iterate (i, &oe); i++)
    {
      tree t;

      if (TREE_CODE (oe->op) != SSA_NAME)
	continue;
      def2 = SSA_NAME_DEF_STMT (oe->op);
      if (!is_gimple_assign (def2))
	continue;
      rcode = gimple_assign_rhs_code (def2);
      if (TREE_CODE_CLASS (rcode) != tcc_comparison)
	continue;

      /* If we got here, we have a match.  See if we can combine the
	 two comparisons.  */
      if (opcode == BIT_IOR_EXPR)
	t = maybe_fold_or_comparisons (lcode, op1, op2,
				       rcode, gimple_assign_rhs1 (def2),
				       gimple_assign_rhs2 (def2));
      else
	t = maybe_fold_and_comparisons (lcode, op1, op2,
					rcode, gimple_assign_rhs1 (def2),
					gimple_assign_rhs2 (def2));
      if (!t)
	continue;

      /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
	 always give us a boolean_type_node value back.  If the original
	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
	 we need to convert.  */
      if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
	t = fold_convert (TREE_TYPE (curr->op), t);

      if (TREE_CODE (t) != INTEGER_CST
	  && !operand_equal_p (t, curr->op, 0))
	{
	  enum tree_code subcode;
	  tree newop1, newop2;
	  if (!COMPARISON_CLASS_P (t))
	    continue;
	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
	  STRIP_USELESS_TYPE_CONVERSION (newop1);
	  STRIP_USELESS_TYPE_CONVERSION (newop2);
	  if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
	    continue;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Equivalence: ");
	  print_generic_expr (dump_file, curr->op, 0);
	  fprintf (dump_file, " %s ", op_symbol_code (opcode));
	  print_generic_expr (dump_file, oe->op, 0);
	  fprintf (dump_file, " -> ");
	  print_generic_expr (dump_file, t, 0);
	  fprintf (dump_file, "\n");
	}

      /* Now we can delete oe, as it has been subsumed by the new combined
         expression t.  */
      ops->ordered_remove (i);
      reassociate_stats.ops_eliminated ++;

      /* If t is the same as curr->op, we're done.  Otherwise we must
	 replace curr->op with t.  Special case is if we got a constant
	 back, in which case we add it to the end instead of in place of
	 the current entry.  */
      if (TREE_CODE (t) == INTEGER_CST)
	{
	  ops->ordered_remove (currindex);
	  add_to_ops_vec (ops, t);
	}
      else if (!operand_equal_p (t, curr->op, 0))
	{
	  gimple sum;
	  enum tree_code subcode;
	  tree newop1;
	  tree newop2;
	  gcc_assert (COMPARISON_CLASS_P (t));
	  extract_ops_from_tree (t, &subcode, &newop1, &newop2);
	  STRIP_USELESS_TYPE_CONVERSION (newop1);
	  STRIP_USELESS_TYPE_CONVERSION (newop2);
	  gcc_checking_assert (is_gimple_val (newop1)
			       && is_gimple_val (newop2));
	  sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
	  curr->op = gimple_get_lhs (sum);
	}
      return true;
    }

  return false;
}
/* Perform various identities and other optimizations on the list of
   operand entries, stored in OPS.  The tree code for the binary
   operation between all the operands is OPCODE.  */

static void
optimize_ops_list (enum tree_code opcode,
		   vec<operand_entry_t> *ops)
{
  unsigned int length = ops->length ();
  unsigned int i;
  operand_entry_t oe;
  operand_entry_t oelast = NULL;
  bool iterate = false;

  if (length == 1)
    return;

  oelast = ops->last ();

  /* If the last two are constants, pop the constants off, merge them
     and try the next two.  */
  if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
    {
      operand_entry_t oelm1 = (*ops)[length - 2];

      if (oelm1->rank == 0
	  && is_gimple_min_invariant (oelm1->op)
	  && useless_type_conversion_p (TREE_TYPE (oelm1->op),
					TREE_TYPE (oelast->op)))
	{
	  tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
				     oelm1->op, oelast->op);

	  if (folded && is_gimple_min_invariant (folded))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Merging constants\n");

	      ops->pop ();
	      ops->pop ();

	      add_to_ops_vec (ops, folded);
	      reassociate_stats.constants_eliminated++;

	      optimize_ops_list (opcode, ops);
	      return;
	    }
	}
    }

  eliminate_using_constants (opcode, ops);
  oelast = NULL;

  for (i = 0; ops->iterate (i, &oe);)
    {
      bool done = false;

      if (eliminate_not_pairs (opcode, ops, i, oe))
	return;
      if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
	  || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
	  || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
	{
	  if (done)
	    return;
	  iterate = true;
	  oelast = NULL;
	  continue;
	}
      oelast = oe;
      i++;
    }

  length = ops->length ();
  oelast = ops->last ();

  if (iterate)
    optimize_ops_list (opcode, ops);
}
/* The following functions are subroutines to optimize_range_tests and allow
   it to try to change a logical combination of comparisons into a range
   test.

   For example, both
	X == 2 || X == 5 || X == 3 || X == 4
   and
	X >= 2 && X <= 5
   are converted to
	(unsigned) (X - 2) <= 3

   For more information see comments above fold_test_range in fold-const.c,
   this implementation is for GIMPLE.  */
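
/* For illustration only: each comparison above becomes a range_entry
   { exp, in_p, low, high }, e.g. X == 2 becomes +[2, 2].  After sorting
   the entries for the same expression, adjacent and overlapping ranges
   are merged, so the four entries for X collapse into the single range
   +[2, 5], which is then emitted as the unsigned subtract-and-compare
   shown above.  */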
struct range_entry
{
  tree exp;
  tree low;
  tree high;
  bool in_p;
  bool strict_overflow_p;
  unsigned int idx, next;
};

/* This is similar to make_range in fold-const.c, but on top of
   GIMPLE instead of trees.  If EXP is non-NULL, it should be
   an SSA_NAME and STMT argument is ignored, otherwise STMT
   argument should be a GIMPLE_COND.  */

static void
init_range_entry (struct range_entry *r, tree exp, gimple stmt)
{
  int in_p;
  tree low, high;
  bool is_bool, strict_overflow_p;

  r->exp = NULL_TREE;
  r->in_p = false;
  r->strict_overflow_p = false;
  r->low = NULL_TREE;
  r->high = NULL_TREE;
  if (exp != NULL_TREE
      && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
    return;

  /* Start with simply saying "EXP != 0" and then look at the code of EXP
     and see if we can refine the range.  Some of the cases below may not
     happen, but it doesn't seem worth worrying about this.  We "continue"
     the outer loop when we've changed something; otherwise we "break"
     the switch, which will "break" the while.  */
  low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
  high = low;
  in_p = 0;
  strict_overflow_p = false;
  is_bool = false;
  if (exp == NULL_TREE)
    is_bool = true;
  else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
    {
      if (TYPE_UNSIGNED (TREE_TYPE (exp)))
	is_bool = true;
      else
	return;
    }
  else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
    is_bool = true;

  while (1)
    {
      enum tree_code code;
      tree arg0, arg1, exp_type;
      tree nexp;
      location_t loc;

      if (exp != NULL_TREE)
	{
	  if (TREE_CODE (exp) != SSA_NAME
	      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
	    break;

	  stmt = SSA_NAME_DEF_STMT (exp);
	  if (!is_gimple_assign (stmt))
	    break;

	  code = gimple_assign_rhs_code (stmt);
	  arg0 = gimple_assign_rhs1 (stmt);
	  arg1 = gimple_assign_rhs2 (stmt);
	  exp_type = TREE_TYPE (exp);
	}
      else
	{
	  code = gimple_cond_code (stmt);
	  arg0 = gimple_cond_lhs (stmt);
	  arg1 = gimple_cond_rhs (stmt);
	  exp_type = boolean_type_node;
	}

      if (TREE_CODE (arg0) != SSA_NAME)
	break;
      loc = gimple_location (stmt);

      if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
	  /* Ensure the range is either +[-,0], +[0,0],
	     -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
	     -[1,1].  If it is e.g. +[-,-] or -[-,-]
	     or similar expression of unconditional true or
	     false, it should not be negated.  */
	  && ((high && integer_zerop (high))
	      || (low && integer_onep (low))))
	{
	  in_p = !in_p;
	  exp = arg0;
	  continue;
	}

      if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
	{
	  if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
	    is_bool = true;
	  else
	    return;
	}
      else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
	is_bool = true;

      nexp = make_range_step (loc, code, arg0, arg1, exp_type,
			      &low, &high, &in_p,
			      &strict_overflow_p);
      if (nexp != NULL_TREE)
	{
	  exp = nexp;
	  gcc_assert (TREE_CODE (exp) == SSA_NAME);
	  continue;
	}
      break;
    }

  r->exp = exp;
  r->in_p = in_p;
  r->low = low;
  r->high = high;
  r->strict_overflow_p = strict_overflow_p;
}
/* Comparison function for qsort.  Sort entries
   without SSA_NAME exp first, then with SSA_NAMEs sorted
   by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
   by increasing ->low and if ->low is the same, by increasing
   ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
   means maximum.  */

static int
range_entry_cmp (const void *a, const void *b)
{
  const struct range_entry *p = (const struct range_entry *) a;
  const struct range_entry *q = (const struct range_entry *) b;

  if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
    {
      if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
	{
	  /* Group range_entries for the same SSA_NAME together.  */
	  if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
	    return -1;
	  else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
	    return 1;
	  /* If ->low is different, NULL low goes first, then by
	     ascending low.  */
	  if (p->low != NULL_TREE)
	    {
	      if (q->low != NULL_TREE)
		{
		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
					  p->low, q->low);
		  if (tem && integer_onep (tem))
		    return -1;
		  tem = fold_binary (GT_EXPR, boolean_type_node,
				     p->low, q->low);
		  if (tem && integer_onep (tem))
		    return 1;
		}
	      else
		return 1;
	    }
	  else if (q->low != NULL_TREE)
	    return -1;
	  /* If ->high is different, NULL high goes last, before that by
	     ascending high.  */
	  if (p->high != NULL_TREE)
	    {
	      if (q->high != NULL_TREE)
		{
		  tree tem = fold_binary (LT_EXPR, boolean_type_node,
					  p->high, q->high);
		  if (tem && integer_onep (tem))
		    return -1;
		  tem = fold_binary (GT_EXPR, boolean_type_node,
				     p->high, q->high);
		  if (tem && integer_onep (tem))
		    return 1;
		}
	      else
		return -1;
	    }
	  else if (q->high != NULL_TREE)
	    return 1;
	  /* If both ranges are the same, sort below by ascending idx.  */
	}
      else
	return 1;
    }
  else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
    return -1;

  if (p->idx < q->idx)
    return -1;
  else
    {
      gcc_checking_assert (p->idx > q->idx);
      return 1;
    }
}
/* Helper routine of optimize_range_test.
   [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
   RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
   OPCODE and OPS are arguments of optimize_range_tests.  If OTHERRANGE
   is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
   an array of COUNT pointers to other ranges.  Return
   true if the range merge has been successful.
   If OPCODE is ERROR_MARK, this is called from within
   maybe_optimize_range_tests and is performing inter-bb range optimization.
   In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
   oe->rank.  */

static bool
update_range_test (struct range_entry *range, struct range_entry *otherrange,
		   struct range_entry **otherrangep,
		   unsigned int count, enum tree_code opcode,
		   vec<operand_entry_t> *ops, tree exp, gimple_seq seq,
		   bool in_p, tree low, tree high, bool strict_overflow_p)
{
  operand_entry_t oe = (*ops)[range->idx];
  tree op = oe->op;
  gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
    last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
  location_t loc = gimple_location (stmt);
  tree optype = op ? TREE_TYPE (op) : boolean_type_node;
  tree tem = build_range_check (loc, optype, unshare_expr (exp),
				in_p, low, high);
  enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
  gimple_stmt_iterator gsi;
  unsigned int i;

  if (tem == NULL_TREE)
    return false;

  if (strict_overflow_p && issue_strict_overflow_warning (wc))
    warning_at (loc, OPT_Wstrict_overflow,
		"assuming signed overflow does not occur "
		"when simplifying range test");

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct range_entry *r;
      fprintf (dump_file, "Optimizing range tests ");
      print_generic_expr (dump_file, range->exp, 0);
      fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
      print_generic_expr (dump_file, range->low, 0);
      fprintf (dump_file, ", ");
      print_generic_expr (dump_file, range->high, 0);
      fprintf (dump_file, "]");
      for (i = 0; i < count; i++)
	{
	  if (otherrange)
	    r = otherrange + i;
	  else
	    r = otherrangep[i];
	  fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
	  print_generic_expr (dump_file, r->low, 0);
	  fprintf (dump_file, ", ");
	  print_generic_expr (dump_file, r->high, 0);
	  fprintf (dump_file, "]");
	}
      fprintf (dump_file, "\n into ");
      print_generic_expr (dump_file, tem, 0);
      fprintf (dump_file, "\n");
    }

  if (opcode == BIT_IOR_EXPR
      || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
    tem = invert_truthvalue_loc (loc, tem);

  tem = fold_convert_loc (loc, optype, tem);
  gsi = gsi_for_stmt (stmt);
  unsigned int uid = gimple_uid (stmt);
  /* In rare cases range->exp can be equal to lhs of stmt.
     In that case we have to insert after the stmt rather then before
     it.  If stmt is a PHI, insert it at the start of the basic block.  */
  if (op != range->exp)
    {
      gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
				      GSI_SAME_STMT);
    }
  else if (gimple_code (stmt) != GIMPLE_PHI)
    {
      gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
				      GSI_CONTINUE_LINKING);
    }
  else
    {
      gsi = gsi_after_labels (gimple_bb (stmt));
      if (!gsi_end_p (gsi))
	uid = gimple_uid (gsi_stmt (gsi));
      else
	{
	  gsi = gsi_start_bb (gimple_bb (stmt));
	  while (!gsi_end_p (gsi))
	    {
	      uid = gimple_uid (gsi_stmt (gsi));
	      gsi_next (&gsi);
	    }
	}
      gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
      tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
				      GSI_SAME_STMT);
      if (gsi_end_p (gsi))
	gsi = gsi_last_bb (gimple_bb (stmt));
    }
  for (; !gsi_end_p (gsi); gsi_prev (&gsi))
    if (gimple_uid (gsi_stmt (gsi)))
      break;
    else
      gimple_set_uid (gsi_stmt (gsi), uid);

  oe->op = tem;
  range->exp = exp;
  range->low = low;
  range->high = high;
  range->in_p = in_p;
  range->strict_overflow_p = false;

  for (i = 0; i < count; i++)
    {
      if (otherrange)
	range = otherrange + i;
      else
	range = otherrangep[i];
      oe = (*ops)[range->idx];
      /* Now change all the other range test immediate uses, so that
	 those tests will be optimized away.  */
      if (opcode == ERROR_MARK)
	{
	  if (oe->op)
	    oe->op = build_int_cst (TREE_TYPE (oe->op),
				    oe->rank == BIT_IOR_EXPR ? 0 : 1);
	  else
	    oe->op = (oe->rank == BIT_IOR_EXPR
		      ? boolean_false_node : boolean_true_node);
	}
      else
	oe->op = error_mark_node;
      range->exp = NULL_TREE;
    }
  return true;
}
/* Optimize X == CST1 || X == CST2
   if popcount (CST1 ^ CST2) == 1 into
   (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
   Similarly for ranges.  E.g.
   X != 2 && X != 3 && X != 10 && X != 11
   will be transformed by the previous optimization into
   !((X - 2U) <= 1U || (X - 10U) <= 1U)
   and this loop can transform that into
   !(((X & ~8) - 2U) <= 1U).  */
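
/* For example (illustrative only): X == 4 || X == 6 has 4 ^ 6 == 2,
   a single bit, so it becomes (X & ~2) == (4 & ~2), i.e. (X & ~2) == 4,
   which holds exactly for X == 4 and X == 6.  */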
static bool
optimize_range_tests_xor (enum tree_code opcode, tree type,
			  tree lowi, tree lowj, tree highi, tree highj,
			  vec<operand_entry_t> *ops,
			  struct range_entry *rangei,
			  struct range_entry *rangej)
{
  tree lowxor, highxor, tem, exp;
  /* Check lowi ^ lowj == highi ^ highj and
     popcount (lowi ^ lowj) == 1.  */
  lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
  if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
    return false;
  if (!integer_pow2p (lowxor))
    return false;
  highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
  if (!tree_int_cst_equal (lowxor, highxor))
    return false;

  tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
  exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
  lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
  highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
  if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
			 NULL, rangei->in_p, lowj, highj,
			 rangei->strict_overflow_p
			 || rangej->strict_overflow_p))
    return true;
  return false;
}
2298 /* Optimize X == CST1 || X == CST2
2299 if popcount (CST2 - CST1) == 1 into
2300 ((X - CST1) & ~(CST2 - CST1)) == 0.
2301 Similarly for ranges. E.g.
2302 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2303 || X == 75 || X == 45
2304 will be transformed by the previous optimization into
2305 (X - 43U) <= 3U || (X - 75U) <= 3U
2306 and this loop can transform that into
2307 ((X - 43U) & ~(75U - 43U)) <= 3U. */
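/* For illustration only, tracing the example above: lowi = 43, highi = 46,
   lowj = 75, highj = 78.  highi - lowi == highj - lowj == 3, and
   lowj - lowi == 32 is a power of two, so the mask is ~32U and the two
   tests become ((X - 43U) & ~32U) <= 3U, i.e. the values 43..46 and
   75..78.  */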
optimize_range_tests_diff (enum tree_code opcode, tree type,
			   tree lowi, tree lowj, tree highi, tree highj,
			   vec<operand_entry_t> *ops,
			   struct range_entry *rangei,
			   struct range_entry *rangej)
  tree tem1, tem2, mask;
  /* Check highi - lowi == highj - lowj.  */
  tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
  tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
  if (!tree_int_cst_equal (tem1, tem2))
  /* Check popcount (lowj - lowi) == 1.  */
  tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
  if (!integer_pow2p (tem1))

  type = unsigned_type_for (type);
  tem1 = fold_convert (type, tem1);
  tem2 = fold_convert (type, tem2);
  lowi = fold_convert (type, lowi);
  mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
  tem1 = fold_binary (MINUS_EXPR, type,
		      fold_convert (type, rangei->exp), lowi);
  tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
  lowj = build_int_cst (type, 0);
  if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
			 NULL, rangei->in_p, lowj, tem2,
			 rangei->strict_overflow_p
			 || rangej->strict_overflow_p))
/* Does the common checks for the functions optimize_range_tests_xor and
   optimize_range_tests_diff.
   If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
   Else it calls optimize_range_tests_diff.  */
optimize_range_tests_1 (enum tree_code opcode, int first, int length,
			bool optimize_xor, vec<operand_entry_t> *ops,
			struct range_entry *ranges)
  bool any_changes = false;

  for (i = first; i < length; i++)
      tree lowi, highi, lowj, highj, type, tem;

      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
      type = TREE_TYPE (ranges[i].exp);
      if (!INTEGRAL_TYPE_P (type))
      lowi = ranges[i].low;
      if (lowi == NULL_TREE)
	lowi = TYPE_MIN_VALUE (type);
      highi = ranges[i].high;
      if (highi == NULL_TREE)
      for (j = i + 1; j < length && j < i + 64; j++)
	  if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
	  lowj = ranges[j].low;
	  if (lowj == NULL_TREE)
	  highj = ranges[j].high;
	  if (highj == NULL_TREE)
	    highj = TYPE_MAX_VALUE (type);
	  /* Check lowj > highi.  */
	  tem = fold_binary (GT_EXPR, boolean_type_node,
			     lowj, highi);
	  if (tem == NULL_TREE || !integer_onep (tem))
	  changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
					      highi, highj, ops,
					      ranges + i, ranges + j);
	  changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
					       highi, highj, ops,
					       ranges + i, ranges + j);

/* Helper function of optimize_range_tests_to_bit_test.  Handle a single
   range, EXP, LOW, HIGH, compute bit mask of bits to test and return
   EXP on success, NULL otherwise.  */
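/* A hypothetical example (values chosen for illustration, not taken from
   any source): for the range LOW = 75, HIGH = 78 with TOTALLOW = 43 and
   prec = 64, the initial mask covers high - low + 1 = 4 bits, i.e. 0xf,
   and the final shift by low - totallow = 32 yields 0xf00000000, the
   bits those four values occupy relative to TOTALLOW.  */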
extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
		       wide_int *mask, tree *totallowp)
  tree tem = int_const_binop (MINUS_EXPR, high, low);
  if (tem == NULL_TREE
      || TREE_CODE (tem) != INTEGER_CST
      || TREE_OVERFLOW (tem)
      || tree_int_cst_sgn (tem) == -1
      || compare_tree_int (tem, prec) != -1)

  unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
  *mask = wi::shifted_mask (0, max, false, prec);
  if (TREE_CODE (exp) == BIT_AND_EXPR
      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
      widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
      msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
      if (wi::popcount (msk) == 1
	  && wi::ltu_p (msk, prec - max))
	  *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
	  max += msk.to_uhwi ();
	  exp = TREE_OPERAND (exp, 0);
  if (integer_zerop (low)
      && TREE_CODE (exp) == PLUS_EXPR
      && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
      tree ret = TREE_OPERAND (exp, 0);
	= wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
			     TYPE_PRECISION (TREE_TYPE (low))));
      tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
      else if (!tree_int_cst_lt (totallow, tbias))
	  bias = wi::to_widest (tbias);
	  bias -= wi::to_widest (totallow);
	  if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
	      *mask = wi::lshift (*mask, bias);
  if (!tree_int_cst_lt (totallow, low))
  tem = int_const_binop (MINUS_EXPR, low, totallow);
  if (tem == NULL_TREE
      || TREE_CODE (tem) != INTEGER_CST
      || TREE_OVERFLOW (tem)
      || compare_tree_int (tem, prec - max) == 1)
  *mask = wi::lshift (*mask, wi::to_widest (tem));
/* Attempt to optimize small range tests using bit test.
   E.g.
   X != 43 && X != 76 && X != 44 && X != 78 && X != 49
   && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
   has been optimized by earlier optimizations into:
   ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
   As the whole 43 through 82 range spans fewer than 64 values,
   for 64-bit word targets optimize that into:
   (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */
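/* For illustration, derived only from the example above: relative to 43,
   the tested values { 43, 44, 45, 46, 49, 75, 76, 77, 78, 82 } sit at bit
   positions 0-3, 6, 32-35 and 39, which is exactly the word-sized constant
   0x8F0000004FULL shown above, so a single shift, AND and compare replace
   the remaining comparisons.  */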
optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
				  vec<operand_entry_t> *ops,
				  struct range_entry *ranges)
  bool any_changes = false;
  int prec = GET_MODE_BITSIZE (word_mode);
  auto_vec<struct range_entry *, 64> candidates;

  for (i = first; i < length - 2; i++)
      tree lowi, highi, lowj, highj, type;

      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
      type = TREE_TYPE (ranges[i].exp);
      if (!INTEGRAL_TYPE_P (type))
      lowi = ranges[i].low;
      if (lowi == NULL_TREE)
	lowi = TYPE_MIN_VALUE (type);
      highi = ranges[i].high;
      if (highi == NULL_TREE)
      tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
					highi, &mask, &lowi);
      if (exp == NULL_TREE)
      bool strict_overflow_p = ranges[i].strict_overflow_p;
      candidates.truncate (0);
      int end = MIN (i + 64, length);
      for (j = i + 1; j < end; j++)
	  if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
	  if (ranges[j].exp == exp)
	  else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
	      exp2 = TREE_OPERAND (ranges[j].exp, 0);
	      else if (TREE_CODE (exp2) == PLUS_EXPR)
		  exp2 = TREE_OPERAND (exp2, 0);
	  lowj = ranges[j].low;
	  if (lowj == NULL_TREE)
	  highj = ranges[j].high;
	  if (highj == NULL_TREE)
	    highj = TYPE_MAX_VALUE (type);
	  exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
					highj, &mask2, NULL);
	  strict_overflow_p |= ranges[j].strict_overflow_p;
	  candidates.safe_push (&ranges[j]);

      /* If we need otherwise 3 or more comparisons, use a bit test.  */
      if (candidates.length () >= 2)
	  tree high = wide_int_to_tree (TREE_TYPE (lowi),
					wi::to_widest (lowi)
					+ prec - 1 - wi::clz (mask));
	  operand_entry_t oe = (*ops)[ranges[i].idx];
	  gimple stmt = op ? SSA_NAME_DEF_STMT (op)
			   : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
	  location_t loc = gimple_location (stmt);
	  tree optype = op ? TREE_TYPE (op) : boolean_type_node;
	  /* See if it isn't cheaper to pretend the minimum value of the
	     range is 0, if the maximum value is small enough.
	     We can then avoid the subtraction of the minimum value, but
	     the mask constant could perhaps be more expensive.  */
	  if (compare_tree_int (lowi, 0) > 0
	      && compare_tree_int (high, prec) < 0)
	      HOST_WIDE_INT m = tree_to_uhwi (lowi);
	      rtx reg = gen_raw_REG (word_mode, 10000);
	      bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
	      cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
						      GEN_INT (-m)), speed_p);
	      rtx r = immed_wide_int_const (mask, word_mode);
	      cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
					 speed_p);
	      r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
	      cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
					 speed_p);
		  mask = wi::lshift (mask, m);
		  lowi = build_zero_cst (TREE_TYPE (lowi));

	  tree tem = build_range_check (loc, optype, unshare_expr (exp),
	  if (tem == NULL_TREE || is_gimple_val (tem))
	  tree etype = unsigned_type_for (TREE_TYPE (exp));
	  exp = fold_build2_loc (loc, MINUS_EXPR, etype,
				 fold_convert_loc (loc, etype, exp),
				 fold_convert_loc (loc, etype, lowi));
	  exp = fold_convert_loc (loc, integer_type_node, exp);
	  tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
	  exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
				 build_int_cst (word_type, 1), exp);
	  exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
				 wide_int_to_tree (word_type, mask));
	  exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
				 build_zero_cst (word_type));
	  if (is_gimple_val (exp))
	  /* The shift might have undefined behavior if TEM is true,
	     but reassociate_bb isn't prepared to have basic blocks
	     split when it is running.  So, temporarily emit code
	     with BIT_IOR_EXPR instead of &&, and fix it up in
	     the branch fixups done afterwards.  */
	  tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
	  gcc_assert (TREE_CODE (tem) == SSA_NAME);
	  gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
	  exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
	  gimple_seq_add_seq_without_update (&seq, seq2);
	  gcc_assert (TREE_CODE (exp) == SSA_NAME);
	  gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
	  gimple g = gimple_build_assign (make_ssa_name (optype),
					  BIT_IOR_EXPR, tem, exp);
	  gimple_set_location (g, loc);
	  gimple_seq_add_stmt_without_update (&seq, g);
	  exp = gimple_assign_lhs (g);
	  tree val = build_zero_cst (optype);
	  if (update_range_test (&ranges[i], NULL, candidates.address (),
				 candidates.length (), opcode, ops, exp,
				 seq, false, val, val, strict_overflow_p))
	      reassoc_branch_fixups.safe_push (tem);
	    gimple_seq_discard (seq);
/* Optimize range tests, similarly to how fold_range_test optimizes
   it on trees.  The tree code for the binary
   operation between all the operands is OPCODE.
   If OPCODE is ERROR_MARK, optimize_range_tests is called from within
   maybe_optimize_range_tests for inter-bb range optimization.
   In that case if oe->op is NULL, oe->id is bb->index whose
   GIMPLE_COND is && or ||ed into the test, and oe->rank says
   the actual opcode.  */
optimize_range_tests (enum tree_code opcode,
		      vec<operand_entry_t> *ops)
  unsigned int length = ops->length (), i, j, first;
  struct range_entry *ranges;
  bool any_changes = false;

  ranges = XNEWVEC (struct range_entry, length);
  for (i = 0; i < length; i++)
      init_range_entry (ranges + i, oe->op,
			last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
      /* For | invert it now, we will invert it again before emitting
	 the optimized expression.  */
      if (opcode == BIT_IOR_EXPR
	  || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
	ranges[i].in_p = !ranges[i].in_p;

  qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
  for (i = 0; i < length; i++)
    if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)

  /* Try to merge ranges.  */
  for (first = i; i < length; i++)
      tree low = ranges[i].low;
      tree high = ranges[i].high;
      int in_p = ranges[i].in_p;
      bool strict_overflow_p = ranges[i].strict_overflow_p;
      int update_fail_count = 0;

      for (j = i + 1; j < length; j++)
	  if (ranges[i].exp != ranges[j].exp)
	  if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
			     ranges[j].in_p, ranges[j].low, ranges[j].high))
	  strict_overflow_p |= ranges[j].strict_overflow_p;

      if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
			     opcode, ops, ranges[i].exp, NULL, in_p,
			     low, high, strict_overflow_p))
      /* Avoid quadratic complexity if all merge_ranges calls would succeed,
	 while update_range_test would fail.  */
      else if (update_fail_count == 64)
	++update_fail_count;

  any_changes |= optimize_range_tests_1 (opcode, first, length, true,
					 ops, ranges);

  if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
    any_changes |= optimize_range_tests_1 (opcode, first, length, false,
					   ops, ranges);

  if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
    any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
						     ops, ranges);

  if (any_changes && opcode != ERROR_MARK)
      FOR_EACH_VEC_ELT (*ops, i, oe)
	  if (oe->op == error_mark_node)

  XDELETEVEC (ranges);
/* Return true if STMT is a cast like:
   _123 = (int) _234;

   # _345 = PHI <_123(N), 1(...), 1(...)>
   where _234 has bool type, _123 has single use and
   bb N has a single successor M.  This is commonly used in
   the last block of a range test.  */
final_range_test_p (gimple stmt)
  basic_block bb, rhs_bb;
  use_operand_p use_p;

  if (!gimple_assign_cast_p (stmt))
  bb = gimple_bb (stmt);
  if (!single_succ_p (bb))
  e = single_succ_edge (bb);
  if (e->flags & EDGE_COMPLEX)

  lhs = gimple_assign_lhs (stmt);
  rhs = gimple_assign_rhs1 (stmt);
  if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
      || TREE_CODE (rhs) != SSA_NAME
      || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)

  /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
  if (!single_imm_use (lhs, &use_p, &use_stmt))

  if (gimple_code (use_stmt) != GIMPLE_PHI
      || gimple_bb (use_stmt) != e->dest)

  /* And that the rhs is defined in the same loop.  */
  rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
      || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
/* Return true if BB is a suitable basic block for inter-bb range test
   optimization.  If BACKWARD is true, BB should be the only predecessor
   of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
   or compared against to find a common basic block to which all conditions
   branch when true resp. false.  If BACKWARD is false, TEST_BB should
   be the only predecessor of BB.  */
suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
  edge_iterator ei, ei2;
  bool other_edge_seen = false;

  /* Check last stmt first.  */
  stmt = last_stmt (bb);
      || (gimple_code (stmt) != GIMPLE_COND
	  && (backward || !final_range_test_p (stmt)))
      || gimple_visited_p (stmt)
      || stmt_could_throw_p (stmt)
  is_cond = gimple_code (stmt) == GIMPLE_COND;

      /* If last stmt is GIMPLE_COND, verify that one of the succ edges
	 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
	 to *OTHER_BB (if not set yet, try to find it out).  */
      if (EDGE_COUNT (bb->succs) != 2)
      FOR_EACH_EDGE (e, ei, bb->succs)
	  if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	  if (e->dest == test_bb)
	  if (*other_bb == NULL)
	      FOR_EACH_EDGE (e2, ei2, test_bb->succs)
		if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
		else if (e->dest == e2->dest)
		  *other_bb = e->dest;
	      if (*other_bb == NULL)
	  if (e->dest == *other_bb)
	    other_edge_seen = true;
      if (*other_bb == NULL || !other_edge_seen)
  else if (single_succ (bb) != *other_bb)

  /* Now check all PHIs of *OTHER_BB.  */
  e = find_edge (bb, *other_bb);
  e2 = find_edge (test_bb, *other_bb);
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
      gphi *phi = gsi.phi ();
      /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
	 corresponding to BB and TEST_BB predecessor must be the same.  */
      if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
			    gimple_phi_arg_def (phi, e2->dest_idx), 0))
      /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
	 one of the PHIs should have the lhs of the last stmt in
	 that block as PHI arg and that PHI should have 0 or 1
	 corresponding to it in all other range test basic blocks.  */
	  if (gimple_phi_arg_def (phi, e->dest_idx)
	      == gimple_assign_lhs (stmt)
	      && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
		  || integer_onep (gimple_phi_arg_def (phi,
	      gimple test_last = last_stmt (test_bb);
	      if (gimple_code (test_last) != GIMPLE_COND
		  && gimple_phi_arg_def (phi, e2->dest_idx)
		     == gimple_assign_lhs (test_last)
		  && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
		      || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
/* Return true if BB doesn't have side-effects that would disallow
   range test optimization, all SSA_NAMEs set in the bb are consumed
   in the bb and there are no PHIs.  */

no_side_effect_bb (basic_block bb)
  gimple_stmt_iterator gsi;

  if (!gimple_seq_empty_p (phi_nodes (bb)))
  last = last_stmt (bb);
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      gimple stmt = gsi_stmt (gsi);
      imm_use_iterator imm_iter;
      use_operand_p use_p;

      if (is_gimple_debug (stmt))
      if (gimple_has_side_effects (stmt))
      if (!is_gimple_assign (stmt))
      lhs = gimple_assign_lhs (stmt);
      if (TREE_CODE (lhs) != SSA_NAME)
      if (gimple_assign_rhs_could_trap_p (stmt))
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	  gimple use_stmt = USE_STMT (use_p);
	  if (is_gimple_debug (use_stmt))
	  if (gimple_bb (use_stmt) != bb)

/* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
   return true and fill in *OPS recursively.  */

get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
  gimple stmt = SSA_NAME_DEF_STMT (var);

  if (!is_reassociable_op (stmt, code, loop))
  rhs[0] = gimple_assign_rhs1 (stmt);
  rhs[1] = gimple_assign_rhs2 (stmt);
  gimple_set_visited (stmt, true);
  for (i = 0; i < 2; i++)
    if (TREE_CODE (rhs[i]) == SSA_NAME
	&& !get_ops (rhs[i], code, ops, loop)
	&& has_single_use (rhs[i]))
	operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
	ops->safe_push (oe);
/* Find the ops that were added by get_ops starting from VAR, see if
   they were changed during update_range_test and if yes, create new
   stmts.  */
update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
	    unsigned int *pidx, struct loop *loop)
  gimple stmt = SSA_NAME_DEF_STMT (var);

  if (!is_reassociable_op (stmt, code, loop))
  rhs[0] = gimple_assign_rhs1 (stmt);
  rhs[1] = gimple_assign_rhs2 (stmt);
  for (i = 0; i < 2; i++)
    if (TREE_CODE (rhs[i]) == SSA_NAME)
	rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
	if (rhs[2 + i] == NULL_TREE)
	    if (has_single_use (rhs[i]))
	      rhs[2 + i] = ops[(*pidx)++]->op;
	      rhs[2 + i] = rhs[i];
  if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
      && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      var = make_ssa_name (TREE_TYPE (var));
      gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
      gimple_set_uid (g, gimple_uid (stmt));
      gimple_set_visited (g, true);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);

/* Structure to track the initial value passed to get_ops and
   the range in the ops vector for each basic block.  */

struct inter_bb_range_test_entry
  unsigned int first_idx, last_idx;

/* Inter-bb range test optimization.  */
maybe_optimize_range_tests (gimple stmt)
  basic_block first_bb = gimple_bb (stmt);
  basic_block last_bb = first_bb;
  basic_block other_bb = NULL;
  auto_vec<operand_entry_t> ops;
  auto_vec<inter_bb_range_test_entry> bbinfo;
  bool any_changes = false;

  /* Consider only basic blocks that end with GIMPLE_COND or
     a cast statement satisfying final_range_test_p.  All
     but the last bb in the first_bb .. last_bb range
     should end with GIMPLE_COND.  */
  if (gimple_code (stmt) == GIMPLE_COND)
      if (EDGE_COUNT (first_bb->succs) != 2)
  else if (final_range_test_p (stmt))
    other_bb = single_succ (first_bb);

  if (stmt_could_throw_p (stmt))

  /* As relative ordering of post-dominator sons isn't fixed,
     maybe_optimize_range_tests can be called first on any
     bb in the range we want to optimize.  So, start searching
     backwards, if first_bb can be set to a predecessor.  */
  while (single_pred_p (first_bb))
      basic_block pred_bb = single_pred (first_bb);
      if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
      if (!no_side_effect_bb (first_bb))

  /* If first_bb is last_bb, other_bb hasn't been computed yet.
     Before starting forward search in last_bb successors, find
     out the other_bb.  */
  if (first_bb == last_bb)
      /* As non-GIMPLE_COND last stmt always terminates the range,
	 if forward search didn't discover anything, just give up.  */
      if (gimple_code (stmt) != GIMPLE_COND)
      /* Look at both successors.  Either it ends with a GIMPLE_COND
	 and satisfies suitable_cond_bb, or ends with a cast and
	 other_bb is that cast's successor.  */
      FOR_EACH_EDGE (e, ei, first_bb->succs)
	if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
	    || e->dest == first_bb)
	else if (single_pred_p (e->dest))
	    stmt = last_stmt (e->dest);
		&& gimple_code (stmt) == GIMPLE_COND
		&& EDGE_COUNT (e->dest->succs) == 2)
		if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
		     && final_range_test_p (stmt)
		     && find_edge (first_bb, single_succ (e->dest)))
		other_bb = single_succ (e->dest);
		if (other_bb == first_bb)
      if (other_bb == NULL)

  /* Now do the forward search, moving last_bb to successor bbs
     that aren't other_bb.  */
  while (EDGE_COUNT (last_bb->succs) == 2)
      FOR_EACH_EDGE (e, ei, last_bb->succs)
	if (e->dest != other_bb)
      if (!single_pred_p (e->dest))
      if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
      if (!no_side_effect_bb (e->dest))
  if (first_bb == last_bb)

  /* Here basic blocks first_bb through last_bb's predecessor
     end with GIMPLE_COND, all of them have one of the edges to
     other_bb and another to another block in the range,
     all blocks except first_bb don't have side-effects and
     last_bb ends with either GIMPLE_COND, or cast satisfying
     final_range_test_p.  */
  for (bb = last_bb; ; bb = single_pred (bb))
      enum tree_code code;
      inter_bb_range_test_entry bb_ent;

      bb_ent.op = NULL_TREE;
      bb_ent.first_idx = ops.length ();
      bb_ent.last_idx = bb_ent.first_idx;
      e = find_edge (bb, other_bb);
      stmt = last_stmt (bb);
      gimple_set_visited (stmt, true);
      if (gimple_code (stmt) != GIMPLE_COND)
	  use_operand_p use_p;

	  lhs = gimple_assign_lhs (stmt);
	  rhs = gimple_assign_rhs1 (stmt);
	  gcc_assert (bb == last_bb);
	  /* # _345 = PHI <_123(N), 1(...), 1(...)>
	     or 0 instead of 1.  If it is 0, the _234
	     range test is anded together with all the
	     other range tests, if it is 1, it is ored with
	     them.  */
	  single_imm_use (lhs, &use_p, &phi);
	  gcc_assert (gimple_code (phi) == GIMPLE_PHI);
	  e2 = find_edge (first_bb, other_bb);
	  gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
	  if (integer_zerop (gimple_phi_arg_def (phi, d)))
	    code = BIT_AND_EXPR;
	      gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
	      code = BIT_IOR_EXPR;

	  /* If _234 SSA_NAME_DEF_STMT is
	     (or &, corresponding to 1/0 in the phi arguments,
	     push into ops the individual range test arguments
	     of the bitwise or resp. and, recursively.  */
	  if (!get_ops (rhs, code, &ops,
			loop_containing_stmt (stmt))
	      && has_single_use (rhs))
	      /* Otherwise, push the _234 range test itself.  */
		= (operand_entry_t) pool_alloc (operand_entry_pool);
	  bb_ent.last_idx = ops.length ();
	  bbinfo.safe_push (bb_ent);
	  /* Otherwise stmt is GIMPLE_COND.  */
	  code = gimple_cond_code (stmt);
	  lhs = gimple_cond_lhs (stmt);
	  rhs = gimple_cond_rhs (stmt);
	  if (TREE_CODE (lhs) == SSA_NAME
	      && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	      && ((code != EQ_EXPR && code != NE_EXPR)
		  || rhs != boolean_false_node
		  /* Either push into ops the individual bitwise
		     or resp. and operands, depending on which
		     edge is other_bb.  */
		  || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
				     ^ (code == EQ_EXPR))
				    ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
			       loop_containing_stmt (stmt))))
	      /* Or push the GIMPLE_COND stmt itself.  */
		= (operand_entry_t) pool_alloc (operand_entry_pool);
	      oe->rank = (e->flags & EDGE_TRUE_VALUE)
			 ? BIT_IOR_EXPR : BIT_AND_EXPR;
	      /* oe->op = NULL signals that there is no SSA_NAME
		 for the range test, and oe->id instead is the
		 basic block number, at whose end the GIMPLE_COND
		 is.  */
      else if (ops.length () > bb_ent.first_idx)
	  bb_ent.last_idx = ops.length ();
      bbinfo.safe_push (bb_ent);

  if (ops.length () > 1)
    any_changes = optimize_range_tests (ERROR_MARK, &ops);

      /* update_ops relies on has_single_use predicates returning the
	 same values as they did during get_ops earlier.  Additionally it
	 never removes statements, only adds new ones and it should walk
	 from the single imm use and check the predicate already before
	 making those changes.
	 On the other hand, the handling of GIMPLE_COND directly can turn
	 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
	 it needs to be done in a separate loop afterwards.  */
      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
	      && bbinfo[idx].op != NULL_TREE)
	      stmt = last_stmt (bb);
	      new_op = update_ops (bbinfo[idx].op,
				   ops[bbinfo[idx].first_idx]->rank,
				   ops, &bbinfo[idx].first_idx,
				   loop_containing_stmt (stmt));
	      if (new_op == NULL_TREE)
		  gcc_assert (bb == last_bb);
		  new_op = ops[bbinfo[idx].first_idx++]->op;
	      if (bbinfo[idx].op != new_op)
		  imm_use_iterator iter;
		  use_operand_p use_p;
		  gimple use_stmt, cast_stmt = NULL;

		  FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
		    if (is_gimple_debug (use_stmt))
		    else if (gimple_code (use_stmt) == GIMPLE_COND
			     || gimple_code (use_stmt) == GIMPLE_PHI)
		      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
			SET_USE (use_p, new_op);
		    else if (gimple_assign_cast_p (use_stmt))
		      cast_stmt = use_stmt;
		      gcc_assert (bb == last_bb);
		      tree lhs = gimple_assign_lhs (cast_stmt);
		      tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
		      enum tree_code rhs_code
			= gimple_assign_rhs_code (cast_stmt);
		      if (is_gimple_min_invariant (new_op))
			  new_op = fold_convert (TREE_TYPE (lhs), new_op);
			  g = gimple_build_assign (new_lhs, new_op);
			g = gimple_build_assign (new_lhs, rhs_code, new_op);
		      gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
		      gimple_set_uid (g, gimple_uid (cast_stmt));
		      gimple_set_visited (g, true);
		      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
		      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
			if (is_gimple_debug (use_stmt))
			else if (gimple_code (use_stmt) == GIMPLE_COND
				 || gimple_code (use_stmt) == GIMPLE_PHI)
			  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
			    SET_USE (use_p, new_lhs);
      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
	      && bbinfo[idx].op == NULL_TREE
	      && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
	      gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
	      if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
		gimple_cond_make_false (cond_stmt);
	      else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
		gimple_cond_make_true (cond_stmt);
		  gimple_cond_set_code (cond_stmt, NE_EXPR);
		  gimple_cond_set_lhs (cond_stmt,
				       ops[bbinfo[idx].first_idx]->op);
		  gimple_cond_set_rhs (cond_stmt, boolean_false_node);
	      update_stmt (cond_stmt);
/* Return true if OPERAND is defined by a PHI node which uses the LHS
   of STMT in its operands.  This is also known as a "destructive
   update" operation.  */
is_phi_for_stmt (gimple stmt, tree operand)
  use_operand_p arg_p;

  if (TREE_CODE (operand) != SSA_NAME)
  lhs = gimple_assign_lhs (stmt);
  def_stmt = SSA_NAME_DEF_STMT (operand);
  def_phi = dyn_cast <gphi *> (def_stmt);
  FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
    if (lhs == USE_FROM_PTR (arg_p))

/* Remove def stmt of VAR if VAR has zero uses and recurse
   on rhs1 operand if so.  */

remove_visited_stmt_chain (tree var)
  gimple_stmt_iterator gsi;

  if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
  stmt = SSA_NAME_DEF_STMT (var);
  if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
      var = gimple_assign_rhs1 (stmt);
      gsi = gsi_for_stmt (stmt);
      reassoc_remove_stmt (&gsi);
      release_defs (stmt);
/* This function checks three consecutive operands in
   passed operands vector OPS starting from OPINDEX and
   swaps two operands if it is profitable for binary operation
   consuming OPINDEX + 1 and OPINDEX + 2 operands.

   We pair ops with the same rank if possible.

   The alternative we try is to see if STMT is a destructive
   update style statement, which is like:
   b = phi (a, ...)
   a = c + b;
   In that case, we want to use the destructive update form to
   expose the possible vectorizer sum reduction opportunity.
   In that case, the third operand will be the phi node.  This
   check is not performed if STMT is null.

   We could, of course, try to be better as noted above, and do a
   lot of work to try to find these opportunities in >3 operand
   cases, but it is unlikely to be worth it.  */
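/* For illustration (a hypothetical operand vector, not from any source):
   with OPS[OPINDEX..] = { a (rank 1), b (rank 1), c (rank 2) } the first
   test below fires and the entries of a and c are exchanged, giving
   { c, b, a }, so the binary operation consuming OPINDEX + 1 and
   OPINDEX + 2 pairs the two rank-1 operands b and a.  */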
swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
			  unsigned int opindex, gimple stmt)
  operand_entry_t oe1, oe2, oe3;

  oe2 = ops[opindex + 1];
  oe3 = ops[opindex + 2];

  if ((oe1->rank == oe2->rank
       && oe2->rank != oe3->rank)
      || (stmt && is_phi_for_stmt (stmt, oe3->op)
	  && !is_phi_for_stmt (stmt, oe1->op)
	  && !is_phi_for_stmt (stmt, oe2->op)))
      struct operand_entry temp = *oe3;
      oe3->rank = oe1->rank;
      oe1->rank = temp.rank;
  else if ((oe1->rank == oe3->rank
	    && oe2->rank != oe3->rank)
	   || (stmt && is_phi_for_stmt (stmt, oe2->op)
	       && !is_phi_for_stmt (stmt, oe1->op)
	       && !is_phi_for_stmt (stmt, oe3->op)))
      struct operand_entry temp = *oe2;
      oe2->rank = oe1->rank;
      oe1->rank = temp.rank;

/* If definition of RHS1 or RHS2 dominates STMT, return the later of those
   two definitions, otherwise return STMT.  */

static inline gimple
find_insert_point (gimple stmt, tree rhs1, tree rhs2)
  if (TREE_CODE (rhs1) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
    stmt = SSA_NAME_DEF_STMT (rhs1);
  if (TREE_CODE (rhs2) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
    stmt = SSA_NAME_DEF_STMT (rhs2);
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order.  Return new lhs.  */

rewrite_expr_tree (gimple stmt, unsigned int opindex,
		   vec<operand_entry_t> ops, bool changed)
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree lhs = gimple_assign_lhs (stmt);

  /* The final recursion case for this function is that you have
     exactly two operations left.
     If we had exactly one op in the entire list to start with, we
     would have never called this function, and the tail recursion
     rewrites them one at a time.  */
  if (opindex + 2 == ops.length ())
      operand_entry_t oe1, oe2;

      oe2 = ops[opindex + 1];

      if (rhs1 != oe1->op || rhs2 != oe2->op)
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  unsigned int uid = gimple_uid (stmt);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (dump_file, "Transforming ");
	      print_gimple_stmt (dump_file, stmt, 0, 0);

	  /* Even when changed is false, reassociation could have e.g. removed
	     some redundant operations, so unless we are just swapping the
	     arguments or unless there is no change at all (then we just
	     return lhs), force creation of a new SSA_NAME.  */
	  if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
	      gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
	      lhs = make_ssa_name (TREE_TYPE (lhs));
		= gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
	      gimple_set_uid (stmt, uid);
	      gimple_set_visited (stmt, true);
	      if (insert_point == gsi_stmt (gsi))
		gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
		insert_stmt_after (stmt, insert_point);
	      gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
	      gimple_assign_set_rhs1 (stmt, oe1->op);
	      gimple_assign_set_rhs2 (stmt, oe2->op);

	  if (rhs1 != oe1->op && rhs1 != oe2->op)
	    remove_visited_stmt_chain (rhs1);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (dump_file, " into ");
	      print_gimple_stmt (dump_file, stmt, 0, 0);

  /* If we hit here, we should have 3 or more ops left.  */
  gcc_assert (opindex + 2 < ops.length ());

  /* Rewrite the next operator.  */

  /* Recurse on the LHS of the binary operator, which is guaranteed to
     be the non-leaf side.  */
    = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
			 changed || oe->op != rhs2);

  if (oe->op != rhs2 || new_rhs1 != rhs1)
      if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "Transforming ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);

      /* If changed is false, this is either opindex == 0
	 or all outer rhs2's were equal to corresponding oe->op,
	 and powi_result is NULL.
	 That means lhs is equivalent before and after reassociation.
	 Otherwise ensure the old lhs SSA_NAME is not reused and
	 create a new stmt as well, so that any debug stmts will be
	 properly adjusted.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  unsigned int uid = gimple_uid (stmt);
	  gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);

	  lhs = make_ssa_name (TREE_TYPE (lhs));
	  stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
	  gimple_set_uid (stmt, uid);
	  gimple_set_visited (stmt, true);
	  if (insert_point == gsi_stmt (gsi))
	    gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	    insert_stmt_after (stmt, insert_point);
	  gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
	  gimple_assign_set_rhs1 (stmt, new_rhs1);
	  gimple_assign_set_rhs2 (stmt, oe->op);

      if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, " into ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
/* Find out how many cycles we need to compute statements chain.
   OPS_NUM holds the number of statements in a chain.  CPU_WIDTH is the
   maximum number of independent statements we may execute per cycle.  */
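/* A worked example of the formula below (numbers chosen for illustration):
   for ops_num = 10 and cpu_width = 2 we get res = 10 / 4 = 2 reduction
   cycles, rest = 10 - 2 * 2 = 6 remaining operands, and since 6 is not a
   power of two the tail needs floor_log2 (6) + 1 = 3 halving cycles, for
   5 cycles total.  */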
get_required_cycles (int ops_num, int cpu_width)
  /* While we have more than 2 * cpu_width operands
     we may reduce the number of operands by cpu_width
     per cycle.  */
  res = ops_num / (2 * cpu_width);

  /* The remaining operand count may be reduced twice per cycle
     until we have only one operand.  */
  rest = (unsigned)(ops_num - res * cpu_width);
  elog = exact_log2 (rest);
    res += floor_log2 (rest) + 1;
/* Returns an optimal number of registers to use for computation of
   given statements.  */

get_reassociation_width (int ops_num, enum tree_code opc,
  int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);

  if (param_width > 0)
    width = param_width;
    width = targetm.sched.reassociation_width (opc, mode);

  /* Get the minimal time required for sequence computation.  */
  cycles_best = get_required_cycles (ops_num, width);

  /* Check if we may use a smaller width and still compute the sequence
     in the same time.  It will allow us to reduce register usage.
     get_required_cycles is monotonically increasing with lower width
     so we can perform a binary search for the minimal width that still
     results in the optimal cycle count.  */
  while (width > width_min)
      int width_mid = (width + width_min) / 2;

      if (get_required_cycles (ops_num, width_mid) == cycles_best)
      else if (width_min < width_mid)
	width_min = width_mid;
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order and trying to allow operations to be executed in parallel.  */
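/* For illustration (a generic example, not a transcript of this pass's
   output): a purely left-linear chain (((a + b) + c) + d needs three
   dependent additions, while with width 2 the equivalent
   (a + b) + (c + d) finishes in two cycles because the two inner sums
   can issue together.  */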
rewrite_expr_tree_parallel (gassign *stmt, int width,
			    vec<operand_entry_t> ops)
  enum tree_code opcode = gimple_assign_rhs_code (stmt);
  int op_num = ops.length ();
  int stmt_num = op_num - 1;
  gimple *stmts = XALLOCAVEC (gimple, stmt_num);
  int op_index = op_num - 1;
  int ready_stmts_end = 0;
  tree last_rhs1 = gimple_assign_rhs1 (stmt);

  /* We start expression rewriting from the top statements.
     So, in this loop we create a full list of statements
     we will work with.  */
  stmts[stmt_num - 1] = stmt;
  for (i = stmt_num - 2; i >= 0; i--)
    stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));

  for (i = 0; i < stmt_num; i++)
      /* Determine whether we should use results of
	 already handled statements or not.  */
      if (ready_stmts_end == 0
	  && (i - stmt_index >= width || op_index < 1))
	ready_stmts_end = i;

      /* Now we choose operands for the next statement.  Non zero
	 value in ready_stmts_end means here that we should use
	 the result of already generated statements as new operand.  */
      if (ready_stmts_end > 0)
	  op1 = gimple_assign_lhs (stmts[stmt_index++]);
	  if (ready_stmts_end > stmt_index)
	    op2 = gimple_assign_lhs (stmts[stmt_index++]);
	  else if (op_index >= 0)
	    op2 = ops[op_index--]->op;
	      gcc_assert (stmt_index < i);
	      op2 = gimple_assign_lhs (stmts[stmt_index++]);

	  if (stmt_index >= ready_stmts_end)
	    ready_stmts_end = 0;
	  swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
	  op2 = ops[op_index--]->op;
	  op1 = ops[op_index--]->op;

      /* If we emit the last statement then we should put
	 operands into the last statement.  It will also  */
      if (op_index < 0 && stmt_index == i)

      if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "Transforming ");
	  print_gimple_stmt (dump_file, stmts[i], 0, 0);

      /* We keep original statement only for the last one.  All
	 others are recreated.  */
      if (i == stmt_num - 1)
	  gimple_assign_set_rhs1 (stmts[i], op1);
	  gimple_assign_set_rhs2 (stmts[i], op2);
	  update_stmt (stmts[i]);
	stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);

      if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, " into ");
	  print_gimple_stmt (dump_file, stmts[i], 0, 0);

  remove_visited_stmt_chain (last_rhs1);
/* Transform STMT, which is really (A + B) + (C + D) into the left
   linear form, ((A + B) + C) + D.
   Recurse on D if necessary.  */
linearize_expr (gimple stmt)
  gimple_stmt_iterator gsi;
  gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
  gimple oldbinrhs = binrhs;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  gimple newbinrhs = NULL;
  struct loop *loop = loop_containing_stmt (stmt);
  tree lhs = gimple_assign_lhs (stmt);

  gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
	      && is_reassociable_op (binrhs, rhscode, loop));

  gsi = gsi_for_stmt (stmt);

  gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
  binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
				gimple_assign_rhs_code (binrhs),
				gimple_assign_lhs (binlhs),
				gimple_assign_rhs2 (binrhs));
  gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
  gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
  gimple_set_uid (binrhs, gimple_uid (stmt));

  if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
    newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));

  if (dump_file && (dump_flags & TDF_DETAILS))
      fprintf (dump_file, "Linearized: ");
      print_gimple_stmt (dump_file, stmt, 0, 0);

  reassociate_stats.linearized++;

  gsi = gsi_for_stmt (oldbinrhs);
  reassoc_remove_stmt (&gsi);
  release_defs (oldbinrhs);

  gimple_set_visited (stmt, true);
  gimple_set_visited (binlhs, true);
  gimple_set_visited (binrhs, true);

  /* Tail recurse on the new rhs if it still needs reassociation.  */
  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
    /* ??? This should probably be linearize_expr (newbinrhs) but I don't
       want to change the algorithm while converting to tuples.  */
    linearize_expr (stmt);

/* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
   it.  Otherwise, return NULL.  */

get_single_immediate_use (tree lhs)
  use_operand_p immuse;

  if (TREE_CODE (lhs) == SSA_NAME
      && single_imm_use (lhs, &immuse, &immusestmt)
      && is_gimple_assign (immusestmt))
/* Recursively negate the value of TONEGATE, and return the SSA_NAME
   representing the negated value.  Insertions of any necessary
   instructions go before GSI.
   This function is recursive in that, if you hand it "a_5" as the
   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */

negate_value (tree tonegate, gimple_stmt_iterator *gsip)
  gimple negatedefstmt = NULL;
  tree resultofnegate;
  gimple_stmt_iterator gsi;

  /* If we are trying to negate a name, defined by an add, negate the
     add operands instead.  */
  if (TREE_CODE (tonegate) == SSA_NAME)
    negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
  if (TREE_CODE (tonegate) == SSA_NAME
      && is_gimple_assign (negatedefstmt)
      && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
      && has_single_use (gimple_assign_lhs (negatedefstmt))
      && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
      tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
      tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
      tree lhs = gimple_assign_lhs (negatedefstmt);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs1 = negate_value (rhs1, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs2 = negate_value (rhs2, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      lhs = make_ssa_name (TREE_TYPE (lhs));
      gimple_set_visited (negatedefstmt, true);
      g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
      gimple_set_uid (g, gimple_uid (negatedefstmt));
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);

  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
  resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
					     NULL_TREE, true, GSI_SAME_STMT);
  uid = gimple_uid (gsi_stmt (gsi));
  for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
      gimple stmt = gsi_stmt (gsi);
      if (gimple_uid (stmt) != 0)
      gimple_set_uid (stmt, uid);
  return resultofnegate;
/* Return true if we should break up the subtract in STMT into an add
   with negate.  This is true when the subtract operands are really
   adds, or the subtract itself is used in an add expression.  In
   either case, breaking up the subtract into an add with negate
   exposes the adds to reassociation.  */
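/* For illustration (hypothetical GIMPLE, not from any source): given
   x = a - b;  y = x + c;  the subtract feeds an addition, so it is
   rewritten as x = a + (-b), which lets reassociation treat a, -b and c
   as one operand list.  */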
should_break_up_subtract (gimple stmt)
  tree lhs = gimple_assign_lhs (stmt);
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  struct loop *loop = loop_containing_stmt (stmt);

  if (TREE_CODE (binlhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))

  if (TREE_CODE (binrhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))

  if (TREE_CODE (lhs) == SSA_NAME
      && (immusestmt = get_single_immediate_use (lhs))
      && is_gimple_assign (immusestmt)
      && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
	  || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))

/* Transform STMT from A - B into A + -B.  */

break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
      fprintf (dump_file, "Breaking up subtract ");
      print_gimple_stmt (dump_file, stmt, 0, 0);

  rhs2 = negate_value (rhs2, gsip);
  gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
/* Determine whether STMT is a builtin call that raises an SSA name
   to an integer power and has only one use.  If so, and this is early
   reassociation and unsafe math optimizations are permitted, place
   the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
   If any of these conditions does not hold, return FALSE.  */

acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
  REAL_VALUE_TYPE c, cint;

  if (!first_pass_instance
      || !flag_unsafe_math_optimizations
      || !is_gimple_call (stmt)
      || !has_single_use (gimple_call_lhs (stmt)))

  fndecl = gimple_call_fndecl (stmt);
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)

  switch (DECL_FUNCTION_CODE (fndecl))
    CASE_FLT_FN (BUILT_IN_POW):
      if (flag_errno_math)

      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (TREE_CODE (arg1) != REAL_CST)

      c = TREE_REAL_CST (arg1);

      if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)

      *exponent = real_to_integer (&c);
      real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
      if (!real_identical (&c, &cint))

    CASE_FLT_FN (BUILT_IN_POWI):
      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (!tree_fits_shwi_p (arg1))

      *exponent = tree_to_shwi (arg1);

  /* Expanding negative exponents is generally unproductive, so we don't
     complicate matters with those.  Exponents of zero and one should
     have been handled by expression folding.  */
  if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
/* Recursively linearize a binary expression that is the RHS of STMT.
   Place the operands of the expression tree in the vector named OPS.  */

linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
		     bool is_associative, bool set_visited)
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple binlhsdef = NULL, binrhsdef = NULL;
  bool binlhsisreassoc = false;
  bool binrhsisreassoc = false;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  struct loop *loop = loop_containing_stmt (stmt);
  tree base = NULL_TREE;
  HOST_WIDE_INT exponent = 0;

  gimple_set_visited (stmt, true);

  if (TREE_CODE (binlhs) == SSA_NAME)
      binlhsdef = SSA_NAME_DEF_STMT (binlhs);
      binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
			 && !stmt_could_throw_p (binlhsdef));

  if (TREE_CODE (binrhs) == SSA_NAME)
      binrhsdef = SSA_NAME_DEF_STMT (binrhs);
      binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
			 && !stmt_could_throw_p (binrhsdef));

  /* If the LHS is not reassociable, but the RHS is, we need to swap
     them.  If neither is reassociable, there is nothing we can do, so
     just put them in the ops vector.  If the LHS is reassociable,
     linearize it.  If both are reassociable, then linearize the RHS
     and the LHS.  */
  if (!binlhsisreassoc)
      /* If this is not an associative operation like division, give up.  */
      if (!is_associative)
	  add_to_ops_vec (ops, binrhs);

      if (!binrhsisreassoc)
	  if (rhscode == MULT_EXPR
	      && TREE_CODE (binrhs) == SSA_NAME
	      && acceptable_pow_call (binrhsdef, &base, &exponent))
	      add_repeat_to_ops_vec (ops, base, exponent);
	      gimple_set_visited (binrhsdef, true);
	    add_to_ops_vec (ops, binrhs);

	  if (rhscode == MULT_EXPR
	      && TREE_CODE (binlhs) == SSA_NAME
	      && acceptable_pow_call (binlhsdef, &base, &exponent))
	      add_repeat_to_ops_vec (ops, base, exponent);
	      gimple_set_visited (binlhsdef, true);
	    add_to_ops_vec (ops, binlhs);

      if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "swapping operands of ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);

      swap_ssa_operands (stmt,
			 gimple_assign_rhs1_ptr (stmt),
			 gimple_assign_rhs2_ptr (stmt));

      if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, " is now ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);

      /* We want to make it so the lhs is always the reassociative op.  */
  else if (binrhsisreassoc)
      linearize_expr (stmt);
      binlhs = gimple_assign_rhs1 (stmt);
      binrhs = gimple_assign_rhs2 (stmt);

  gcc_assert (TREE_CODE (binrhs) != SSA_NAME
	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
  linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
		       is_associative, set_visited);

  if (rhscode == MULT_EXPR
      && TREE_CODE (binrhs) == SSA_NAME
      && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
      add_repeat_to_ops_vec (ops, base, exponent);
      gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
    add_to_ops_vec (ops, binrhs);
/* Repropagate the negates back into subtracts, since no other pass
   currently does it.  */

repropagate_negates (void)
  FOR_EACH_VEC_ELT (plus_negates, i, negate)
      gimple user = get_single_immediate_use (negate);

      if (!user || !is_gimple_assign (user))

      /* The negate operand can be either operand of a PLUS_EXPR
	 (it can be the LHS if the RHS is a constant for example).

	 Force the negate operand to the RHS of the PLUS_EXPR, then
	 transform the PLUS_EXPR into a MINUS_EXPR.  */
      if (gimple_assign_rhs_code (user) == PLUS_EXPR)
	  /* If the negated operand appears on the LHS of the
	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
	     to force the negated operand to the RHS of the PLUS_EXPR.  */
	  if (gimple_assign_rhs1 (user) == negate)
	      swap_ssa_operands (user,
				 gimple_assign_rhs1_ptr (user),
				 gimple_assign_rhs2_ptr (user));

	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
	  if (gimple_assign_rhs2 (user) == negate)
	      tree rhs1 = gimple_assign_rhs1 (user);
	      tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
      else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
	  if (gimple_assign_rhs1 (user) == negate)
	      /* We have
		   x = -a
		   y = x - b
		 which we transform into
		   x = a + b
		   y = -x .
		 This pushes down the negate which we possibly can merge
		 into some other operation, hence insert it into the
		 plus_negates vector.  */
	      gimple feed = SSA_NAME_DEF_STMT (negate);
	      tree a = gimple_assign_rhs1 (feed);
	      tree b = gimple_assign_rhs2 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (feed);
	      gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
	      tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
	      gimple g = gimple_build_assign (x, PLUS_EXPR, a, b);
	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
	      user = gsi_stmt (gsi2);

	      reassoc_remove_stmt (&gsi);
	      release_defs (feed);
	      plus_negates.safe_push (gimple_assign_lhs (user));
	      /* Transform "x = -a; y = b - x" into "y = b + a", getting
		 rid of one operation.  */
	      gimple feed = SSA_NAME_DEF_STMT (negate);
	      tree a = gimple_assign_rhs1 (feed);
	      tree rhs1 = gimple_assign_rhs1 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
	      update_stmt (gsi_stmt (gsi));
/* Returns true if OP is of a type for which we can do reassociation.
   That is for integral or non-saturating fixed-point types, and for
   floating point type when associative-math is enabled.  */

can_reassociate_p (tree op)
  tree type = TREE_TYPE (op);
  if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
      || NON_SAT_FIXED_POINT_TYPE_P (type)
      || (flag_associative_math && FLOAT_TYPE_P (type)))

/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement
   and set UIDs within each basic block.  */

break_up_subtract_bb (basic_block bb)
  gimple_stmt_iterator gsi;
  unsigned int uid = 1;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      gimple stmt = gsi_stmt (gsi);
      gimple_set_visited (stmt, false);
      gimple_set_uid (stmt, uid++);

      if (!is_gimple_assign (stmt)
	  || !can_reassociate_p (gimple_assign_lhs (stmt)))

      /* Look for simple gimple subtract operations.  */
      if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
	  if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
	      || !can_reassociate_p (gimple_assign_rhs2 (stmt)))

	  /* Check for a subtract used only in an addition.  If this
	     is the case, transform it into add of a negate for better
	     reassociation.  IE transform C = A-B into C = A + -B if C
	     is only used in an addition.  */
	  if (should_break_up_subtract (stmt))
	    break_up_subtract (stmt, &gsi);
      else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
	       && can_reassociate_p (gimple_assign_rhs1 (stmt)))
	plus_negates.safe_push (gimple_assign_lhs (stmt));

  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son = next_dom_son (CDI_DOMINATORS, son))
    break_up_subtract_bb (son);

/* Used for repeated factor analysis.  */
struct repeat_factor_d
  /* An SSA name that occurs in a multiply chain.  */

  /* Cached rank of the factor.  */

  /* Number of occurrences of the factor in the chain.  */
  HOST_WIDE_INT count;

  /* An SSA name representing the product of this factor and
     all factors appearing later in the repeated factor vector.  */

typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
typedef const struct repeat_factor_d *const_repeat_factor_t;

static vec<repeat_factor> repeat_factor_vec;
/* Used for sorting the repeat factor vector.  Sort primarily by
   ascending occurrence count, secondarily by descending rank.  */

static int
compare_repeat_factors (const void *x1, const void *x2)
{
  const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
  const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;

  if (rf1->count != rf2->count)
    return rf1->count - rf2->count;

  return rf2->rank - rf1->rank;
}
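
/* Illustration (not part of the original source): for entries
   a (count 2, rank 1), b (count 2, rank 5) and c (count 3, rank 2),
   the comparator above yields the order b, a, c: counts ascend, and
   the tie between a and b is broken by descending rank.  */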
/* Look for repeated operands in OPS in the multiply tree rooted at
   STMT.  Replace them with an optimal sequence of multiplies and powi
   builtin calls, and remove the used operands from OPS.  Return an
   SSA name representing the value of the replacement sequence.  */

static tree
attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
{
  unsigned i, j, vec_len;
  int ii;
  operand_entry_t oe;
  repeat_factor_t rf1, rf2;
  repeat_factor rfnew;
  tree result = NULL_TREE;
  tree target_ssa, iter_result;
  tree type = TREE_TYPE (gimple_get_lhs (stmt));
  tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gimple mul_stmt, pow_stmt;

  /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
     target.  */
  if (!powi_fndecl)
    return NULL_TREE;

  /* Allocate the repeated factor vector.  */
  repeat_factor_vec.create (10);
  /* Scan the OPS vector for all SSA names in the product and build
     up a vector of occurrence counts for each factor.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
	{
	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	    {
	      if (rf1->factor == oe->op)
		{
		  rf1->count += oe->count;
		  break;
		}
	    }

	  if (j >= repeat_factor_vec.length ())
	    {
	      rfnew.factor = oe->op;
	      rfnew.rank = oe->rank;
	      rfnew.count = oe->count;
	      rfnew.repr = NULL_TREE;
	      repeat_factor_vec.safe_push (rfnew);
	    }
	}
    }
  /* Sort the repeated factor vector by (a) increasing occurrence count,
     and (b) decreasing rank.  */
  repeat_factor_vec.qsort (compare_repeat_factors);

  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_pow opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions:

	t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = t3 * t1
	result = t4 * z

     If we instead ignored the cached y * z and first multiplied by
     the __builtin_pow opportunity z * z, we would get the inferior:

	t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = z * z
	t5 = t3 * t4
	result = t5 * y

     which uses 6 multiplies.  */

  vec_len = repeat_factor_vec.length ();
  /* Repeatedly look for opportunities to create a builtin_powi call.  */
  while (true)
    {
      HOST_WIDE_INT power;

      /* First look for the largest cached product of factors from
	 preceding iterations.  If found, create a builtin_powi for
	 it if the minimum occurrence count for its factors is at
	 least 2, or just use this cached product as our next
	 multiplicand if the minimum occurrence count is 1.  */
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	{
	  if (rf1->repr && rf1->count > 0)
	    break;
	}

      if (j < vec_len)
	{
	  power = rf1->count;

	  if (power == 1)
	    {
	      iter_result = rf1->repr;

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor_t rf;
		  fputs ("Multiplying by cached product ", dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = &repeat_factor_vec[elt];
		      print_generic_expr (dump_file, rf->factor, 0);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fputs ("\n", dump_file);
		}
	    }
	  else
	    {
	      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
					    build_int_cst (integer_type_node,
							   power));
	      gimple_call_set_lhs (pow_stmt, iter_result);
	      gimple_set_location (pow_stmt, gimple_location (stmt));
	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor_t rf;
		  fputs ("Building __builtin_pow call for cached product (",
			 dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = &repeat_factor_vec[elt];
		      print_generic_expr (dump_file, rf->factor, 0);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n",
			   power);
		}
	    }
	}
      else
	{
	  /* Otherwise, find the first factor in the repeated factor
	     vector whose occurrence count is at least 2.  If no such
	     factor exists, there are no builtin_powi opportunities
	     remaining.  */
	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	    {
	      if (rf1->count >= 2)
		break;
	    }

	  if (j >= vec_len)
	    break;

	  power = rf1->count;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      unsigned elt;
	      repeat_factor_t rf;
	      fputs ("Building __builtin_pow call for (", dump_file);
	      for (elt = j; elt < vec_len; elt++)
		{
		  rf = &repeat_factor_vec[elt];
		  print_generic_expr (dump_file, rf->factor, 0);
		  if (elt < vec_len - 1)
		    fputs (" * ", dump_file);
		}
	      fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n", power);
	    }

	  reassociate_stats.pows_created++;

	  /* Visit each element of the vector in reverse order (so that
	     high-occurrence elements are visited first, and within the
	     same occurrence count, lower-ranked elements are visited
	     first).  Form a linear product of all elements in this order
	     whose occurrence count is at least that of element J.
	     Record the SSA name representing the product of each element
	     with all subsequent elements in the vector.  */
	  if (j == vec_len - 1)
	    rf1->repr = rf1->factor;
	  else
	    {
	      for (ii = vec_len - 2; ii >= (int) j; ii--)
		{
		  rf1 = &repeat_factor_vec[ii];
		  rf2 = &repeat_factor_vec[ii + 1];

		  /* Init the last factor's representative to be itself.  */
		  if (!rf2->repr)
		    rf2->repr = rf2->factor;

		  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
		  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
						  rf1->factor, rf2->repr);
		  gimple_set_location (mul_stmt, gimple_location (stmt));
		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
		  rf1->repr = target_ssa;

		  /* Don't reprocess the multiply we just introduced.  */
		  gimple_set_visited (mul_stmt, true);
		}
	    }

	  /* Form a call to __builtin_powi for the maximum product
	     just formed, raised to the power obtained earlier.  */
	  rf1 = &repeat_factor_vec[j];
	  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
	  pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
					build_int_cst (integer_type_node,
						       power));
	  gimple_call_set_lhs (pow_stmt, iter_result);
	  gimple_set_location (pow_stmt, gimple_location (stmt));
	  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
	}
      /* If we previously formed at least one other builtin_powi call,
	 form the product of this one and those others.  */
      if (result)
	{
	  tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
	  mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
					  result, iter_result);
	  gimple_set_location (mul_stmt, gimple_location (stmt));
	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
	  gimple_set_visited (mul_stmt, true);
	  result = new_result;
	}
      else
	result = iter_result;

      /* Decrement the occurrence count of each element in the product
	 by the count found above, and remove this many copies of each
	 factor from OPS.  */
      for (i = j; i < vec_len; i++)
	{
	  HOST_WIDE_INT k = power;
	  unsigned n;

	  rf1 = &repeat_factor_vec[i];
	  rf1->count -= power;

	  FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
	    {
	      if (oe->op == rf1->factor)
		{
		  if (oe->count <= k)
		    {
		      ops->ordered_remove (n);
		      k -= oe->count;
		      if (k == 0)
			break;
		    }
		  else
		    {
		      oe->count -= k;
		      break;
		    }
		}
	    }
	}
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}
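
/* Illustration (not part of the original source): for a MULT_EXPR
   chain computing x * x * x * y * y, the factor vector sorts to
   y (count 2), x (count 3).  The first iteration builds t = y * x and
   p = __builtin_powi (t, 2), consuming two copies of each factor; the
   second iteration multiplies the result by the one remaining x.  The
   function returns p * x and leaves OPS empty, so the caller turns
   the original statement into a copy of that result.  */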
/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */

static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
{
  tree rhs1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}
/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
			    tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}
/* Reassociate expressions in basic block BB and its post-dominator as
   children.  */

static void
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple stmt = last_stmt (bb);

  if (stmt && !gimple_visited_p (stmt))
    maybe_optimize_range_tests (stmt);

  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
	  && !stmt_could_throw_p (stmt))
	{
	  tree lhs, rhs1, rhs2;
	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

	  /* If this is not a gimple binary expression, there is
	     nothing for us to do with it.  */
	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
	    continue;

	  /* If this was part of an already processed statement,
	     we don't need to touch it again.  */
	  if (gimple_visited_p (stmt))
	    {
	      /* This statement might have become dead because of previous
		 reassociations.  */
	      if (has_zero_uses (gimple_get_lhs (stmt)))
		{
		  reassoc_remove_stmt (&gsi);
		  release_defs (stmt);
		  /* We might end up removing the last stmt above which
		     places the iterator to the end of the sequence.
		     Reset it to the last stmt in this case which might
		     be the end of the sequence as well if we removed
		     the last statement of the sequence.  In which case
		     we need to bail out.  */
		  if (gsi_end_p (gsi))
		    {
		      gsi = gsi_last_bb (bb);
		      if (gsi_end_p (gsi))
			break;
		    }
		}
	      continue;
	    }

	  lhs = gimple_assign_lhs (stmt);
	  rhs1 = gimple_assign_rhs1 (stmt);
	  rhs2 = gimple_assign_rhs2 (stmt);

	  /* For non-bit or min/max operations we can't associate
	     all types.  Verify that here.  */
	  if (rhs_code != BIT_IOR_EXPR
	      && rhs_code != BIT_AND_EXPR
	      && rhs_code != BIT_XOR_EXPR
	      && rhs_code != MIN_EXPR
	      && rhs_code != MAX_EXPR
	      && (!can_reassociate_p (lhs)
		  || !can_reassociate_p (rhs1)
		  || !can_reassociate_p (rhs2)))
	    continue;

	  if (associative_tree_code (rhs_code))
	    {
	      auto_vec<operand_entry_t> ops;
	      tree powi_result = NULL_TREE;

	      /* There may be no immediate uses left by the time we
		 get here because we may have eliminated them all.  */
	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
		continue;

	      gimple_set_visited (stmt, true);
	      linearize_expr_tree (&ops, stmt, true, true);
	      ops.qsort (sort_by_operand_rank);
	      optimize_ops_list (rhs_code, &ops);
	      if (undistribute_ops_list (rhs_code, &ops,
					 loop_containing_stmt (stmt)))
		{
		  ops.qsort (sort_by_operand_rank);
		  optimize_ops_list (rhs_code, &ops);
		}

	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
		optimize_range_tests (rhs_code, &ops);

	      if (first_pass_instance
		  && rhs_code == MULT_EXPR
		  && flag_unsafe_math_optimizations)
		powi_result = attempt_builtin_powi (stmt, &ops);

	      /* If the operand vector is now empty, all operands were
		 consumed by the __builtin_powi optimization.  */
	      if (ops.length () == 0)
		transform_stmt_to_copy (&gsi, stmt, powi_result);
	      else if (ops.length () == 1)
		{
		  tree last_op = ops.last ()->op;

		  if (powi_result)
		    transform_stmt_to_multiply (&gsi, stmt, last_op,
						powi_result);
		  else
		    transform_stmt_to_copy (&gsi, stmt, last_op);
		}
	      else
		{
		  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
		  int ops_num = ops.length ();
		  int width = get_reassociation_width (ops_num, rhs_code, mode);
		  tree new_lhs = lhs;

		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file,
			     "Width = %d was chosen for reassociation\n", width);

		  if (width > 1
		      && ops.length () > 3)
		    rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
						width, ops);
		  else
		    {
		      /* When there are three operands left, we want
			 to make sure the ones that get the double
			 binary op are chosen wisely.  */
		      int len = ops.length ();
		      if (len >= 3)
			swap_ops_for_binary_stmt (ops, len - 3, stmt);

		      new_lhs = rewrite_expr_tree (stmt, 0, ops,
						   powi_result != NULL);
		    }

		  /* If we combined some repeated factors into a
		     __builtin_powi call, multiply that result by the
		     reassociated operands.  */
		  if (powi_result)
		    {
		      gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
		      tree type = TREE_TYPE (lhs);
		      tree target_ssa = make_temp_ssa_name (type, NULL,
							    "reassocpow");
		      gimple_set_lhs (lhs_stmt, target_ssa);
		      update_stmt (lhs_stmt);
		      if (lhs != new_lhs)
			target_ssa = new_lhs;
		      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
						      powi_result, target_ssa);
		      gimple_set_location (mul_stmt, gimple_location (stmt));
		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
		    }
		}
	    }
	}
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    reassociate_bb (son);
}
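
/* Illustrative sketch (not part of the original source): for unsigned
   operands, the chain

     t1 = a - b;
     t2 = t1 + c;
     x  = t2 + b;

   is first turned into additions of negates by break_up_subtract_bb,
   then linearized here into the operand list { a, -b, c, b }.
   optimize_ops_list cancels the b / -b pair, and rewrite_expr_tree
   emits roughly x = a + c; the now-dead feeding statements are
   removed when the backward walk in this function reaches them.  */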
/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...;  // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...;  // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI<1(l1), OTHERVAR(l2)>;  */

static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple def_stmt = SSA_NAME_DEF_STMT (var);
      gimple use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      gcc_assert (ok
		  && is_gimple_assign (use_stmt)
		  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
		  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple g = gimple_build_cond (NE_EXPR, var,
				    build_zero_cst (TREE_TYPE (var)),
				    NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = REG_BR_PROB_BASE / 2;
      etrue->count = cond_bb->count / 2;
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      efalse->count -= etrue->count;
      then_bb->count -= etrue->count;

      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
	othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
	othervar = gimple_assign_rhs1 (use_stmt);
      else
	gcc_unreachable ();

      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}
void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
void debug_ops_vector (vec<operand_entry_t> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
{
  operand_entry_t oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op, 0);
      fprintf (file, "\n");
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry_t> ops)
{
  dump_ops_vector (stderr, ops);
}
static void
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}
/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  operand_entry_pool = create_alloc_pool ("operand entry pool",
					  sizeof (struct operand_entry), 30);
  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, long>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
	insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}
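
/* Note (not part of the original source): with the scheme above,
   default definitions such as parameters get small consecutive ranks,
   while every ordinary basic block gets a base rank shifted left by
   16 bits, assigned in reverse post order.  Any value computed by a
   statement therefore outranks all parameters, and the shift roughly
   leaves the low 16 bits as headroom for per-statement rank
   increments within a block.  */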
/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
			    reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
			    reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
			    reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
			    reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
			    reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
			    reassociate_stats.pows_created);

  delete operand_rank;
  free_alloc_pool (operand_entry_pool);
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}
/* Gate and execute functions for Reassociation.  */

static unsigned int
execute_reassoc (void)
{
  init_reassoc ();

  do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return 0;
}

namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
  virtual unsigned int execute (function *) { return execute_reassoc (); }

}; // class pass_reassoc

} // anon namespace

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}