1 /* Reassociation for trees.
2 Copyright (C) 2005-2019 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
41 #include "gimple-fold.h"
43 #include "gimple-iterator.h"
44 #include "gimplify-me.h"
46 #include "tree-ssa-loop.h"
49 #include "langhooks.h"
54 #include "case-cfn-macros.h"
56 /* This is a simple global reassociation pass. It is, in part, based
57 on the LLVM pass of the same name (They do some things more/less
58 than we do, in different orders, etc).
60 It consists of five steps:
62 1. Breaking up subtract operations into addition + negate, where
63 it would promote the reassociation of adds.
65 2. Left linearization of the expression trees, so that (A+B)+(C+D)
66 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
67 During linearization, we place the operands of the binary
68 expressions into a vector of operand_entry_*
70 3. Optimization of the operand lists, eliminating things like a +
73 3a. Combine repeated factors with the same occurrence counts
74 into a __builtin_powi call that will later be optimized into
75 an optimal number of multiplies.
77 4. Rewrite the expression trees we linearized and optimized so
78 they are in proper rank order.
80 5. Repropagate negates, as nothing else will clean it up ATM.
82 A bit of theory on #4, since nobody seems to write anything down
83 about why it makes sense to do it the way they do it:
85 We could do this much nicer theoretically, but don't (for reasons
86 explained after how to do it theoretically nice :P).
88 In order to promote the most redundancy elimination, you want
89 binary expressions whose operands are the same rank (or
90 preferably, the same value) exposed to the redundancy eliminator,
91 for possible elimination.
93 So the way to do this if we really cared, is to build the new op
94 tree from the leaves to the roots, merging as you go, and putting the
95 new op on the end of the worklist, until you are left with one
96 thing on the worklist.
98 IE if you have to rewrite the following set of operands (listed with
99 rank in parentheses), with opcode PLUS_EXPR:
101 a (1), b (1), c (1), d (2), e (2)
104 We start with our merge worklist empty, and the ops list with all of
107 You want to first merge all leaves of the same rank, as much as
110 So first build a binary op of
112 mergetmp = a + b, and put "mergetmp" on the merge worklist.
114 Because there is no three operand form of PLUS_EXPR, c is not going to
115 be exposed to redundancy elimination as a rank 1 operand.
117 So you might as well throw it on the merge worklist (you could also
118 consider it to now be a rank two operand, and merge it with d and e,
119 but in this case, you then have evicted e from a binary op. So at
120 least in this situation, you can't win.)
122 Then build a binary op of d + e
125 and put mergetmp2 on the merge worklist.
127 so merge worklist = {mergetmp, c, mergetmp2}
129 Continue building binary ops of these operations until you have only
130 one operation left on the worklist.
135 mergetmp3 = mergetmp + c
137 worklist = {mergetmp2, mergetmp3}
139 mergetmp4 = mergetmp2 + mergetmp3
141 worklist = {mergetmp4}
143 because we have one operation left, we can now just set the original
144 statement equal to the result of that operation.
146 This will at least expose a + b and d + e to redundancy elimination
147 as binary operations.
149 For extra points, you can reuse the old statements to build the
150 mergetmps, since you shouldn't run out.
152 So why don't we do this?
154 Because it's expensive, and rarely will help. Most trees we are
155 reassociating have 3 or less ops. If they have 2 ops, they already
156 will be written into a nice single binary op. If you have 3 ops, a
157 single simple check suffices to tell you whether the first two are of the
158 same rank. If so, you know to order it
161 newstmt = mergetmp + op3
165 newstmt = mergetmp + op1
167 If all three are of the same rank, you can't expose them all in a
168 single binary operator anyway, so the above is *still* the best you
171 Thus, this is what we do. When we have three ops left, we check to see
172 what order to put them in, and call it a day. As a nod to vector sum
173 reduction, we check if any of the ops are really a phi node that is a
174 destructive update for the associating op, and keep the destructive
175 update together for vector sum reduction recognition. */
177 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
178 point 3a in the pass header comment. */
179 static bool reassoc_insert_powi_p
;
185 int constants_eliminated
;
188 int pows_encountered
;
192 /* Operator, rank pair. */
199 gimple
*stmt_to_insert
;
202 static object_allocator
<operand_entry
> operand_entry_pool
203 ("operand entry pool");
205 /* This is used to assign a unique ID to each struct operand_entry
206 so that qsort results are identical on different hosts. */
207 static unsigned int next_operand_entry_id
;
209 /* Starting rank number for a given basic block, so that we can rank
210 operations using unmovable instructions in that BB based on the bb
212 static long *bb_rank
;
214 /* Operand->rank hashtable. */
215 static hash_map
<tree
, long> *operand_rank
;
217 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
218 all basic blocks the CFG should be adjusted - basic blocks
219 split right after that SSA_NAME's definition statement and before
220 the only use, which must be a bit ior. */
221 static vec
<tree
> reassoc_branch_fixups
;
224 static long get_rank (tree
);
225 static bool reassoc_stmt_dominates_stmt_p (gimple
*, gimple
*);
227 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
228 possibly added by gsi_remove. */
231 reassoc_remove_stmt (gimple_stmt_iterator
*gsi
)
233 gimple
*stmt
= gsi_stmt (*gsi
);
235 if (!MAY_HAVE_DEBUG_BIND_STMTS
|| gimple_code (stmt
) == GIMPLE_PHI
)
236 return gsi_remove (gsi
, true);
238 gimple_stmt_iterator prev
= *gsi
;
240 unsigned uid
= gimple_uid (stmt
);
241 basic_block bb
= gimple_bb (stmt
);
242 bool ret
= gsi_remove (gsi
, true);
243 if (!gsi_end_p (prev
))
246 prev
= gsi_start_bb (bb
);
247 gimple
*end_stmt
= gsi_stmt (*gsi
);
248 while ((stmt
= gsi_stmt (prev
)) != end_stmt
)
250 gcc_assert (stmt
&& is_gimple_debug (stmt
) && gimple_uid (stmt
) == 0);
251 gimple_set_uid (stmt
, uid
);
257 /* Bias amount for loop-carried phis. We want this to be larger than
258 the depth of any reassociation tree we can see, but not larger than
259 the rank difference between two blocks. */
260 #define PHI_LOOP_BIAS (1 << 15)
262 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
263 an innermost loop, and the phi has only a single use which is inside
264 the loop, then the rank is the block rank of the loop latch plus an
265 extra bias for the loop-carried dependence. This causes expressions
266 calculated into an accumulator variable to be independent for each
267 iteration of the loop. If STMT is some other phi, the rank is the
268 block rank of its containing block. */
270 phi_rank (gimple
*stmt
)
272 basic_block bb
= gimple_bb (stmt
);
273 class loop
*father
= bb
->loop_father
;
279 /* We only care about real loops (those with a latch). */
281 return bb_rank
[bb
->index
];
283 /* Interesting phis must be in headers of innermost loops. */
284 if (bb
!= father
->header
286 return bb_rank
[bb
->index
];
288 /* Ignore virtual SSA_NAMEs. */
289 res
= gimple_phi_result (stmt
);
290 if (virtual_operand_p (res
))
291 return bb_rank
[bb
->index
];
293 /* The phi definition must have a single use, and that use must be
294 within the loop. Otherwise this isn't an accumulator pattern. */
295 if (!single_imm_use (res
, &use
, &use_stmt
)
296 || gimple_bb (use_stmt
)->loop_father
!= father
)
297 return bb_rank
[bb
->index
];
299 /* Look for phi arguments from within the loop. If found, bias this phi. */
300 for (i
= 0; i
< gimple_phi_num_args (stmt
); i
++)
302 tree arg
= gimple_phi_arg_def (stmt
, i
);
303 if (TREE_CODE (arg
) == SSA_NAME
304 && !SSA_NAME_IS_DEFAULT_DEF (arg
))
306 gimple
*def_stmt
= SSA_NAME_DEF_STMT (arg
);
307 if (gimple_bb (def_stmt
)->loop_father
== father
)
308 return bb_rank
[father
->latch
->index
] + PHI_LOOP_BIAS
;
312 /* Must be an uninteresting phi. */
313 return bb_rank
[bb
->index
];
316 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
317 loop-carried dependence of an innermost loop, return TRUE; else
320 loop_carried_phi (tree exp
)
325 if (TREE_CODE (exp
) != SSA_NAME
326 || SSA_NAME_IS_DEFAULT_DEF (exp
))
329 phi_stmt
= SSA_NAME_DEF_STMT (exp
);
331 if (gimple_code (SSA_NAME_DEF_STMT (exp
)) != GIMPLE_PHI
)
334 /* Non-loop-carried phis have block rank. Loop-carried phis have
335 an additional bias added in. If this phi doesn't have block rank,
336 it's biased and should not be propagated. */
337 block_rank
= bb_rank
[gimple_bb (phi_stmt
)->index
];
339 if (phi_rank (phi_stmt
) != block_rank
)
345 /* Return the maximum of RANK and the rank that should be propagated
346 from expression OP. For most operands, this is just the rank of OP.
347 For loop-carried phis, the value is zero to avoid undoing the bias
348 in favor of the phi. */
350 propagate_rank (long rank
, tree op
)
354 if (loop_carried_phi (op
))
357 op_rank
= get_rank (op
);
359 return MAX (rank
, op_rank
);
362 /* Look up the operand rank structure for expression E. */
365 find_operand_rank (tree e
)
367 long *slot
= operand_rank
->get (e
);
368 return slot
? *slot
: -1;
371 /* Insert {E,RANK} into the operand rank hashtable. */
374 insert_operand_rank (tree e
, long rank
)
376 gcc_assert (rank
> 0);
377 gcc_assert (!operand_rank
->put (e
, rank
));
380 /* Given an expression E, return the rank of the expression. */
385 /* SSA_NAME's have the rank of the expression they are the result
387 For globals and uninitialized values, the rank is 0.
388 For function arguments, use the pre-setup rank.
389 For PHI nodes, stores, asm statements, etc, we use the rank of
391 For simple operations, the rank is the maximum rank of any of
392 its operands, or the bb_rank, whichever is less.
393 I make no claims that this is optimal, however, it gives good
396 /* We make an exception to the normal ranking system to break
397 dependences of accumulator variables in loops. Suppose we
398 have a simple one-block loop containing:
405 As shown, each iteration of the calculation into x is fully
406 dependent upon the iteration before it. We would prefer to
407 see this in the form:
414 If the loop is unrolled, the calculations of b and c from
415 different iterations can be interleaved.
417 To obtain this result during reassociation, we bias the rank
418 of the phi definition x_1 upward, when it is recognized as an
419 accumulator pattern. The artificial rank causes it to be
420 added last, providing the desired independence. */
422 if (TREE_CODE (e
) == SSA_NAME
)
429 if (SSA_NAME_IS_DEFAULT_DEF (e
))
430 return find_operand_rank (e
);
432 stmt
= SSA_NAME_DEF_STMT (e
);
433 if (gimple_code (stmt
) == GIMPLE_PHI
)
434 return phi_rank (stmt
);
436 if (!is_gimple_assign (stmt
))
437 return bb_rank
[gimple_bb (stmt
)->index
];
439 /* If we already have a rank for this expression, use that. */
440 rank
= find_operand_rank (e
);
444 /* Otherwise, find the maximum rank for the operands. As an
445 exception, remove the bias from loop-carried phis when propagating
446 the rank so that dependent operations are not also biased. */
447 /* Simply walk over all SSA uses - this takes advatage of the
448 fact that non-SSA operands are is_gimple_min_invariant and
451 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
452 rank
= propagate_rank (rank
, op
);
454 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
456 fprintf (dump_file
, "Rank for ");
457 print_generic_expr (dump_file
, e
);
458 fprintf (dump_file
, " is %ld\n", (rank
+ 1));
461 /* Note the rank in the hashtable so we don't recompute it. */
462 insert_operand_rank (e
, (rank
+ 1));
466 /* Constants, globals, etc., are rank 0 */
471 /* We want integer ones to end up last no matter what, since they are
472 the ones we can do the most with. */
473 #define INTEGER_CONST_TYPE 1 << 4
474 #define FLOAT_ONE_CONST_TYPE 1 << 3
475 #define FLOAT_CONST_TYPE 1 << 2
476 #define OTHER_CONST_TYPE 1 << 1
478 /* Classify an invariant tree into integer, float, or other, so that
479 we can sort them to be near other constants of the same type. */
481 constant_type (tree t
)
483 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
484 return INTEGER_CONST_TYPE
;
485 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
487 /* Sort -1.0 and 1.0 constants last, while in some cases
488 const_binop can't optimize some inexact operations, multiplication
489 by -1.0 or 1.0 can be always merged with others. */
490 if (real_onep (t
) || real_minus_onep (t
))
491 return FLOAT_ONE_CONST_TYPE
;
492 return FLOAT_CONST_TYPE
;
495 return OTHER_CONST_TYPE
;
498 /* qsort comparison function to sort operand entries PA and PB by rank
499 so that the sorted array is ordered by rank in decreasing order. */
501 sort_by_operand_rank (const void *pa
, const void *pb
)
503 const operand_entry
*oea
= *(const operand_entry
*const *)pa
;
504 const operand_entry
*oeb
= *(const operand_entry
*const *)pb
;
506 if (oeb
->rank
!= oea
->rank
)
507 return oeb
->rank
> oea
->rank
? 1 : -1;
509 /* It's nicer for optimize_expression if constants that are likely
510 to fold when added/multiplied/whatever are put next to each
511 other. Since all constants have rank 0, order them by type. */
514 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
515 return constant_type (oea
->op
) - constant_type (oeb
->op
);
517 /* To make sorting result stable, we use unique IDs to determine
519 return oeb
->id
> oea
->id
? 1 : -1;
522 if (TREE_CODE (oea
->op
) != SSA_NAME
)
524 if (TREE_CODE (oeb
->op
) != SSA_NAME
)
525 return oeb
->id
> oea
->id
? 1 : -1;
529 else if (TREE_CODE (oeb
->op
) != SSA_NAME
)
532 /* Lastly, make sure the versions that are the same go next to each
534 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
536 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
537 versions of removed SSA_NAMEs, so if possible, prefer to sort
538 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
540 gimple
*stmta
= SSA_NAME_DEF_STMT (oea
->op
);
541 gimple
*stmtb
= SSA_NAME_DEF_STMT (oeb
->op
);
542 basic_block bba
= gimple_bb (stmta
);
543 basic_block bbb
= gimple_bb (stmtb
);
546 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
547 but the other might not. */
552 /* If neither is, compare bb_rank. */
553 if (bb_rank
[bbb
->index
] != bb_rank
[bba
->index
])
554 return (bb_rank
[bbb
->index
] >> 16) - (bb_rank
[bba
->index
] >> 16);
557 bool da
= reassoc_stmt_dominates_stmt_p (stmta
, stmtb
);
558 bool db
= reassoc_stmt_dominates_stmt_p (stmtb
, stmta
);
562 return SSA_NAME_VERSION (oeb
->op
) > SSA_NAME_VERSION (oea
->op
) ? 1 : -1;
565 return oeb
->id
> oea
->id
? 1 : -1;
568 /* Add an operand entry to *OPS for the tree operand OP. */
571 add_to_ops_vec (vec
<operand_entry
*> *ops
, tree op
, gimple
*stmt_to_insert
= NULL
)
573 operand_entry
*oe
= operand_entry_pool
.allocate ();
576 oe
->rank
= get_rank (op
);
577 oe
->id
= next_operand_entry_id
++;
579 oe
->stmt_to_insert
= stmt_to_insert
;
583 /* Add an operand entry to *OPS for the tree operand OP with repeat
587 add_repeat_to_ops_vec (vec
<operand_entry
*> *ops
, tree op
,
588 HOST_WIDE_INT repeat
)
590 operand_entry
*oe
= operand_entry_pool
.allocate ();
593 oe
->rank
= get_rank (op
);
594 oe
->id
= next_operand_entry_id
++;
596 oe
->stmt_to_insert
= NULL
;
599 reassociate_stats
.pows_encountered
++;
602 /* Return true if STMT is reassociable operation containing a binary
603 operation with tree code CODE, and is inside LOOP. */
606 is_reassociable_op (gimple
*stmt
, enum tree_code code
, class loop
*loop
)
608 basic_block bb
= gimple_bb (stmt
);
610 if (gimple_bb (stmt
) == NULL
)
613 if (!flow_bb_inside_loop_p (loop
, bb
))
616 if (is_gimple_assign (stmt
)
617 && gimple_assign_rhs_code (stmt
) == code
618 && has_single_use (gimple_assign_lhs (stmt
)))
620 tree rhs1
= gimple_assign_rhs1 (stmt
);
621 tree rhs2
= gimple_assign_rhs2 (stmt
);
622 if (TREE_CODE (rhs1
) == SSA_NAME
623 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1
))
626 && TREE_CODE (rhs2
) == SSA_NAME
627 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2
))
636 /* Return true if STMT is a nop-conversion. */
639 gimple_nop_conversion_p (gimple
*stmt
)
641 if (gassign
*ass
= dyn_cast
<gassign
*> (stmt
))
643 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass
))
644 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass
)),
645 TREE_TYPE (gimple_assign_rhs1 (ass
))))
651 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
652 operand of the negate operation. Otherwise, return NULL. */
655 get_unary_op (tree name
, enum tree_code opcode
)
657 gimple
*stmt
= SSA_NAME_DEF_STMT (name
);
659 /* Look through nop conversions (sign changes). */
660 if (gimple_nop_conversion_p (stmt
)
661 && TREE_CODE (gimple_assign_rhs1 (stmt
)) == SSA_NAME
)
662 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
664 if (!is_gimple_assign (stmt
))
667 if (gimple_assign_rhs_code (stmt
) == opcode
)
668 return gimple_assign_rhs1 (stmt
);
672 /* Return true if OP1 and OP2 have the same value if casted to either type. */
675 ops_equal_values_p (tree op1
, tree op2
)
681 if (TREE_CODE (op1
) == SSA_NAME
)
683 gimple
*stmt
= SSA_NAME_DEF_STMT (op1
);
684 if (gimple_nop_conversion_p (stmt
))
686 op1
= gimple_assign_rhs1 (stmt
);
692 if (TREE_CODE (op2
) == SSA_NAME
)
694 gimple
*stmt
= SSA_NAME_DEF_STMT (op2
);
695 if (gimple_nop_conversion_p (stmt
))
697 op2
= gimple_assign_rhs1 (stmt
);
708 /* If CURR and LAST are a pair of ops that OPCODE allows us to
709 eliminate through equivalences, do so, remove them from OPS, and
710 return true. Otherwise, return false. */
713 eliminate_duplicate_pair (enum tree_code opcode
,
714 vec
<operand_entry
*> *ops
,
721 /* If we have two of the same op, and the opcode is & |, min, or max,
722 we can eliminate one of them.
723 If we have two of the same op, and the opcode is ^, we can
724 eliminate both of them. */
726 if (last
&& last
->op
== curr
->op
)
734 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
736 fprintf (dump_file
, "Equivalence: ");
737 print_generic_expr (dump_file
, curr
->op
);
738 fprintf (dump_file
, " [&|minmax] ");
739 print_generic_expr (dump_file
, last
->op
);
740 fprintf (dump_file
, " -> ");
741 print_generic_stmt (dump_file
, last
->op
);
744 ops
->ordered_remove (i
);
745 reassociate_stats
.ops_eliminated
++;
750 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
752 fprintf (dump_file
, "Equivalence: ");
753 print_generic_expr (dump_file
, curr
->op
);
754 fprintf (dump_file
, " ^ ");
755 print_generic_expr (dump_file
, last
->op
);
756 fprintf (dump_file
, " -> nothing\n");
759 reassociate_stats
.ops_eliminated
+= 2;
761 if (ops
->length () == 2)
764 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
769 ops
->ordered_remove (i
-1);
770 ops
->ordered_remove (i
-1);
782 static vec
<tree
> plus_negates
;
784 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
785 expression, look in OPS for a corresponding positive operation to cancel
786 it out. If we find one, remove the other from OPS, replace
787 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
791 eliminate_plus_minus_pair (enum tree_code opcode
,
792 vec
<operand_entry
*> *ops
,
793 unsigned int currindex
,
801 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
804 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
805 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
806 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
809 /* Any non-negated version will have a rank that is one less than
810 the current rank. So once we hit those ranks, if we don't find
813 for (i
= currindex
+ 1;
814 ops
->iterate (i
, &oe
)
815 && oe
->rank
>= curr
->rank
- 1 ;
819 && ops_equal_values_p (oe
->op
, negateop
))
821 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
823 fprintf (dump_file
, "Equivalence: ");
824 print_generic_expr (dump_file
, negateop
);
825 fprintf (dump_file
, " + -");
826 print_generic_expr (dump_file
, oe
->op
);
827 fprintf (dump_file
, " -> 0\n");
830 ops
->ordered_remove (i
);
831 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
832 ops
->ordered_remove (currindex
);
833 reassociate_stats
.ops_eliminated
++;
838 && ops_equal_values_p (oe
->op
, notop
))
840 tree op_type
= TREE_TYPE (oe
->op
);
842 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
844 fprintf (dump_file
, "Equivalence: ");
845 print_generic_expr (dump_file
, notop
);
846 fprintf (dump_file
, " + ~");
847 print_generic_expr (dump_file
, oe
->op
);
848 fprintf (dump_file
, " -> -1\n");
851 ops
->ordered_remove (i
);
852 add_to_ops_vec (ops
, build_all_ones_cst (op_type
));
853 ops
->ordered_remove (currindex
);
854 reassociate_stats
.ops_eliminated
++;
860 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
861 save it for later inspection in repropagate_negates(). */
862 if (negateop
!= NULL_TREE
863 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr
->op
)) == NEGATE_EXPR
)
864 plus_negates
.safe_push (curr
->op
);
869 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
870 bitwise not expression, look in OPS for a corresponding operand to
871 cancel it out. If we find one, remove the other from OPS, replace
872 OPS[CURRINDEX] with 0, and return true. Otherwise, return
876 eliminate_not_pairs (enum tree_code opcode
,
877 vec
<operand_entry
*> *ops
,
878 unsigned int currindex
,
885 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
886 || TREE_CODE (curr
->op
) != SSA_NAME
)
889 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
890 if (notop
== NULL_TREE
)
893 /* Any non-not version will have a rank that is one less than
894 the current rank. So once we hit those ranks, if we don't find
897 for (i
= currindex
+ 1;
898 ops
->iterate (i
, &oe
)
899 && oe
->rank
>= curr
->rank
- 1;
904 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
906 fprintf (dump_file
, "Equivalence: ");
907 print_generic_expr (dump_file
, notop
);
908 if (opcode
== BIT_AND_EXPR
)
909 fprintf (dump_file
, " & ~");
910 else if (opcode
== BIT_IOR_EXPR
)
911 fprintf (dump_file
, " | ~");
912 print_generic_expr (dump_file
, oe
->op
);
913 if (opcode
== BIT_AND_EXPR
)
914 fprintf (dump_file
, " -> 0\n");
915 else if (opcode
== BIT_IOR_EXPR
)
916 fprintf (dump_file
, " -> -1\n");
919 if (opcode
== BIT_AND_EXPR
)
920 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
921 else if (opcode
== BIT_IOR_EXPR
)
922 oe
->op
= build_all_ones_cst (TREE_TYPE (oe
->op
));
924 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
926 ops
->quick_push (oe
);
934 /* Use constant value that may be present in OPS to try to eliminate
935 operands. Note that this function is only really used when we've
936 eliminated ops for other reasons, or merged constants. Across
937 single statements, fold already does all of this, plus more. There
938 is little point in duplicating logic, so I've only included the
939 identities that I could ever construct testcases to trigger. */
942 eliminate_using_constants (enum tree_code opcode
,
943 vec
<operand_entry
*> *ops
)
945 operand_entry
*oelast
= ops
->last ();
946 tree type
= TREE_TYPE (oelast
->op
);
948 if (oelast
->rank
== 0
949 && (ANY_INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
954 if (integer_zerop (oelast
->op
))
956 if (ops
->length () != 1)
958 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
959 fprintf (dump_file
, "Found & 0, removing all other ops\n");
961 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
964 ops
->quick_push (oelast
);
968 else if (integer_all_onesp (oelast
->op
))
970 if (ops
->length () != 1)
972 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
973 fprintf (dump_file
, "Found & -1, removing\n");
975 reassociate_stats
.ops_eliminated
++;
980 if (integer_all_onesp (oelast
->op
))
982 if (ops
->length () != 1)
984 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
985 fprintf (dump_file
, "Found | -1, removing all other ops\n");
987 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
990 ops
->quick_push (oelast
);
994 else if (integer_zerop (oelast
->op
))
996 if (ops
->length () != 1)
998 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
999 fprintf (dump_file
, "Found | 0, removing\n");
1001 reassociate_stats
.ops_eliminated
++;
1006 if (integer_zerop (oelast
->op
)
1007 || (FLOAT_TYPE_P (type
)
1008 && !HONOR_NANS (type
)
1009 && !HONOR_SIGNED_ZEROS (type
)
1010 && real_zerop (oelast
->op
)))
1012 if (ops
->length () != 1)
1014 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1015 fprintf (dump_file
, "Found * 0, removing all other ops\n");
1017 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
1019 ops
->quick_push (oelast
);
1023 else if (integer_onep (oelast
->op
)
1024 || (FLOAT_TYPE_P (type
)
1025 && !HONOR_SNANS (type
)
1026 && real_onep (oelast
->op
)))
1028 if (ops
->length () != 1)
1030 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1031 fprintf (dump_file
, "Found * 1, removing\n");
1033 reassociate_stats
.ops_eliminated
++;
1041 if (integer_zerop (oelast
->op
)
1042 || (FLOAT_TYPE_P (type
)
1043 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
1044 && fold_real_zero_addition_p (type
, oelast
->op
,
1045 opcode
== MINUS_EXPR
)))
1047 if (ops
->length () != 1)
1049 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1050 fprintf (dump_file
, "Found [|^+] 0, removing\n");
1052 reassociate_stats
.ops_eliminated
++;
1064 static void linearize_expr_tree (vec
<operand_entry
*> *, gimple
*,
1067 /* Structure for tracking and counting operands. */
1071 enum tree_code oecode
;
1076 /* The heap for the oecount hashtable and the sorted list of operands. */
1077 static vec
<oecount
> cvec
;
1080 /* Oecount hashtable helpers. */
1082 struct oecount_hasher
: int_hash
<int, 0, 1>
1084 static inline hashval_t
hash (int);
1085 static inline bool equal (int, int);
1088 /* Hash function for oecount. */
1091 oecount_hasher::hash (int p
)
1093 const oecount
*c
= &cvec
[p
- 42];
1094 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
1097 /* Comparison function for oecount. */
1100 oecount_hasher::equal (int p1
, int p2
)
1102 const oecount
*c1
= &cvec
[p1
- 42];
1103 const oecount
*c2
= &cvec
[p2
- 42];
1104 return c1
->oecode
== c2
->oecode
&& c1
->op
== c2
->op
;
1107 /* Comparison function for qsort sorting oecount elements by count. */
1110 oecount_cmp (const void *p1
, const void *p2
)
1112 const oecount
*c1
= (const oecount
*)p1
;
1113 const oecount
*c2
= (const oecount
*)p2
;
1114 if (c1
->cnt
!= c2
->cnt
)
1115 return c1
->cnt
> c2
->cnt
? 1 : -1;
1117 /* If counts are identical, use unique IDs to stabilize qsort. */
1118 return c1
->id
> c2
->id
? 1 : -1;
1121 /* Return TRUE iff STMT represents a builtin call that raises OP
1122 to some exponent. */
1125 stmt_is_power_of_op (gimple
*stmt
, tree op
)
1127 if (!is_gimple_call (stmt
))
1130 switch (gimple_call_combined_fn (stmt
))
1134 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1141 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1142 in place and return the result. Assumes that stmt_is_power_of_op
1143 was previously called for STMT and returned TRUE. */
1145 static HOST_WIDE_INT
1146 decrement_power (gimple
*stmt
)
1148 REAL_VALUE_TYPE c
, cint
;
1149 HOST_WIDE_INT power
;
1152 switch (gimple_call_combined_fn (stmt
))
1155 arg1
= gimple_call_arg (stmt
, 1);
1156 c
= TREE_REAL_CST (arg1
);
1157 power
= real_to_integer (&c
) - 1;
1158 real_from_integer (&cint
, VOIDmode
, power
, SIGNED
);
1159 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1163 arg1
= gimple_call_arg (stmt
, 1);
1164 power
= TREE_INT_CST_LOW (arg1
) - 1;
1165 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1173 /* Replace SSA defined by STMT and replace all its uses with new
1174 SSA. Also return the new SSA. */
1177 make_new_ssa_for_def (gimple
*stmt
, enum tree_code opcode
, tree op
)
1181 imm_use_iterator iter
;
1182 tree new_lhs
, new_debug_lhs
= NULL_TREE
;
1183 tree lhs
= gimple_get_lhs (stmt
);
1185 new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
1186 gimple_set_lhs (stmt
, new_lhs
);
1188 /* Also need to update GIMPLE_DEBUGs. */
1189 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
1191 tree repl
= new_lhs
;
1192 if (is_gimple_debug (use_stmt
))
1194 if (new_debug_lhs
== NULL_TREE
)
1196 new_debug_lhs
= make_node (DEBUG_EXPR_DECL
);
1198 = gimple_build_debug_bind (new_debug_lhs
,
1199 build2 (opcode
, TREE_TYPE (lhs
),
1202 DECL_ARTIFICIAL (new_debug_lhs
) = 1;
1203 TREE_TYPE (new_debug_lhs
) = TREE_TYPE (lhs
);
1204 SET_DECL_MODE (new_debug_lhs
, TYPE_MODE (TREE_TYPE (lhs
)));
1205 gimple_set_uid (def_temp
, gimple_uid (stmt
));
1206 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1207 gsi_insert_after (&gsi
, def_temp
, GSI_SAME_STMT
);
1209 repl
= new_debug_lhs
;
1211 FOR_EACH_IMM_USE_ON_STMT (use
, iter
)
1212 SET_USE (use
, repl
);
1213 update_stmt (use_stmt
);
1218 /* Replace all SSAs defined in STMTS_TO_FIX and replace its
1219 uses with new SSAs. Also do this for the stmt that defines DEF
1220 if *DEF is not OP. */
1223 make_new_ssa_for_all_defs (tree
*def
, enum tree_code opcode
, tree op
,
1224 vec
<gimple
*> &stmts_to_fix
)
1230 && TREE_CODE (*def
) == SSA_NAME
1231 && (stmt
= SSA_NAME_DEF_STMT (*def
))
1232 && gimple_code (stmt
) != GIMPLE_NOP
)
1233 *def
= make_new_ssa_for_def (stmt
, opcode
, op
);
1235 FOR_EACH_VEC_ELT (stmts_to_fix
, i
, stmt
)
1236 make_new_ssa_for_def (stmt
, opcode
, op
);
1239 /* Find the single immediate use of STMT's LHS, and replace it
1240 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1241 replace *DEF with OP as well. */
1244 propagate_op_to_single_use (tree op
, gimple
*stmt
, tree
*def
)
1249 gimple_stmt_iterator gsi
;
1251 if (is_gimple_call (stmt
))
1252 lhs
= gimple_call_lhs (stmt
);
1254 lhs
= gimple_assign_lhs (stmt
);
1256 gcc_assert (has_single_use (lhs
));
1257 single_imm_use (lhs
, &use
, &use_stmt
);
1261 if (TREE_CODE (op
) != SSA_NAME
)
1262 update_stmt (use_stmt
);
1263 gsi
= gsi_for_stmt (stmt
);
1264 unlink_stmt_vdef (stmt
);
1265 reassoc_remove_stmt (&gsi
);
1266 release_defs (stmt
);
1269 /* Walks the linear chain with result *DEF searching for an operation
1270 with operand OP and code OPCODE removing that from the chain. *DEF
1271 is updated if there is only one operand but no operation left. */
1274 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1276 tree orig_def
= *def
;
1277 gimple
*stmt
= SSA_NAME_DEF_STMT (*def
);
1278 /* PR72835 - Record the stmt chain that has to be updated such that
1279 we dont use the same LHS when the values computed are different. */
1280 auto_vec
<gimple
*, 64> stmts_to_fix
;
1286 if (opcode
== MULT_EXPR
)
1288 if (stmt_is_power_of_op (stmt
, op
))
1290 if (decrement_power (stmt
) == 1)
1292 if (stmts_to_fix
.length () > 0)
1293 stmts_to_fix
.pop ();
1294 propagate_op_to_single_use (op
, stmt
, def
);
1298 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
)
1300 if (gimple_assign_rhs1 (stmt
) == op
)
1302 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
1303 if (stmts_to_fix
.length () > 0)
1304 stmts_to_fix
.pop ();
1305 propagate_op_to_single_use (cst
, stmt
, def
);
1308 else if (integer_minus_onep (op
)
1309 || real_minus_onep (op
))
1311 gimple_assign_set_rhs_code
1312 (stmt
, TREE_CODE (gimple_assign_rhs1 (stmt
)));
1318 name
= gimple_assign_rhs1 (stmt
);
1320 /* If this is the operation we look for and one of the operands
1321 is ours simply propagate the other operand into the stmts
1323 if (gimple_assign_rhs_code (stmt
) == opcode
1325 || gimple_assign_rhs2 (stmt
) == op
))
1328 name
= gimple_assign_rhs2 (stmt
);
1329 if (stmts_to_fix
.length () > 0)
1330 stmts_to_fix
.pop ();
1331 propagate_op_to_single_use (name
, stmt
, def
);
1335 /* We might have a multiply of two __builtin_pow* calls, and
1336 the operand might be hiding in the rightmost one. Likewise
1337 this can happen for a negate. */
1338 if (opcode
== MULT_EXPR
1339 && gimple_assign_rhs_code (stmt
) == opcode
1340 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1341 && has_single_use (gimple_assign_rhs2 (stmt
)))
1343 gimple
*stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1344 if (stmt_is_power_of_op (stmt2
, op
))
1346 if (decrement_power (stmt2
) == 1)
1347 propagate_op_to_single_use (op
, stmt2
, def
);
1349 stmts_to_fix
.safe_push (stmt2
);
1352 else if (is_gimple_assign (stmt2
)
1353 && gimple_assign_rhs_code (stmt2
) == NEGATE_EXPR
)
1355 if (gimple_assign_rhs1 (stmt2
) == op
)
1357 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
1358 propagate_op_to_single_use (cst
, stmt2
, def
);
1361 else if (integer_minus_onep (op
)
1362 || real_minus_onep (op
))
1364 stmts_to_fix
.safe_push (stmt2
);
1365 gimple_assign_set_rhs_code
1366 (stmt2
, TREE_CODE (gimple_assign_rhs1 (stmt2
)));
1372 /* Continue walking the chain. */
1373 gcc_assert (name
!= op
1374 && TREE_CODE (name
) == SSA_NAME
);
1375 stmt
= SSA_NAME_DEF_STMT (name
);
1376 stmts_to_fix
.safe_push (stmt
);
1380 if (stmts_to_fix
.length () > 0 || *def
== orig_def
)
1381 make_new_ssa_for_all_defs (def
, opcode
, op
, stmts_to_fix
);
1384 /* Returns true if statement S1 dominates statement S2. Like
1385 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1388 reassoc_stmt_dominates_stmt_p (gimple
*s1
, gimple
*s2
)
1390 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1392 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1393 SSA_NAME. Assume it lives at the beginning of function and
1394 thus dominates everything. */
1395 if (!bb1
|| s1
== s2
)
1398 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1404 /* PHIs in the same basic block are assumed to be
1405 executed all in parallel, if only one stmt is a PHI,
1406 it dominates the other stmt in the same basic block. */
1407 if (gimple_code (s1
) == GIMPLE_PHI
)
1410 if (gimple_code (s2
) == GIMPLE_PHI
)
1413 gcc_assert (gimple_uid (s1
) && gimple_uid (s2
));
1415 if (gimple_uid (s1
) < gimple_uid (s2
))
1418 if (gimple_uid (s1
) > gimple_uid (s2
))
1421 gimple_stmt_iterator gsi
= gsi_for_stmt (s1
);
1422 unsigned int uid
= gimple_uid (s1
);
1423 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1425 gimple
*s
= gsi_stmt (gsi
);
1426 if (gimple_uid (s
) != uid
)
1435 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
1438 /* Insert STMT after INSERT_POINT. */
1441 insert_stmt_after (gimple
*stmt
, gimple
*insert_point
)
1443 gimple_stmt_iterator gsi
;
1446 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1447 bb
= gimple_bb (insert_point
);
1448 else if (!stmt_ends_bb_p (insert_point
))
1450 gsi
= gsi_for_stmt (insert_point
);
1451 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1452 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1456 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1457 thus if it must end a basic block, it should be a call that can
1458 throw, or some assignment that can throw. If it throws, the LHS
1459 of it will not be initialized though, so only valid places using
1460 the SSA_NAME should be dominated by the fallthru edge. */
1461 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
1462 gsi
= gsi_after_labels (bb
);
1463 if (gsi_end_p (gsi
))
1465 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1466 gimple_set_uid (stmt
,
1467 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1470 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1471 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1474 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1475 the result. Places the statement after the definition of either
1476 OP1 or OP2. Returns the new statement. */
1479 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
1481 gimple
*op1def
= NULL
, *op2def
= NULL
;
1482 gimple_stmt_iterator gsi
;
1486 /* Create the addition statement. */
1487 op
= make_ssa_name (type
);
1488 sum
= gimple_build_assign (op
, opcode
, op1
, op2
);
1490 /* Find an insertion place and insert. */
1491 if (TREE_CODE (op1
) == SSA_NAME
)
1492 op1def
= SSA_NAME_DEF_STMT (op1
);
1493 if (TREE_CODE (op2
) == SSA_NAME
)
1494 op2def
= SSA_NAME_DEF_STMT (op2
);
1495 if ((!op1def
|| gimple_nop_p (op1def
))
1496 && (!op2def
|| gimple_nop_p (op2def
)))
1498 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1499 if (gsi_end_p (gsi
))
1501 gimple_stmt_iterator gsi2
1502 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1503 gimple_set_uid (sum
,
1504 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1507 gimple_set_uid (sum
, gimple_uid (gsi_stmt (gsi
)));
1508 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1512 gimple
*insert_point
;
1513 if ((!op1def
|| gimple_nop_p (op1def
))
1514 || (op2def
&& !gimple_nop_p (op2def
)
1515 && reassoc_stmt_dominates_stmt_p (op1def
, op2def
)))
1516 insert_point
= op2def
;
1518 insert_point
= op1def
;
1519 insert_stmt_after (sum
, insert_point
);
1526 /* Perform un-distribution of divisions and multiplications.
1527 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1528 to (A + B) / X for real X.
1530 The algorithm is organized as follows.
1532 - First we walk the addition chain *OPS looking for summands that
1533 are defined by a multiplication or a real division. This results
1534 in the candidates bitmap with relevant indices into *OPS.
1536 - Second we build the chains of multiplications or divisions for
1537 these candidates, counting the number of occurrences of (operand, code)
1538 pairs in all of the candidates chains.
1540 - Third we sort the (operand, code) pairs by number of occurrence and
1541 process them starting with the pair with the most uses.
1543 * For each such pair we walk the candidates again to build a
1544 second candidate bitmap noting all multiplication/division chains
1545 that have at least one occurrence of (operand, code).
1547 * We build an alternate addition chain only covering these
1548 candidates with one (operand, code) operation removed from their
1549 multiplication/division chain.
1551 * The first candidate gets replaced by the alternate addition chain
1552 multiplied/divided by the operand.
1554 * All candidate chains get disabled for further processing and
1555 processing of (operand, code) pairs continues.
1557 The alternate addition chains built are re-processed by the main
1558 reassociation algorithm which allows optimizing a * x * y + b * y * x
1559 to (a + b ) * x * y in one invocation of the reassociation pass. */
1562 undistribute_ops_list (enum tree_code opcode
,
1563 vec
<operand_entry
*> *ops
, class loop
*loop
)
1565 unsigned int length
= ops
->length ();
1568 unsigned nr_candidates
, nr_candidates2
;
1569 sbitmap_iterator sbi0
;
1570 vec
<operand_entry
*> *subops
;
1571 bool changed
= false;
1572 unsigned int next_oecount_id
= 0;
1575 || opcode
!= PLUS_EXPR
)
1578 /* Build a list of candidates to process. */
1579 auto_sbitmap
candidates (length
);
1580 bitmap_clear (candidates
);
1582 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1584 enum tree_code dcode
;
1587 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1589 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1590 if (!is_gimple_assign (oe1def
))
1592 dcode
= gimple_assign_rhs_code (oe1def
);
1593 if ((dcode
!= MULT_EXPR
1594 && dcode
!= RDIV_EXPR
)
1595 || !is_reassociable_op (oe1def
, dcode
, loop
))
1598 bitmap_set_bit (candidates
, i
);
1602 if (nr_candidates
< 2)
1605 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1607 fprintf (dump_file
, "searching for un-distribute opportunities ");
1608 print_generic_expr (dump_file
,
1609 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, TDF_NONE
);
1610 fprintf (dump_file
, " %d\n", nr_candidates
);
1613 /* Build linearized sub-operand lists and the counting table. */
1616 hash_table
<oecount_hasher
> ctable (15);
1618 /* ??? Macro arguments cannot have multi-argument template types in
1619 them. This typedef is needed to workaround that limitation. */
1620 typedef vec
<operand_entry
*> vec_operand_entry_t_heap
;
1621 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1622 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1625 enum tree_code oecode
;
1628 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1629 oecode
= gimple_assign_rhs_code (oedef
);
1630 linearize_expr_tree (&subops
[i
], oedef
,
1631 associative_tree_code (oecode
), false);
1633 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1640 c
.id
= next_oecount_id
++;
1643 idx
= cvec
.length () + 41;
1644 slot
= ctable
.find_slot (idx
, INSERT
);
1652 cvec
[*slot
- 42].cnt
++;
1657 /* Sort the counting table. */
1658 cvec
.qsort (oecount_cmp
);
1660 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1663 fprintf (dump_file
, "Candidates:\n");
1664 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1666 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1667 c
->oecode
== MULT_EXPR
1668 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1669 print_generic_expr (dump_file
, c
->op
);
1670 fprintf (dump_file
, "\n");
1674 /* Process the (operand, code) pairs in order of most occurrence. */
1675 auto_sbitmap
candidates2 (length
);
1676 while (!cvec
.is_empty ())
1678 oecount
*c
= &cvec
.last ();
1682 /* Now collect the operands in the outer chain that contain
1683 the common operand in their inner chain. */
1684 bitmap_clear (candidates2
);
1686 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1689 enum tree_code oecode
;
1691 tree op
= (*ops
)[i
]->op
;
1693 /* If we undistributed in this chain already this may be
1695 if (TREE_CODE (op
) != SSA_NAME
)
1698 oedef
= SSA_NAME_DEF_STMT (op
);
1699 oecode
= gimple_assign_rhs_code (oedef
);
1700 if (oecode
!= c
->oecode
)
1703 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1705 if (oe1
->op
== c
->op
)
1707 bitmap_set_bit (candidates2
, i
);
1714 if (nr_candidates2
>= 2)
1716 operand_entry
*oe1
, *oe2
;
1718 int first
= bitmap_first_set_bit (candidates2
);
1720 /* Build the new addition chain. */
1721 oe1
= (*ops
)[first
];
1722 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1724 fprintf (dump_file
, "Building (");
1725 print_generic_expr (dump_file
, oe1
->op
);
1727 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1728 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1732 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1734 fprintf (dump_file
, " + ");
1735 print_generic_expr (dump_file
, oe2
->op
);
1737 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1738 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1739 oe1
->op
, oe2
->op
, opcode
);
1740 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1742 oe1
->op
= gimple_get_lhs (sum
);
1745 /* Apply the multiplication/division. */
1746 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1747 oe1
->op
, c
->op
, c
->oecode
);
1748 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1750 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1751 print_generic_expr (dump_file
, c
->op
);
1752 fprintf (dump_file
, "\n");
1755 /* Record it in the addition chain and disable further
1756 undistribution with this op. */
1757 oe1
->op
= gimple_assign_lhs (prod
);
1758 oe1
->rank
= get_rank (oe1
->op
);
1759 subops
[first
].release ();
1767 for (i
= 0; i
< ops
->length (); ++i
)
1768 subops
[i
].release ();
1775 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1776 first: element index for each relevant BIT_FIELD_REF.
1777 second: the index of vec ops* for each relevant BIT_FIELD_REF. */
1778 typedef std::pair
<unsigned, unsigned> v_info_elem
;
1779 typedef auto_vec
<v_info_elem
, 32> v_info
;
1780 typedef v_info
*v_info_ptr
;
1782 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1784 sort_by_mach_mode (const void *p_i
, const void *p_j
)
1786 const tree tr1
= *((const tree
*) p_i
);
1787 const tree tr2
= *((const tree
*) p_j
);
1788 unsigned int mode1
= TYPE_MODE (TREE_TYPE (tr1
));
1789 unsigned int mode2
= TYPE_MODE (TREE_TYPE (tr2
));
1792 else if (mode1
< mode2
)
1798 /* Cleanup hash map for VECTOR information. */
1800 cleanup_vinfo_map (hash_map
<tree
, v_info_ptr
> &info_map
)
1802 for (hash_map
<tree
, v_info_ptr
>::iterator it
= info_map
.begin ();
1803 it
!= info_map
.end (); ++it
)
1805 v_info_ptr info
= (*it
).second
;
1807 (*it
).second
= NULL
;
1811 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1812 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1814 Vs = (V1 + V2 + ... + Vn)
1815 Vs[0] + Vs[1] + ... + Vs[k]
1817 The basic steps are listed below:
1819 1) Check the addition chain *OPS by looking those summands coming from
1820 VECTOR bit_field_ref on VECTOR type. Put the information into
1821 v_info_map for each satisfied summand, using VECTOR SSA_NAME as key.
1823 2) For each key (VECTOR SSA_NAME), validate all its BIT_FIELD_REFs are
1824 continuous, they can cover the whole VECTOR perfectly without any holes.
1825 Obtain one VECTOR list which contain candidates to be transformed.
1827 3) Sort the VECTOR list by machine mode of VECTOR type, for each group of
1828 candidates with same mode, build the addition statements for them and
1829 generate BIT_FIELD_REFs accordingly.
1832 The current implementation requires the whole VECTORs should be fully
1833 covered, but it can be extended to support partial, checking adjacent
1834 but not fill the whole, it may need some cost model to define the
1835 boundary to do or not.
1838 undistribute_bitref_for_vector (enum tree_code opcode
,
1839 vec
<operand_entry
*> *ops
, struct loop
*loop
)
1841 if (ops
->length () <= 1)
1844 if (opcode
!= PLUS_EXPR
&& opcode
!= MULT_EXPR
&& opcode
!= BIT_XOR_EXPR
1845 && opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
1848 hash_map
<tree
, v_info_ptr
> v_info_map
;
1852 /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
1853 information into map. */
1854 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1856 enum tree_code dcode
;
1859 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1861 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1862 if (!is_gimple_assign (oe1def
))
1864 dcode
= gimple_assign_rhs_code (oe1def
);
1865 if (dcode
!= BIT_FIELD_REF
|| !is_reassociable_op (oe1def
, dcode
, loop
))
1868 tree rhs
= gimple_assign_rhs1 (oe1def
);
1869 tree vec
= TREE_OPERAND (rhs
, 0);
1870 tree vec_type
= TREE_TYPE (vec
);
1872 if (TREE_CODE (vec
) != SSA_NAME
|| !VECTOR_TYPE_P (vec_type
))
1875 /* Ignore it if target machine can't support this VECTOR type. */
1876 if (!VECTOR_MODE_P (TYPE_MODE (vec_type
)))
1879 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1880 if (!TYPE_VECTOR_SUBPARTS (vec_type
).is_constant ())
1883 tree elem_type
= TREE_TYPE (vec_type
);
1884 unsigned HOST_WIDE_INT elem_size
1885 = TREE_INT_CST_LOW (TYPE_SIZE (elem_type
));
1886 if (maybe_ne (bit_field_size (rhs
), elem_size
))
1890 if (!constant_multiple_p (bit_field_offset (rhs
), elem_size
, &idx
))
1893 /* Ignore it if target machine can't support this type of VECTOR
1895 optab op_tab
= optab_for_tree_code (opcode
, vec_type
, optab_vector
);
1896 if (optab_handler (op_tab
, TYPE_MODE (vec_type
)) == CODE_FOR_nothing
)
1900 v_info_ptr
&info
= v_info_map
.get_or_insert (vec
, &existed
);
1903 info
->safe_push (std::make_pair (idx
, i
));
1906 /* At least two VECTOR to combine. */
1907 if (v_info_map
.elements () <= 1)
1909 cleanup_vinfo_map (v_info_map
);
1913 /* Verify all VECTOR candidates by checking two conditions:
1914 1) sorted offsets are adjacent, no holes.
1915 2) can fill the whole VECTOR perfectly.
1916 And add the valid candidates to a vector for further handling. */
1917 auto_vec
<tree
> valid_vecs (v_info_map
.elements ());
1918 for (hash_map
<tree
, v_info_ptr
>::iterator it
= v_info_map
.begin ();
1919 it
!= v_info_map
.end (); ++it
)
1921 tree cand_vec
= (*it
).first
;
1922 v_info_ptr cand_info
= (*it
).second
;
1923 unsigned int num_elems
= VECTOR_CST_NELTS (cand_vec
).to_constant ();
1924 if (cand_info
->length () != num_elems
)
1926 sbitmap holes
= sbitmap_alloc (num_elems
);
1927 bitmap_ones (holes
);
1930 FOR_EACH_VEC_ELT (*cand_info
, i
, curr
)
1932 if (!bitmap_bit_p (holes
, curr
->first
))
1938 bitmap_clear_bit (holes
, curr
->first
);
1940 if (valid
&& bitmap_empty_p (holes
))
1941 valid_vecs
.quick_push (cand_vec
);
1942 sbitmap_free (holes
);
1945 /* At least two VECTOR to combine. */
1946 if (valid_vecs
.length () <= 1)
1948 cleanup_vinfo_map (v_info_map
);
1952 valid_vecs
.qsort (sort_by_mach_mode
);
1953 /* Go through all candidates by machine mode order, query the mode_to_total
1954 to get the total number for each mode and skip the single one. */
1955 for (unsigned i
= 0; i
< valid_vecs
.length () - 1; ++i
)
1957 tree tvec
= valid_vecs
[i
];
1958 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (tvec
));
1960 /* Skip modes with only a single candidate. */
1961 if (TYPE_MODE (TREE_TYPE (valid_vecs
[i
+ 1])) != mode
)
1964 unsigned int idx
, j
;
1966 v_info_ptr info_ptr
;
1967 tree sum_vec
= tvec
;
1970 /* Build the sum for all candidates with same mode. */
1973 sum
= build_and_add_sum (TREE_TYPE (sum_vec
), sum_vec
,
1974 valid_vecs
[i
+ 1], opcode
);
1975 sum_vec
= gimple_get_lhs (sum
);
1976 info_ptr
= *(v_info_map
.get (valid_vecs
[i
+ 1]));
1977 /* Update those related ops of current candidate VECTOR. */
1978 FOR_EACH_VEC_ELT (*info_ptr
, j
, elem
)
1981 gimple
*def
= SSA_NAME_DEF_STMT ((*ops
)[idx
]->op
);
1982 /* Set this then op definition will get DCEd later. */
1983 gimple_set_visited (def
, true);
1984 if (opcode
== PLUS_EXPR
|| opcode
== BIT_XOR_EXPR
1985 || opcode
== BIT_IOR_EXPR
)
1986 (*ops
)[idx
]->op
= build_zero_cst (TREE_TYPE ((*ops
)[idx
]->op
));
1987 else if (opcode
== MULT_EXPR
)
1988 (*ops
)[idx
]->op
= build_one_cst (TREE_TYPE ((*ops
)[idx
]->op
));
1991 gcc_assert (opcode
== BIT_AND_EXPR
);
1993 = build_all_ones_cst (TREE_TYPE ((*ops
)[idx
]->op
));
1995 (*ops
)[idx
]->rank
= 0;
1997 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1999 fprintf (dump_file
, "Generating addition -> ");
2000 print_gimple_stmt (dump_file
, sum
, 0);
2004 while ((i
< valid_vecs
.length () - 1)
2005 && TYPE_MODE (TREE_TYPE (valid_vecs
[i
+ 1])) == mode
);
2007 /* Referring to first valid VECTOR with this mode, generate the
2008 BIT_FIELD_REF statements accordingly. */
2009 info_ptr
= *(v_info_map
.get (tvec
));
2011 tree elem_type
= TREE_TYPE (TREE_TYPE (tvec
));
2012 FOR_EACH_VEC_ELT (*info_ptr
, j
, elem
)
2015 tree dst
= make_ssa_name (elem_type
);
2016 gimple
*gs
= gimple_build_assign (
2018 build3 (BIT_FIELD_REF
, elem_type
, sum_vec
, TYPE_SIZE (elem_type
),
2019 bitsize_int (elem
->first
2020 * tree_to_uhwi (TYPE_SIZE (elem_type
)))));
2021 insert_stmt_after (gs
, sum
);
2022 gimple
*def
= SSA_NAME_DEF_STMT ((*ops
)[idx
]->op
);
2023 /* Set this then op definition will get DCEd later. */
2024 gimple_set_visited (def
, true);
2025 (*ops
)[idx
]->op
= gimple_assign_lhs (gs
);
2026 (*ops
)[idx
]->rank
= get_rank ((*ops
)[idx
]->op
);
2027 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2029 fprintf (dump_file
, "Generating bit_field_ref -> ");
2030 print_gimple_stmt (dump_file
, gs
, 0);
2035 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2036 fprintf (dump_file
, "undistributiong bit_field_ref for vector done.\n");
2038 cleanup_vinfo_map (v_info_map
);
2043 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2044 expression, examine the other OPS to see if any of them are comparisons
2045 of the same values, which we may be able to combine or eliminate.
2046 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
2049 eliminate_redundant_comparison (enum tree_code opcode
,
2050 vec
<operand_entry
*> *ops
,
2051 unsigned int currindex
,
2052 operand_entry
*curr
)
2055 enum tree_code lcode
, rcode
;
2056 gimple
*def1
, *def2
;
2060 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
2063 /* Check that CURR is a comparison. */
2064 if (TREE_CODE (curr
->op
) != SSA_NAME
)
2066 def1
= SSA_NAME_DEF_STMT (curr
->op
);
2067 if (!is_gimple_assign (def1
))
2069 lcode
= gimple_assign_rhs_code (def1
);
2070 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
2072 op1
= gimple_assign_rhs1 (def1
);
2073 op2
= gimple_assign_rhs2 (def1
);
2075 /* Now look for a similar comparison in the remaining OPS. */
2076 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
2080 if (TREE_CODE (oe
->op
) != SSA_NAME
)
2082 def2
= SSA_NAME_DEF_STMT (oe
->op
);
2083 if (!is_gimple_assign (def2
))
2085 rcode
= gimple_assign_rhs_code (def2
);
2086 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
2089 /* If we got here, we have a match. See if we can combine the
2091 if (opcode
== BIT_IOR_EXPR
)
2092 t
= maybe_fold_or_comparisons (lcode
, op1
, op2
,
2093 rcode
, gimple_assign_rhs1 (def2
),
2094 gimple_assign_rhs2 (def2
));
2096 t
= maybe_fold_and_comparisons (lcode
, op1
, op2
,
2097 rcode
, gimple_assign_rhs1 (def2
),
2098 gimple_assign_rhs2 (def2
));
2102 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2103 always give us a boolean_type_node value back. If the original
2104 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2105 we need to convert. */
2106 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
2107 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
2109 if (TREE_CODE (t
) != INTEGER_CST
2110 && !operand_equal_p (t
, curr
->op
, 0))
2112 enum tree_code subcode
;
2113 tree newop1
, newop2
;
2114 if (!COMPARISON_CLASS_P (t
))
2116 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
2117 STRIP_USELESS_TYPE_CONVERSION (newop1
);
2118 STRIP_USELESS_TYPE_CONVERSION (newop2
);
2119 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
2123 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2125 fprintf (dump_file
, "Equivalence: ");
2126 print_generic_expr (dump_file
, curr
->op
);
2127 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
2128 print_generic_expr (dump_file
, oe
->op
);
2129 fprintf (dump_file
, " -> ");
2130 print_generic_expr (dump_file
, t
);
2131 fprintf (dump_file
, "\n");
2134 /* Now we can delete oe, as it has been subsumed by the new combined
2136 ops
->ordered_remove (i
);
2137 reassociate_stats
.ops_eliminated
++;
2139 /* If t is the same as curr->op, we're done. Otherwise we must
2140 replace curr->op with t. Special case is if we got a constant
2141 back, in which case we add it to the end instead of in place of
2142 the current entry. */
2143 if (TREE_CODE (t
) == INTEGER_CST
)
2145 ops
->ordered_remove (currindex
);
2146 add_to_ops_vec (ops
, t
);
2148 else if (!operand_equal_p (t
, curr
->op
, 0))
2151 enum tree_code subcode
;
2154 gcc_assert (COMPARISON_CLASS_P (t
));
2155 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
2156 STRIP_USELESS_TYPE_CONVERSION (newop1
);
2157 STRIP_USELESS_TYPE_CONVERSION (newop2
);
2158 gcc_checking_assert (is_gimple_val (newop1
)
2159 && is_gimple_val (newop2
));
2160 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
2161 curr
->op
= gimple_get_lhs (sum
);
2170 /* Transform repeated addition of same values into multiply with
2173 transform_add_to_multiply (vec
<operand_entry
*> *ops
)
2176 tree op
= NULL_TREE
;
2178 int i
, start
= -1, end
= 0, count
= 0;
2179 auto_vec
<std::pair
<int, int> > indxs
;
2180 bool changed
= false;
2182 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops
)[0]->op
))
2183 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops
)[0]->op
))
2184 || !flag_unsafe_math_optimizations
))
2187 /* Look for repeated operands. */
2188 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
2196 else if (operand_equal_p (oe
->op
, op
, 0))
2204 indxs
.safe_push (std::make_pair (start
, end
));
2212 indxs
.safe_push (std::make_pair (start
, end
));
2214 for (j
= indxs
.length () - 1; j
>= 0; --j
)
2216 /* Convert repeated operand addition to multiplication. */
2217 start
= indxs
[j
].first
;
2218 end
= indxs
[j
].second
;
2219 op
= (*ops
)[start
]->op
;
2220 count
= end
- start
+ 1;
2221 for (i
= end
; i
>= start
; --i
)
2222 ops
->unordered_remove (i
);
2223 tree tmp
= make_ssa_name (TREE_TYPE (op
));
2224 tree cst
= build_int_cst (integer_type_node
, count
);
2226 = gimple_build_assign (tmp
, MULT_EXPR
,
2227 op
, fold_convert (TREE_TYPE (op
), cst
));
2228 gimple_set_visited (mul_stmt
, true);
2229 add_to_ops_vec (ops
, tmp
, mul_stmt
);
2237 /* Perform various identities and other optimizations on the list of
2238 operand entries, stored in OPS. The tree code for the binary
2239 operation between all the operands is OPCODE. */
2242 optimize_ops_list (enum tree_code opcode
,
2243 vec
<operand_entry
*> *ops
)
2245 unsigned int length
= ops
->length ();
2248 operand_entry
*oelast
= NULL
;
2249 bool iterate
= false;
2254 oelast
= ops
->last ();
2256 /* If the last two are constants, pop the constants off, merge them
2257 and try the next two. */
2258 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
2260 operand_entry
*oelm1
= (*ops
)[length
- 2];
2262 if (oelm1
->rank
== 0
2263 && is_gimple_min_invariant (oelm1
->op
)
2264 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
2265 TREE_TYPE (oelast
->op
)))
2267 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
2268 oelm1
->op
, oelast
->op
);
2270 if (folded
&& is_gimple_min_invariant (folded
))
2272 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2273 fprintf (dump_file
, "Merging constants\n");
2278 add_to_ops_vec (ops
, folded
);
2279 reassociate_stats
.constants_eliminated
++;
2281 optimize_ops_list (opcode
, ops
);
2287 eliminate_using_constants (opcode
, ops
);
2290 for (i
= 0; ops
->iterate (i
, &oe
);)
2294 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
2296 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
2297 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
2298 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
2311 optimize_ops_list (opcode
, ops
);
2314 /* The following functions are subroutines to optimize_range_tests and allow
2315 it to try to change a logical combination of comparisons into a range
2319 X == 2 || X == 5 || X == 3 || X == 4
2323 (unsigned) (X - 2) <= 3
2325 For more information see comments above fold_test_range in fold-const.c,
2326 this implementation is for GIMPLE. */
2334 bool strict_overflow_p
;
2335 unsigned int idx
, next
;
2338 /* This is similar to make_range in fold-const.c, but on top of
2339 GIMPLE instead of trees. If EXP is non-NULL, it should be
2340 an SSA_NAME and STMT argument is ignored, otherwise STMT
2341 argument should be a GIMPLE_COND. */
2344 init_range_entry (struct range_entry
*r
, tree exp
, gimple
*stmt
)
2348 bool is_bool
, strict_overflow_p
;
2352 r
->strict_overflow_p
= false;
2354 r
->high
= NULL_TREE
;
2355 if (exp
!= NULL_TREE
2356 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
2359 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2360 and see if we can refine the range. Some of the cases below may not
2361 happen, but it doesn't seem worth worrying about this. We "continue"
2362 the outer loop when we've changed something; otherwise we "break"
2363 the switch, which will "break" the while. */
2364 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
2367 strict_overflow_p
= false;
2369 if (exp
== NULL_TREE
)
2371 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
2373 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
2378 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
2383 enum tree_code code
;
2384 tree arg0
, arg1
, exp_type
;
2388 if (exp
!= NULL_TREE
)
2390 if (TREE_CODE (exp
) != SSA_NAME
2391 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
))
2394 stmt
= SSA_NAME_DEF_STMT (exp
);
2395 if (!is_gimple_assign (stmt
))
2398 code
= gimple_assign_rhs_code (stmt
);
2399 arg0
= gimple_assign_rhs1 (stmt
);
2400 arg1
= gimple_assign_rhs2 (stmt
);
2401 exp_type
= TREE_TYPE (exp
);
2405 code
= gimple_cond_code (stmt
);
2406 arg0
= gimple_cond_lhs (stmt
);
2407 arg1
= gimple_cond_rhs (stmt
);
2408 exp_type
= boolean_type_node
;
2411 if (TREE_CODE (arg0
) != SSA_NAME
2412 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0
))
2414 loc
= gimple_location (stmt
);
2418 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
2419 /* Ensure the range is either +[-,0], +[0,0],
2420 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2421 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2422 or similar expression of unconditional true or
2423 false, it should not be negated. */
2424 && ((high
&& integer_zerop (high
))
2425 || (low
&& integer_onep (low
))))
2438 if ((TYPE_PRECISION (exp_type
) == 1
2439 || TREE_CODE (exp_type
) == BOOLEAN_TYPE
)
2440 && TYPE_PRECISION (TREE_TYPE (arg0
)) > 1)
2443 else if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
2445 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
2450 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
2465 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
2467 &strict_overflow_p
);
2468 if (nexp
!= NULL_TREE
)
2471 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2484 r
->strict_overflow_p
= strict_overflow_p
;
2488 /* Comparison function for qsort. Sort entries
2489 without SSA_NAME exp first, then with SSA_NAMEs sorted
2490 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2491 by increasing ->low and if ->low is the same, by increasing
2492 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2496 range_entry_cmp (const void *a
, const void *b
)
2498 const struct range_entry
*p
= (const struct range_entry
*) a
;
2499 const struct range_entry
*q
= (const struct range_entry
*) b
;
2501 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
2503 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2505 /* Group range_entries for the same SSA_NAME together. */
2506 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
2508 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
2510 /* If ->low is different, NULL low goes first, then by
2512 if (p
->low
!= NULL_TREE
)
2514 if (q
->low
!= NULL_TREE
)
2516 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2518 if (tem
&& integer_onep (tem
))
2520 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2522 if (tem
&& integer_onep (tem
))
2528 else if (q
->low
!= NULL_TREE
)
2530 /* If ->high is different, NULL high goes last, before that by
2532 if (p
->high
!= NULL_TREE
)
2534 if (q
->high
!= NULL_TREE
)
2536 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2538 if (tem
&& integer_onep (tem
))
2540 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2542 if (tem
&& integer_onep (tem
))
2548 else if (q
->high
!= NULL_TREE
)
2550 /* If both ranges are the same, sort below by ascending idx. */
2555 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2558 if (p
->idx
< q
->idx
)
2562 gcc_checking_assert (p
->idx
> q
->idx
);
2567 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2568 insert needed statements BEFORE or after GSI. */
2571 force_into_ssa_name (gimple_stmt_iterator
*gsi
, tree expr
, bool before
)
2573 enum gsi_iterator_update m
= before
? GSI_SAME_STMT
: GSI_CONTINUE_LINKING
;
2574 tree ret
= force_gimple_operand_gsi (gsi
, expr
, true, NULL_TREE
, before
, m
);
2575 if (TREE_CODE (ret
) != SSA_NAME
)
2577 gimple
*g
= gimple_build_assign (make_ssa_name (TREE_TYPE (ret
)), ret
);
2579 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
2581 gsi_insert_after (gsi
, g
, GSI_CONTINUE_LINKING
);
2582 ret
= gimple_assign_lhs (g
);
2587 /* Helper routine of optimize_range_test.
2588 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2589 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2590 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2591 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2592 an array of COUNT pointers to other ranges. Return
2593 true if the range merge has been successful.
2594 If OPCODE is ERROR_MARK, this is called from within
2595 maybe_optimize_range_tests and is performing inter-bb range optimization.
2596 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2600 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2601 struct range_entry
**otherrangep
,
2602 unsigned int count
, enum tree_code opcode
,
2603 vec
<operand_entry
*> *ops
, tree exp
, gimple_seq seq
,
2604 bool in_p
, tree low
, tree high
, bool strict_overflow_p
)
2606 operand_entry
*oe
= (*ops
)[range
->idx
];
2608 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
2609 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2610 location_t loc
= gimple_location (stmt
);
2611 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2612 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2614 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2615 gimple_stmt_iterator gsi
;
2616 unsigned int i
, uid
;
2618 if (tem
== NULL_TREE
)
2621 /* If op is default def SSA_NAME, there is no place to insert the
2622 new comparison. Give up, unless we can use OP itself as the
2624 if (op
&& SSA_NAME_IS_DEFAULT_DEF (op
))
2626 if (op
== range
->exp
2627 && ((TYPE_PRECISION (optype
) == 1 && TYPE_UNSIGNED (optype
))
2628 || TREE_CODE (optype
) == BOOLEAN_TYPE
)
2630 || (TREE_CODE (tem
) == EQ_EXPR
2631 && TREE_OPERAND (tem
, 0) == op
2632 && integer_onep (TREE_OPERAND (tem
, 1))))
2633 && opcode
!= BIT_IOR_EXPR
2634 && (opcode
!= ERROR_MARK
|| oe
->rank
!= BIT_IOR_EXPR
))
2643 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2644 warning_at (loc
, OPT_Wstrict_overflow
,
2645 "assuming signed overflow does not occur "
2646 "when simplifying range test");
2648 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2650 struct range_entry
*r
;
2651 fprintf (dump_file
, "Optimizing range tests ");
2652 print_generic_expr (dump_file
, range
->exp
);
2653 fprintf (dump_file
, " %c[", range
->in_p
? '+' : '-');
2654 print_generic_expr (dump_file
, range
->low
);
2655 fprintf (dump_file
, ", ");
2656 print_generic_expr (dump_file
, range
->high
);
2657 fprintf (dump_file
, "]");
2658 for (i
= 0; i
< count
; i
++)
2665 && r
->exp
!= range
->exp
2666 && TREE_CODE (r
->exp
) == SSA_NAME
)
2668 fprintf (dump_file
, " and ");
2669 print_generic_expr (dump_file
, r
->exp
);
2672 fprintf (dump_file
, " and");
2673 fprintf (dump_file
, " %c[", r
->in_p
? '+' : '-');
2674 print_generic_expr (dump_file
, r
->low
);
2675 fprintf (dump_file
, ", ");
2676 print_generic_expr (dump_file
, r
->high
);
2677 fprintf (dump_file
, "]");
2679 fprintf (dump_file
, "\n into ");
2680 print_generic_expr (dump_file
, tem
);
2681 fprintf (dump_file
, "\n");
2684 if (opcode
== BIT_IOR_EXPR
2685 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2686 tem
= invert_truthvalue_loc (loc
, tem
);
2688 tem
= fold_convert_loc (loc
, optype
, tem
);
2691 gsi
= gsi_for_stmt (stmt
);
2692 uid
= gimple_uid (stmt
);
2700 gcc_checking_assert (tem
== op
);
2701 /* In rare cases range->exp can be equal to lhs of stmt.
2702 In that case we have to insert after the stmt rather then before
2703 it. If stmt is a PHI, insert it at the start of the basic block. */
2704 else if (op
!= range
->exp
)
2706 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2707 tem
= force_into_ssa_name (&gsi
, tem
, true);
2710 else if (gimple_code (stmt
) != GIMPLE_PHI
)
2712 gsi_insert_seq_after (&gsi
, seq
, GSI_CONTINUE_LINKING
);
2713 tem
= force_into_ssa_name (&gsi
, tem
, false);
2717 gsi
= gsi_after_labels (gimple_bb (stmt
));
2718 if (!gsi_end_p (gsi
))
2719 uid
= gimple_uid (gsi_stmt (gsi
));
2722 gsi
= gsi_start_bb (gimple_bb (stmt
));
2724 while (!gsi_end_p (gsi
))
2726 uid
= gimple_uid (gsi_stmt (gsi
));
2730 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2731 tem
= force_into_ssa_name (&gsi
, tem
, true);
2732 if (gsi_end_p (gsi
))
2733 gsi
= gsi_last_bb (gimple_bb (stmt
));
2737 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2738 if (gimple_uid (gsi_stmt (gsi
)))
2741 gimple_set_uid (gsi_stmt (gsi
), uid
);
2748 range
->strict_overflow_p
= false;
2750 for (i
= 0; i
< count
; i
++)
2753 range
= otherrange
+ i
;
2755 range
= otherrangep
[i
];
2756 oe
= (*ops
)[range
->idx
];
2757 /* Now change all the other range test immediate uses, so that
2758 those tests will be optimized away. */
2759 if (opcode
== ERROR_MARK
)
2762 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
2763 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
2765 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
2766 ? boolean_false_node
: boolean_true_node
);
2769 oe
->op
= error_mark_node
;
2770 range
->exp
= NULL_TREE
;
2771 range
->low
= NULL_TREE
;
2772 range
->high
= NULL_TREE
;
2777 /* Optimize X == CST1 || X == CST2
2778 if popcount (CST1 ^ CST2) == 1 into
2779 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2780 Similarly for ranges. E.g.
2781 X != 2 && X != 3 && X != 10 && X != 11
2782 will be transformed by the previous optimization into
2783 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2784 and this loop can transform that into
2785 !(((X & ~8) - 2U) <= 1U). */
2788 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
2789 tree lowi
, tree lowj
, tree highi
, tree highj
,
2790 vec
<operand_entry
*> *ops
,
2791 struct range_entry
*rangei
,
2792 struct range_entry
*rangej
)
2794 tree lowxor
, highxor
, tem
, exp
;
2795 /* Check lowi ^ lowj == highi ^ highj and
2796 popcount (lowi ^ lowj) == 1. */
2797 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
2798 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
2800 if (!integer_pow2p (lowxor
))
2802 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
2803 if (!tree_int_cst_equal (lowxor
, highxor
))
2807 scalar_int_mode mode
= as_a
<scalar_int_mode
> (TYPE_MODE (type
));
2808 int prec
= GET_MODE_PRECISION (mode
);
2809 if (TYPE_PRECISION (type
) < prec
2810 || (wi::to_wide (TYPE_MIN_VALUE (type
))
2811 != wi::min_value (prec
, TYPE_SIGN (type
)))
2812 || (wi::to_wide (TYPE_MAX_VALUE (type
))
2813 != wi::max_value (prec
, TYPE_SIGN (type
))))
2815 type
= build_nonstandard_integer_type (prec
, TYPE_UNSIGNED (type
));
2816 exp
= fold_convert (type
, exp
);
2817 lowxor
= fold_convert (type
, lowxor
);
2818 lowi
= fold_convert (type
, lowi
);
2819 highi
= fold_convert (type
, highi
);
2821 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
2822 exp
= fold_build2 (BIT_AND_EXPR
, type
, exp
, tem
);
2823 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
2824 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
2825 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, exp
,
2826 NULL
, rangei
->in_p
, lowj
, highj
,
2827 rangei
->strict_overflow_p
2828 || rangej
->strict_overflow_p
))
2833 /* Optimize X == CST1 || X == CST2
2834 if popcount (CST2 - CST1) == 1 into
2835 ((X - CST1) & ~(CST2 - CST1)) == 0.
2836 Similarly for ranges. E.g.
2837 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2838 || X == 75 || X == 45
2839 will be transformed by the previous optimization into
2840 (X - 43U) <= 3U || (X - 75U) <= 3U
2841 and this loop can transform that into
2842 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2844 optimize_range_tests_diff (enum tree_code opcode
, tree type
,
2845 tree lowi
, tree lowj
, tree highi
, tree highj
,
2846 vec
<operand_entry
*> *ops
,
2847 struct range_entry
*rangei
,
2848 struct range_entry
*rangej
)
2850 tree tem1
, tem2
, mask
;
2851 /* Check highi - lowi == highj - lowj. */
2852 tem1
= fold_binary (MINUS_EXPR
, type
, highi
, lowi
);
2853 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2855 tem2
= fold_binary (MINUS_EXPR
, type
, highj
, lowj
);
2856 if (!tree_int_cst_equal (tem1
, tem2
))
2858 /* Check popcount (lowj - lowi) == 1. */
2859 tem1
= fold_binary (MINUS_EXPR
, type
, lowj
, lowi
);
2860 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
2862 if (!integer_pow2p (tem1
))
2865 scalar_int_mode mode
= as_a
<scalar_int_mode
> (TYPE_MODE (type
));
2866 int prec
= GET_MODE_PRECISION (mode
);
2867 if (TYPE_PRECISION (type
) < prec
2868 || (wi::to_wide (TYPE_MIN_VALUE (type
))
2869 != wi::min_value (prec
, TYPE_SIGN (type
)))
2870 || (wi::to_wide (TYPE_MAX_VALUE (type
))
2871 != wi::max_value (prec
, TYPE_SIGN (type
))))
2872 type
= build_nonstandard_integer_type (prec
, 1);
2874 type
= unsigned_type_for (type
);
2875 tem1
= fold_convert (type
, tem1
);
2876 tem2
= fold_convert (type
, tem2
);
2877 lowi
= fold_convert (type
, lowi
);
2878 mask
= fold_build1 (BIT_NOT_EXPR
, type
, tem1
);
2879 tem1
= fold_build2 (MINUS_EXPR
, type
,
2880 fold_convert (type
, rangei
->exp
), lowi
);
2881 tem1
= fold_build2 (BIT_AND_EXPR
, type
, tem1
, mask
);
2882 lowj
= build_int_cst (type
, 0);
2883 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, tem1
,
2884 NULL
, rangei
->in_p
, lowj
, tem2
,
2885 rangei
->strict_overflow_p
2886 || rangej
->strict_overflow_p
))
2891 /* It does some common checks for function optimize_range_tests_xor and
2892 optimize_range_tests_diff.
2893 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2894 Else it calls optimize_range_tests_diff. */
2897 optimize_range_tests_1 (enum tree_code opcode
, int first
, int length
,
2898 bool optimize_xor
, vec
<operand_entry
*> *ops
,
2899 struct range_entry
*ranges
)
2902 bool any_changes
= false;
2903 for (i
= first
; i
< length
; i
++)
2905 tree lowi
, highi
, lowj
, highj
, type
, tem
;
2907 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
2909 type
= TREE_TYPE (ranges
[i
].exp
);
2910 if (!INTEGRAL_TYPE_P (type
))
2912 lowi
= ranges
[i
].low
;
2913 if (lowi
== NULL_TREE
)
2914 lowi
= TYPE_MIN_VALUE (type
);
2915 highi
= ranges
[i
].high
;
2916 if (highi
== NULL_TREE
)
2918 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
2921 if (ranges
[i
].exp
!= ranges
[j
].exp
|| ranges
[j
].in_p
)
2923 lowj
= ranges
[j
].low
;
2924 if (lowj
== NULL_TREE
)
2926 highj
= ranges
[j
].high
;
2927 if (highj
== NULL_TREE
)
2928 highj
= TYPE_MAX_VALUE (type
);
2929 /* Check lowj > highi. */
2930 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2932 if (tem
== NULL_TREE
|| !integer_onep (tem
))
2935 changes
= optimize_range_tests_xor (opcode
, type
, lowi
, lowj
,
2937 ranges
+ i
, ranges
+ j
);
2939 changes
= optimize_range_tests_diff (opcode
, type
, lowi
, lowj
,
2941 ranges
+ i
, ranges
+ j
);
2952 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2953 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2954 EXP on success, NULL otherwise. */
2957 extract_bit_test_mask (tree exp
, int prec
, tree totallow
, tree low
, tree high
,
2958 wide_int
*mask
, tree
*totallowp
)
2960 tree tem
= int_const_binop (MINUS_EXPR
, high
, low
);
2961 if (tem
== NULL_TREE
2962 || TREE_CODE (tem
) != INTEGER_CST
2963 || TREE_OVERFLOW (tem
)
2964 || tree_int_cst_sgn (tem
) == -1
2965 || compare_tree_int (tem
, prec
) != -1)
2968 unsigned HOST_WIDE_INT max
= tree_to_uhwi (tem
) + 1;
2969 *mask
= wi::shifted_mask (0, max
, false, prec
);
2970 if (TREE_CODE (exp
) == BIT_AND_EXPR
2971 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
2973 widest_int msk
= wi::to_widest (TREE_OPERAND (exp
, 1));
2974 msk
= wi::zext (~msk
, TYPE_PRECISION (TREE_TYPE (exp
)));
2975 if (wi::popcount (msk
) == 1
2976 && wi::ltu_p (msk
, prec
- max
))
2978 *mask
|= wi::shifted_mask (msk
.to_uhwi (), max
, false, prec
);
2979 max
+= msk
.to_uhwi ();
2980 exp
= TREE_OPERAND (exp
, 0);
2981 if (integer_zerop (low
)
2982 && TREE_CODE (exp
) == PLUS_EXPR
2983 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
2985 tree ret
= TREE_OPERAND (exp
, 0);
2988 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp
, 1)),
2989 TYPE_PRECISION (TREE_TYPE (low
))));
2990 tree tbias
= wide_int_to_tree (TREE_TYPE (ret
), bias
);
2996 else if (!tree_int_cst_lt (totallow
, tbias
))
2998 bias
= wi::to_widest (tbias
);
2999 bias
-= wi::to_widest (totallow
);
3000 if (bias
>= 0 && bias
< prec
- max
)
3002 *mask
= wi::lshift (*mask
, bias
);
3010 if (!tree_int_cst_lt (totallow
, low
))
3012 tem
= int_const_binop (MINUS_EXPR
, low
, totallow
);
3013 if (tem
== NULL_TREE
3014 || TREE_CODE (tem
) != INTEGER_CST
3015 || TREE_OVERFLOW (tem
)
3016 || compare_tree_int (tem
, prec
- max
) == 1)
3019 *mask
= wi::lshift (*mask
, wi::to_widest (tem
));
3023 /* Attempt to optimize small range tests using bit test.
3025 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3026 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3027 has been by earlier optimizations optimized into:
3028 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3029 As all the 43 through 82 range is less than 64 numbers,
3030 for 64-bit word targets optimize that into:
3031 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
3034 optimize_range_tests_to_bit_test (enum tree_code opcode
, int first
, int length
,
3035 vec
<operand_entry
*> *ops
,
3036 struct range_entry
*ranges
)
3039 bool any_changes
= false;
3040 int prec
= GET_MODE_BITSIZE (word_mode
);
3041 auto_vec
<struct range_entry
*, 64> candidates
;
3043 for (i
= first
; i
< length
- 2; i
++)
3045 tree lowi
, highi
, lowj
, highj
, type
;
3047 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
3049 type
= TREE_TYPE (ranges
[i
].exp
);
3050 if (!INTEGRAL_TYPE_P (type
))
3052 lowi
= ranges
[i
].low
;
3053 if (lowi
== NULL_TREE
)
3054 lowi
= TYPE_MIN_VALUE (type
);
3055 highi
= ranges
[i
].high
;
3056 if (highi
== NULL_TREE
)
3059 tree exp
= extract_bit_test_mask (ranges
[i
].exp
, prec
, lowi
, lowi
,
3060 highi
, &mask
, &lowi
);
3061 if (exp
== NULL_TREE
)
3063 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
3064 candidates
.truncate (0);
3065 int end
= MIN (i
+ 64, length
);
3066 for (j
= i
+ 1; j
< end
; j
++)
3069 if (ranges
[j
].exp
== NULL_TREE
|| ranges
[j
].in_p
)
3071 if (ranges
[j
].exp
== exp
)
3073 else if (TREE_CODE (ranges
[j
].exp
) == BIT_AND_EXPR
)
3075 exp2
= TREE_OPERAND (ranges
[j
].exp
, 0);
3078 else if (TREE_CODE (exp2
) == PLUS_EXPR
)
3080 exp2
= TREE_OPERAND (exp2
, 0);
3090 lowj
= ranges
[j
].low
;
3091 if (lowj
== NULL_TREE
)
3093 highj
= ranges
[j
].high
;
3094 if (highj
== NULL_TREE
)
3095 highj
= TYPE_MAX_VALUE (type
);
3097 exp2
= extract_bit_test_mask (ranges
[j
].exp
, prec
, lowi
, lowj
,
3098 highj
, &mask2
, NULL
);
3102 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
3103 candidates
.safe_push (&ranges
[j
]);
3106 /* If we need otherwise 3 or more comparisons, use a bit test. */
3107 if (candidates
.length () >= 2)
3109 tree high
= wide_int_to_tree (TREE_TYPE (lowi
),
3110 wi::to_widest (lowi
)
3111 + prec
- 1 - wi::clz (mask
));
3112 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3114 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
3115 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
3116 location_t loc
= gimple_location (stmt
);
3117 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
3119 /* See if it isn't cheaper to pretend the minimum value of the
3120 range is 0, if maximum value is small enough.
3121 We can avoid then subtraction of the minimum value, but the
3122 mask constant could be perhaps more expensive. */
3123 if (compare_tree_int (lowi
, 0) > 0
3124 && compare_tree_int (high
, prec
) < 0)
3127 HOST_WIDE_INT m
= tree_to_uhwi (lowi
);
3128 rtx reg
= gen_raw_REG (word_mode
, 10000);
3129 bool speed_p
= optimize_bb_for_speed_p (gimple_bb (stmt
));
3130 cost_diff
= set_rtx_cost (gen_rtx_PLUS (word_mode
, reg
,
3131 GEN_INT (-m
)), speed_p
);
3132 rtx r
= immed_wide_int_const (mask
, word_mode
);
3133 cost_diff
+= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
3134 word_mode
, speed_p
);
3135 r
= immed_wide_int_const (wi::lshift (mask
, m
), word_mode
);
3136 cost_diff
-= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
3137 word_mode
, speed_p
);
3140 mask
= wi::lshift (mask
, m
);
3141 lowi
= build_zero_cst (TREE_TYPE (lowi
));
3145 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
3147 if (tem
== NULL_TREE
|| is_gimple_val (tem
))
3149 tree etype
= unsigned_type_for (TREE_TYPE (exp
));
3150 exp
= fold_build2_loc (loc
, MINUS_EXPR
, etype
,
3151 fold_convert_loc (loc
, etype
, exp
),
3152 fold_convert_loc (loc
, etype
, lowi
));
3153 exp
= fold_convert_loc (loc
, integer_type_node
, exp
);
3154 tree word_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
3155 exp
= fold_build2_loc (loc
, LSHIFT_EXPR
, word_type
,
3156 build_int_cst (word_type
, 1), exp
);
3157 exp
= fold_build2_loc (loc
, BIT_AND_EXPR
, word_type
, exp
,
3158 wide_int_to_tree (word_type
, mask
));
3159 exp
= fold_build2_loc (loc
, EQ_EXPR
, optype
, exp
,
3160 build_zero_cst (word_type
));
3161 if (is_gimple_val (exp
))
3164 /* The shift might have undefined behavior if TEM is true,
3165 but reassociate_bb isn't prepared to have basic blocks
3166 split when it is running. So, temporarily emit a code
3167 with BIT_IOR_EXPR instead of &&, and fix it up in
3170 tem
= force_gimple_operand (tem
, &seq
, true, NULL_TREE
);
3171 gcc_assert (TREE_CODE (tem
) == SSA_NAME
);
3172 gimple_set_visited (SSA_NAME_DEF_STMT (tem
), true);
3174 exp
= force_gimple_operand (exp
, &seq2
, true, NULL_TREE
);
3175 gimple_seq_add_seq_without_update (&seq
, seq2
);
3176 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
3177 gimple_set_visited (SSA_NAME_DEF_STMT (exp
), true);
3178 gimple
*g
= gimple_build_assign (make_ssa_name (optype
),
3179 BIT_IOR_EXPR
, tem
, exp
);
3180 gimple_set_location (g
, loc
);
3181 gimple_seq_add_stmt_without_update (&seq
, g
);
3182 exp
= gimple_assign_lhs (g
);
3183 tree val
= build_zero_cst (optype
);
3184 if (update_range_test (&ranges
[i
], NULL
, candidates
.address (),
3185 candidates
.length (), opcode
, ops
, exp
,
3186 seq
, false, val
, val
, strict_overflow_p
))
3189 reassoc_branch_fixups
.safe_push (tem
);
3192 gimple_seq_discard (seq
);
3198 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3199 and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1. */
3202 optimize_range_tests_cmp_bitwise (enum tree_code opcode
, int first
, int length
,
3203 vec
<operand_entry
*> *ops
,
3204 struct range_entry
*ranges
)
3208 bool any_changes
= false;
3209 auto_vec
<int, 128> buckets
;
3210 auto_vec
<int, 32> chains
;
3211 auto_vec
<struct range_entry
*, 32> candidates
;
3213 for (i
= first
; i
< length
; i
++)
3215 if (ranges
[i
].exp
== NULL_TREE
3216 || TREE_CODE (ranges
[i
].exp
) != SSA_NAME
3218 || TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) <= 1
3219 || TREE_CODE (TREE_TYPE (ranges
[i
].exp
)) == BOOLEAN_TYPE
3220 || ranges
[i
].low
== NULL_TREE
3221 || ranges
[i
].low
!= ranges
[i
].high
)
3224 bool zero_p
= integer_zerop (ranges
[i
].low
);
3225 if (!zero_p
&& !integer_all_onesp (ranges
[i
].low
))
3228 b
= TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) * 2 + !zero_p
;
3229 if (buckets
.length () <= b
)
3230 buckets
.safe_grow_cleared (b
+ 1);
3231 if (chains
.length () <= (unsigned) i
)
3232 chains
.safe_grow (i
+ 1);
3233 chains
[i
] = buckets
[b
];
3237 FOR_EACH_VEC_ELT (buckets
, b
, i
)
3238 if (i
&& chains
[i
- 1])
3241 for (j
= chains
[i
- 1]; j
; j
= chains
[j
- 1])
3243 gimple
*gk
= SSA_NAME_DEF_STMT (ranges
[k
- 1].exp
);
3244 gimple
*gj
= SSA_NAME_DEF_STMT (ranges
[j
- 1].exp
);
3245 if (reassoc_stmt_dominates_stmt_p (gk
, gj
))
3248 tree type1
= TREE_TYPE (ranges
[k
- 1].exp
);
3249 tree type2
= NULL_TREE
;
3250 bool strict_overflow_p
= false;
3251 candidates
.truncate (0);
3252 for (j
= i
; j
; j
= chains
[j
- 1])
3254 tree type
= TREE_TYPE (ranges
[j
- 1].exp
);
3255 strict_overflow_p
|= ranges
[j
- 1].strict_overflow_p
;
3257 || useless_type_conversion_p (type1
, type
))
3259 else if (type2
== NULL_TREE
3260 || useless_type_conversion_p (type2
, type
))
3262 if (type2
== NULL_TREE
)
3264 candidates
.safe_push (&ranges
[j
- 1]);
3267 unsigned l
= candidates
.length ();
3268 for (j
= i
; j
; j
= chains
[j
- 1])
3270 tree type
= TREE_TYPE (ranges
[j
- 1].exp
);
3273 if (useless_type_conversion_p (type1
, type
))
3275 else if (type2
== NULL_TREE
3276 || useless_type_conversion_p (type2
, type
))
3278 candidates
.safe_push (&ranges
[j
- 1]);
3280 gimple_seq seq
= NULL
;
3281 tree op
= NULL_TREE
;
3283 struct range_entry
*r
;
3284 candidates
.safe_push (&ranges
[k
- 1]);
3285 FOR_EACH_VEC_ELT (candidates
, id
, r
)
3295 g
= gimple_build_assign (make_ssa_name (type1
), NOP_EXPR
, op
);
3296 gimple_seq_add_stmt_without_update (&seq
, g
);
3297 op
= gimple_assign_lhs (g
);
3299 tree type
= TREE_TYPE (r
->exp
);
3301 if (id
>= l
&& !useless_type_conversion_p (type1
, type
))
3303 g
= gimple_build_assign (make_ssa_name (type1
), NOP_EXPR
, exp
);
3304 gimple_seq_add_stmt_without_update (&seq
, g
);
3305 exp
= gimple_assign_lhs (g
);
3307 g
= gimple_build_assign (make_ssa_name (id
>= l
? type1
: type2
),
3308 (b
& 1) ? BIT_AND_EXPR
: BIT_IOR_EXPR
,
3310 gimple_seq_add_stmt_without_update (&seq
, g
);
3311 op
= gimple_assign_lhs (g
);
3314 if (update_range_test (&ranges
[k
- 1], NULL
, candidates
.address (),
3315 candidates
.length (), opcode
, ops
, op
,
3316 seq
, true, ranges
[k
- 1].low
,
3317 ranges
[k
- 1].low
, strict_overflow_p
))
3320 gimple_seq_discard (seq
);
3326 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3327 a >= 0 && a < b into (unsigned) a < (unsigned) b
3328 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
3331 optimize_range_tests_var_bound (enum tree_code opcode
, int first
, int length
,
3332 vec
<operand_entry
*> *ops
,
3333 struct range_entry
*ranges
,
3334 basic_block first_bb
)
3337 bool any_changes
= false;
3338 hash_map
<tree
, int> *map
= NULL
;
3340 for (i
= first
; i
< length
; i
++)
3342 if (ranges
[i
].exp
== NULL_TREE
3343 || TREE_CODE (ranges
[i
].exp
) != SSA_NAME
3347 tree type
= TREE_TYPE (ranges
[i
].exp
);
3348 if (!INTEGRAL_TYPE_P (type
)
3349 || TYPE_UNSIGNED (type
)
3350 || ranges
[i
].low
== NULL_TREE
3351 || !integer_zerop (ranges
[i
].low
)
3352 || ranges
[i
].high
!= NULL_TREE
)
3354 /* EXP >= 0 here. */
3356 map
= new hash_map
<tree
, int>;
3357 map
->put (ranges
[i
].exp
, i
);
3363 for (i
= 0; i
< length
; i
++)
3365 bool in_p
= ranges
[i
].in_p
;
3366 if (ranges
[i
].low
== NULL_TREE
3367 || ranges
[i
].high
== NULL_TREE
)
3369 if (!integer_zerop (ranges
[i
].low
)
3370 || !integer_zerop (ranges
[i
].high
))
3373 && TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) == 1
3374 && TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3375 && integer_onep (ranges
[i
].low
)
3376 && integer_onep (ranges
[i
].high
))
3387 if (TREE_CODE (ranges
[i
].exp
) != SSA_NAME
)
3389 stmt
= SSA_NAME_DEF_STMT (ranges
[i
].exp
);
3390 if (!is_gimple_assign (stmt
))
3392 ccode
= gimple_assign_rhs_code (stmt
);
3393 rhs1
= gimple_assign_rhs1 (stmt
);
3394 rhs2
= gimple_assign_rhs2 (stmt
);
3398 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3399 stmt
= last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
3400 if (gimple_code (stmt
) != GIMPLE_COND
)
3402 ccode
= gimple_cond_code (stmt
);
3403 rhs1
= gimple_cond_lhs (stmt
);
3404 rhs2
= gimple_cond_rhs (stmt
);
3407 if (TREE_CODE (rhs1
) != SSA_NAME
3408 || rhs2
== NULL_TREE
3409 || TREE_CODE (rhs2
) != SSA_NAME
)
3423 ccode
= invert_tree_comparison (ccode
, false);
3428 std::swap (rhs1
, rhs2
);
3429 ccode
= swap_tree_comparison (ccode
);
3438 int *idx
= map
->get (rhs1
);
3442 /* maybe_optimize_range_tests allows statements without side-effects
3443 in the basic blocks as long as they are consumed in the same bb.
3444 Make sure rhs2's def stmt is not among them, otherwise we can't
3445 use safely get_nonzero_bits on it. E.g. in:
3446 # RANGE [-83, 1] NONZERO 173
3447 # k_32 = PHI <k_47(13), k_12(9)>
3450 goto <bb 5>; [26.46%]
3452 goto <bb 9>; [73.54%]
3454 <bb 5> [local count: 140323371]:
3455 # RANGE [0, 1] NONZERO 1
3457 # RANGE [0, 4] NONZERO 4
3459 # RANGE [0, 4] NONZERO 4
3460 iftmp.0_44 = (char) _21;
3461 if (k_32 < iftmp.0_44)
3462 goto <bb 6>; [84.48%]
3464 goto <bb 9>; [15.52%]
3465 the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
3466 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3467 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3468 those stmts even for negative k_32 and the value ranges would be no
3469 longer guaranteed and so the optimization would be invalid. */
3470 while (opcode
== ERROR_MARK
)
3472 gimple
*g
= SSA_NAME_DEF_STMT (rhs2
);
3473 basic_block bb2
= gimple_bb (g
);
3476 && dominated_by_p (CDI_DOMINATORS
, bb2
, first_bb
))
3478 /* As an exception, handle a few common cases. */
3479 if (gimple_assign_cast_p (g
)
3480 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g
))))
3482 tree op0
= gimple_assign_rhs1 (g
);
3483 if (TYPE_UNSIGNED (TREE_TYPE (op0
))
3484 && (TYPE_PRECISION (TREE_TYPE (rhs2
))
3485 > TYPE_PRECISION (TREE_TYPE (op0
))))
3486 /* Zero-extension is always ok. */
3488 else if (TYPE_PRECISION (TREE_TYPE (rhs2
))
3489 == TYPE_PRECISION (TREE_TYPE (op0
))
3490 && TREE_CODE (op0
) == SSA_NAME
)
3492 /* Cast from signed to unsigned or vice versa. Retry
3493 with the op0 as new rhs2. */
3498 else if (is_gimple_assign (g
)
3499 && gimple_assign_rhs_code (g
) == BIT_AND_EXPR
3500 && TREE_CODE (gimple_assign_rhs2 (g
)) == INTEGER_CST
3501 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g
))))
3502 /* Masking with INTEGER_CST with MSB clear is always ok
3509 if (rhs2
== NULL_TREE
)
3512 wide_int nz
= get_nonzero_bits (rhs2
);
3516 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3517 and RHS2 is known to be RHS2 >= 0. */
3518 tree utype
= unsigned_type_for (TREE_TYPE (rhs1
));
3520 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
3521 if ((ranges
[*idx
].strict_overflow_p
3522 || ranges
[i
].strict_overflow_p
)
3523 && issue_strict_overflow_warning (wc
))
3524 warning_at (gimple_location (stmt
), OPT_Wstrict_overflow
,
3525 "assuming signed overflow does not occur "
3526 "when simplifying range test");
3528 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3530 struct range_entry
*r
= &ranges
[*idx
];
3531 fprintf (dump_file
, "Optimizing range test ");
3532 print_generic_expr (dump_file
, r
->exp
);
3533 fprintf (dump_file
, " +[");
3534 print_generic_expr (dump_file
, r
->low
);
3535 fprintf (dump_file
, ", ");
3536 print_generic_expr (dump_file
, r
->high
);
3537 fprintf (dump_file
, "] and comparison ");
3538 print_generic_expr (dump_file
, rhs1
);
3539 fprintf (dump_file
, " %s ", op_symbol_code (ccode
));
3540 print_generic_expr (dump_file
, rhs2
);
3541 fprintf (dump_file
, "\n into (");
3542 print_generic_expr (dump_file
, utype
);
3543 fprintf (dump_file
, ") ");
3544 print_generic_expr (dump_file
, rhs1
);
3545 fprintf (dump_file
, " %s (", op_symbol_code (ccode
));
3546 print_generic_expr (dump_file
, utype
);
3547 fprintf (dump_file
, ") ");
3548 print_generic_expr (dump_file
, rhs2
);
3549 fprintf (dump_file
, "\n");
3552 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3554 if (opcode
== BIT_IOR_EXPR
3555 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
3558 ccode
= invert_tree_comparison (ccode
, false);
3561 unsigned int uid
= gimple_uid (stmt
);
3562 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
3563 gimple
*g
= gimple_build_assign (make_ssa_name (utype
), NOP_EXPR
, rhs1
);
3564 gimple_set_uid (g
, uid
);
3565 rhs1
= gimple_assign_lhs (g
);
3566 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3567 if (!useless_type_conversion_p (utype
, TREE_TYPE (rhs2
)))
3569 g
= gimple_build_assign (make_ssa_name (utype
), NOP_EXPR
, rhs2
);
3570 gimple_set_uid (g
, uid
);
3571 rhs2
= gimple_assign_lhs (g
);
3572 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3574 if (tree_swap_operands_p (rhs1
, rhs2
))
3576 std::swap (rhs1
, rhs2
);
3577 ccode
= swap_tree_comparison (ccode
);
3579 if (gimple_code (stmt
) == GIMPLE_COND
)
3581 gcond
*c
= as_a
<gcond
*> (stmt
);
3582 gimple_cond_set_code (c
, ccode
);
3583 gimple_cond_set_lhs (c
, rhs1
);
3584 gimple_cond_set_rhs (c
, rhs2
);
3589 tree ctype
= oe
->op
? TREE_TYPE (oe
->op
) : boolean_type_node
;
3590 if (!INTEGRAL_TYPE_P (ctype
)
3591 || (TREE_CODE (ctype
) != BOOLEAN_TYPE
3592 && TYPE_PRECISION (ctype
) != 1))
3593 ctype
= boolean_type_node
;
3594 g
= gimple_build_assign (make_ssa_name (ctype
), ccode
, rhs1
, rhs2
);
3595 gimple_set_uid (g
, uid
);
3596 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3597 if (oe
->op
&& ctype
!= TREE_TYPE (oe
->op
))
3599 g
= gimple_build_assign (make_ssa_name (TREE_TYPE (oe
->op
)),
3600 NOP_EXPR
, gimple_assign_lhs (g
));
3601 gimple_set_uid (g
, uid
);
3602 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
3604 ranges
[i
].exp
= gimple_assign_lhs (g
);
3605 oe
->op
= ranges
[i
].exp
;
3606 ranges
[i
].low
= build_zero_cst (TREE_TYPE (ranges
[i
].exp
));
3607 ranges
[i
].high
= ranges
[i
].low
;
3609 ranges
[i
].strict_overflow_p
= false;
3610 oe
= (*ops
)[ranges
[*idx
].idx
];
3611 /* Now change all the other range test immediate uses, so that
3612 those tests will be optimized away. */
3613 if (opcode
== ERROR_MARK
)
3616 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
3617 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
3619 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
3620 ? boolean_false_node
: boolean_true_node
);
3623 oe
->op
= error_mark_node
;
3624 ranges
[*idx
].exp
= NULL_TREE
;
3625 ranges
[*idx
].low
= NULL_TREE
;
3626 ranges
[*idx
].high
= NULL_TREE
;
3634 /* Optimize range tests, similarly how fold_range_test optimizes
3635 it on trees. The tree code for the binary
3636 operation between all the operands is OPCODE.
3637 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
3638 maybe_optimize_range_tests for inter-bb range optimization.
3639 In that case if oe->op is NULL, oe->id is bb->index whose
3640 GIMPLE_COND is && or ||ed into the test, and oe->rank says
3642 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
3645 optimize_range_tests (enum tree_code opcode
,
3646 vec
<operand_entry
*> *ops
, basic_block first_bb
)
3648 unsigned int length
= ops
->length (), i
, j
, first
;
3650 struct range_entry
*ranges
;
3651 bool any_changes
= false;
3656 ranges
= XNEWVEC (struct range_entry
, length
);
3657 for (i
= 0; i
< length
; i
++)
3661 init_range_entry (ranges
+ i
, oe
->op
,
3664 : last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
)));
3665 /* For | invert it now, we will invert it again before emitting
3666 the optimized expression. */
3667 if (opcode
== BIT_IOR_EXPR
3668 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
3669 ranges
[i
].in_p
= !ranges
[i
].in_p
;
3672 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
3673 for (i
= 0; i
< length
; i
++)
3674 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
3677 /* Try to merge ranges. */
3678 for (first
= i
; i
< length
; i
++)
3680 tree low
= ranges
[i
].low
;
3681 tree high
= ranges
[i
].high
;
3682 int in_p
= ranges
[i
].in_p
;
3683 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
3684 int update_fail_count
= 0;
3686 for (j
= i
+ 1; j
< length
; j
++)
3688 if (ranges
[i
].exp
!= ranges
[j
].exp
)
3690 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
3691 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
3693 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
3699 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, NULL
, j
- i
- 1,
3700 opcode
, ops
, ranges
[i
].exp
, NULL
, in_p
,
3701 low
, high
, strict_overflow_p
))
3706 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
3707 while update_range_test would fail. */
3708 else if (update_fail_count
== 64)
3711 ++update_fail_count
;
3714 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
3717 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
3718 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
3720 if (lshift_cheap_p (optimize_function_for_speed_p (cfun
)))
3721 any_changes
|= optimize_range_tests_to_bit_test (opcode
, first
, length
,
3723 any_changes
|= optimize_range_tests_cmp_bitwise (opcode
, first
, length
,
3725 any_changes
|= optimize_range_tests_var_bound (opcode
, first
, length
, ops
,
3728 if (any_changes
&& opcode
!= ERROR_MARK
)
3731 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
3733 if (oe
->op
== error_mark_node
)
3742 XDELETEVEC (ranges
);
3746 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3747 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3748 otherwise the comparison code. */
3751 ovce_extract_ops (tree var
, gassign
**rets
, bool *reti
)
3753 if (TREE_CODE (var
) != SSA_NAME
)
3756 gassign
*stmt
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (var
));
3760 /* ??? If we start creating more COND_EXPR, we could perform
3761 this same optimization with them. For now, simplify. */
3762 if (gimple_assign_rhs_code (stmt
) != VEC_COND_EXPR
)
3765 tree cond
= gimple_assign_rhs1 (stmt
);
3766 tree_code cmp
= TREE_CODE (cond
);
3767 if (TREE_CODE_CLASS (cmp
) != tcc_comparison
)
3770 /* ??? For now, allow only canonical true and false result vectors.
3771 We could expand this to other constants should the need arise,
3772 but at the moment we don't create them. */
3773 tree t
= gimple_assign_rhs2 (stmt
);
3774 tree f
= gimple_assign_rhs3 (stmt
);
3776 if (integer_all_onesp (t
))
3778 else if (integer_all_onesp (f
))
3780 cmp
= invert_tree_comparison (cmp
, false);
3785 if (!integer_zerop (f
))
3796 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3797 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
3800 optimize_vec_cond_expr (tree_code opcode
, vec
<operand_entry
*> *ops
)
3802 unsigned int length
= ops
->length (), i
, j
;
3803 bool any_changes
= false;
3808 for (i
= 0; i
< length
; ++i
)
3810 tree elt0
= (*ops
)[i
]->op
;
3814 tree_code cmp0
= ovce_extract_ops (elt0
, &stmt0
, &invert
);
3815 if (cmp0
== ERROR_MARK
)
3818 for (j
= i
+ 1; j
< length
; ++j
)
3820 tree
&elt1
= (*ops
)[j
]->op
;
3823 tree_code cmp1
= ovce_extract_ops (elt1
, &stmt1
, NULL
);
3824 if (cmp1
== ERROR_MARK
)
3827 tree cond0
= gimple_assign_rhs1 (stmt0
);
3828 tree x0
= TREE_OPERAND (cond0
, 0);
3829 tree y0
= TREE_OPERAND (cond0
, 1);
3831 tree cond1
= gimple_assign_rhs1 (stmt1
);
3832 tree x1
= TREE_OPERAND (cond1
, 0);
3833 tree y1
= TREE_OPERAND (cond1
, 1);
3836 if (opcode
== BIT_AND_EXPR
)
3837 comb
= maybe_fold_and_comparisons (cmp0
, x0
, y0
, cmp1
, x1
, y1
);
3838 else if (opcode
== BIT_IOR_EXPR
)
3839 comb
= maybe_fold_or_comparisons (cmp0
, x0
, y0
, cmp1
, x1
, y1
);
3846 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3848 fprintf (dump_file
, "Transforming ");
3849 print_generic_expr (dump_file
, cond0
);
3850 fprintf (dump_file
, " %c ", opcode
== BIT_AND_EXPR
? '&' : '|');
3851 print_generic_expr (dump_file
, cond1
);
3852 fprintf (dump_file
, " into ");
3853 print_generic_expr (dump_file
, comb
);
3854 fputc ('\n', dump_file
);
3857 gimple_assign_set_rhs1 (stmt0
, comb
);
3859 std::swap (*gimple_assign_rhs2_ptr (stmt0
),
3860 *gimple_assign_rhs3_ptr (stmt0
));
3861 update_stmt (stmt0
);
3863 elt1
= error_mark_node
;
3872 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
3874 if (oe
->op
== error_mark_node
)
3886 /* Return true if STMT is a cast like:
3892 # _345 = PHI <_123(N), 1(...), 1(...)>
3893 where _234 has bool type, _123 has single use and
3894 bb N has a single successor M. This is commonly used in
3895 the last block of a range test.
3897 Also Return true if STMT is tcc_compare like:
3903 # _345 = PHI <_234(N), 1(...), 1(...)>
3905 where _234 has booltype, single use and
3906 bb N has a single successor M. This is commonly used in
3907 the last block of a range test. */
3910 final_range_test_p (gimple
*stmt
)
3912 basic_block bb
, rhs_bb
, lhs_bb
;
3915 use_operand_p use_p
;
3918 if (!gimple_assign_cast_p (stmt
)
3919 && (!is_gimple_assign (stmt
)
3920 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
3921 != tcc_comparison
)))
3923 bb
= gimple_bb (stmt
);
3924 if (!single_succ_p (bb
))
3926 e
= single_succ_edge (bb
);
3927 if (e
->flags
& EDGE_COMPLEX
)
3930 lhs
= gimple_assign_lhs (stmt
);
3931 rhs
= gimple_assign_rhs1 (stmt
);
3932 if (gimple_assign_cast_p (stmt
)
3933 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
3934 || TREE_CODE (rhs
) != SSA_NAME
3935 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
))
3938 if (!gimple_assign_cast_p (stmt
)
3939 && (TREE_CODE (TREE_TYPE (lhs
)) != BOOLEAN_TYPE
))
3942 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3943 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
3946 if (gimple_code (use_stmt
) != GIMPLE_PHI
3947 || gimple_bb (use_stmt
) != e
->dest
)
3950 /* And that the rhs is defined in the same loop. */
3951 if (gimple_assign_cast_p (stmt
))
3953 if (TREE_CODE (rhs
) != SSA_NAME
3954 || !(rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
)))
3955 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
3960 if (TREE_CODE (lhs
) != SSA_NAME
3961 || !(lhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (lhs
)))
3962 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), lhs_bb
))
3969 /* Return true if BB is suitable basic block for inter-bb range test
3970 optimization. If BACKWARD is true, BB should be the only predecessor
3971 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
3972 or compared with to find a common basic block to which all conditions
3973 branch to if true resp. false. If BACKWARD is false, TEST_BB should
3974 be the only predecessor of BB. */
3977 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
3980 edge_iterator ei
, ei2
;
3984 bool other_edge_seen
= false;
3989 /* Check last stmt first. */
3990 stmt
= last_stmt (bb
);
3992 || (gimple_code (stmt
) != GIMPLE_COND
3993 && (backward
|| !final_range_test_p (stmt
)))
3994 || gimple_visited_p (stmt
)
3995 || stmt_could_throw_p (cfun
, stmt
)
3998 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
4001 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4002 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4003 to *OTHER_BB (if not set yet, try to find it out). */
4004 if (EDGE_COUNT (bb
->succs
) != 2)
4006 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
4008 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
4010 if (e
->dest
== test_bb
)
4019 if (*other_bb
== NULL
)
4021 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
4022 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
4024 else if (e
->dest
== e2
->dest
)
4025 *other_bb
= e
->dest
;
4026 if (*other_bb
== NULL
)
4029 if (e
->dest
== *other_bb
)
4030 other_edge_seen
= true;
4034 if (*other_bb
== NULL
|| !other_edge_seen
)
4037 else if (single_succ (bb
) != *other_bb
)
4040 /* Now check all PHIs of *OTHER_BB. */
4041 e
= find_edge (bb
, *other_bb
);
4042 e2
= find_edge (test_bb
, *other_bb
);
4043 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4045 gphi
*phi
= gsi
.phi ();
4046 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4047 corresponding to BB and TEST_BB predecessor must be the same. */
4048 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
4049 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
4051 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4052 one of the PHIs should have the lhs of the last stmt in
4053 that block as PHI arg and that PHI should have 0 or 1
4054 corresponding to it in all other range test basic blocks
4058 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
4059 == gimple_assign_lhs (stmt
)
4060 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
4061 || integer_onep (gimple_phi_arg_def (phi
,
4067 gimple
*test_last
= last_stmt (test_bb
);
4068 if (gimple_code (test_last
) != GIMPLE_COND
4069 && gimple_phi_arg_def (phi
, e2
->dest_idx
)
4070 == gimple_assign_lhs (test_last
)
4071 && (integer_zerop (gimple_phi_arg_def (phi
, e
->dest_idx
))
4072 || integer_onep (gimple_phi_arg_def (phi
, e
->dest_idx
))))
4082 /* Return true if BB doesn't have side-effects that would disallow
4083 range test optimization, all SSA_NAMEs set in the bb are consumed
4084 in the bb and there are no PHIs. */
4087 no_side_effect_bb (basic_block bb
)
4089 gimple_stmt_iterator gsi
;
4092 if (!gimple_seq_empty_p (phi_nodes (bb
)))
4094 last
= last_stmt (bb
);
4095 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4097 gimple
*stmt
= gsi_stmt (gsi
);
4099 imm_use_iterator imm_iter
;
4100 use_operand_p use_p
;
4102 if (is_gimple_debug (stmt
))
4104 if (gimple_has_side_effects (stmt
))
4108 if (!is_gimple_assign (stmt
))
4110 lhs
= gimple_assign_lhs (stmt
);
4111 if (TREE_CODE (lhs
) != SSA_NAME
)
4113 if (gimple_assign_rhs_could_trap_p (stmt
))
4115 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
4117 gimple
*use_stmt
= USE_STMT (use_p
);
4118 if (is_gimple_debug (use_stmt
))
4120 if (gimple_bb (use_stmt
) != bb
)
4127 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4128 return true and fill in *OPS recursively. */
4131 get_ops (tree var
, enum tree_code code
, vec
<operand_entry
*> *ops
,
4134 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
4138 if (!is_reassociable_op (stmt
, code
, loop
))
4141 rhs
[0] = gimple_assign_rhs1 (stmt
);
4142 rhs
[1] = gimple_assign_rhs2 (stmt
);
4143 gimple_set_visited (stmt
, true);
4144 for (i
= 0; i
< 2; i
++)
4145 if (TREE_CODE (rhs
[i
]) == SSA_NAME
4146 && !get_ops (rhs
[i
], code
, ops
, loop
)
4147 && has_single_use (rhs
[i
]))
4149 operand_entry
*oe
= operand_entry_pool
.allocate ();
4155 oe
->stmt_to_insert
= NULL
;
4156 ops
->safe_push (oe
);
4161 /* Find the ops that were added by get_ops starting from VAR, see if
4162 they were changed during update_range_test and if yes, create new
4166 update_ops (tree var
, enum tree_code code
, vec
<operand_entry
*> ops
,
4167 unsigned int *pidx
, class loop
*loop
)
4169 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
4173 if (!is_reassociable_op (stmt
, code
, loop
))
4176 rhs
[0] = gimple_assign_rhs1 (stmt
);
4177 rhs
[1] = gimple_assign_rhs2 (stmt
);
4180 for (i
= 0; i
< 2; i
++)
4181 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
4183 rhs
[2 + i
] = update_ops (rhs
[i
], code
, ops
, pidx
, loop
);
4184 if (rhs
[2 + i
] == NULL_TREE
)
4186 if (has_single_use (rhs
[i
]))
4187 rhs
[2 + i
] = ops
[(*pidx
)++]->op
;
4189 rhs
[2 + i
] = rhs
[i
];
4192 if ((rhs
[2] != rhs
[0] || rhs
[3] != rhs
[1])
4193 && (rhs
[2] != rhs
[1] || rhs
[3] != rhs
[0]))
4195 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4196 var
= make_ssa_name (TREE_TYPE (var
));
4197 gassign
*g
= gimple_build_assign (var
, gimple_assign_rhs_code (stmt
),
4199 gimple_set_uid (g
, gimple_uid (stmt
));
4200 gimple_set_visited (g
, true);
4201 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4206 /* Structure to track the initial value passed to get_ops and
4207 the range in the ops vector for each basic block. */
4209 struct inter_bb_range_test_entry
4212 unsigned int first_idx
, last_idx
;
4215 /* Inter-bb range test optimization.
4217 Returns TRUE if a gimple conditional is optimized to a true/false,
4218 otherwise return FALSE.
4220 This indicates to the caller that it should run a CFG cleanup pass
4221 once reassociation is completed. */
4224 maybe_optimize_range_tests (gimple
*stmt
)
4226 basic_block first_bb
= gimple_bb (stmt
);
4227 basic_block last_bb
= first_bb
;
4228 basic_block other_bb
= NULL
;
4232 auto_vec
<operand_entry
*> ops
;
4233 auto_vec
<inter_bb_range_test_entry
> bbinfo
;
4234 bool any_changes
= false;
4235 bool cfg_cleanup_needed
= false;
4237 /* Consider only basic blocks that end with GIMPLE_COND or
4238 a cast statement satisfying final_range_test_p. All
4239 but the last bb in the first_bb .. last_bb range
4240 should end with GIMPLE_COND. */
4241 if (gimple_code (stmt
) == GIMPLE_COND
)
4243 if (EDGE_COUNT (first_bb
->succs
) != 2)
4244 return cfg_cleanup_needed
;
4246 else if (final_range_test_p (stmt
))
4247 other_bb
= single_succ (first_bb
);
4249 return cfg_cleanup_needed
;
4251 if (stmt_could_throw_p (cfun
, stmt
))
4252 return cfg_cleanup_needed
;
4254 /* As relative ordering of post-dominator sons isn't fixed,
4255 maybe_optimize_range_tests can be called first on any
4256 bb in the range we want to optimize. So, start searching
4257 backwards, if first_bb can be set to a predecessor. */
4258 while (single_pred_p (first_bb
))
4260 basic_block pred_bb
= single_pred (first_bb
);
4261 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, true))
4263 if (!no_side_effect_bb (first_bb
))
4267 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4268 Before starting forward search in last_bb successors, find
4269 out the other_bb. */
4270 if (first_bb
== last_bb
)
4273 /* As non-GIMPLE_COND last stmt always terminates the range,
4274 if forward search didn't discover anything, just give up. */
4275 if (gimple_code (stmt
) != GIMPLE_COND
)
4276 return cfg_cleanup_needed
;
4277 /* Look at both successors. Either it ends with a GIMPLE_COND
4278 and satisfies suitable_cond_bb, or ends with a cast and
4279 other_bb is that cast's successor. */
4280 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
4281 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
4282 || e
->dest
== first_bb
)
4283 return cfg_cleanup_needed
;
4284 else if (single_pred_p (e
->dest
))
4286 stmt
= last_stmt (e
->dest
);
4288 && gimple_code (stmt
) == GIMPLE_COND
4289 && EDGE_COUNT (e
->dest
->succs
) == 2)
4291 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
, true))
4297 && final_range_test_p (stmt
)
4298 && find_edge (first_bb
, single_succ (e
->dest
)))
4300 other_bb
= single_succ (e
->dest
);
4301 if (other_bb
== first_bb
)
4305 if (other_bb
== NULL
)
4306 return cfg_cleanup_needed
;
4308 /* Now do the forward search, moving last_bb to successor bbs
4309 that aren't other_bb. */
4310 while (EDGE_COUNT (last_bb
->succs
) == 2)
4312 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
4313 if (e
->dest
!= other_bb
)
4317 if (!single_pred_p (e
->dest
))
4319 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, false))
4321 if (!no_side_effect_bb (e
->dest
))
4325 if (first_bb
== last_bb
)
4326 return cfg_cleanup_needed
;
4327 /* Here basic blocks first_bb through last_bb's predecessor
4328 end with GIMPLE_COND, all of them have one of the edges to
4329 other_bb and another to another block in the range,
4330 all blocks except first_bb don't have side-effects and
4331 last_bb ends with either GIMPLE_COND, or cast satisfying
4332 final_range_test_p. */
4333 for (bb
= last_bb
; ; bb
= single_pred (bb
))
4335 enum tree_code code
;
4337 inter_bb_range_test_entry bb_ent
;
4339 bb_ent
.op
= NULL_TREE
;
4340 bb_ent
.first_idx
= ops
.length ();
4341 bb_ent
.last_idx
= bb_ent
.first_idx
;
4342 e
= find_edge (bb
, other_bb
);
4343 stmt
= last_stmt (bb
);
4344 gimple_set_visited (stmt
, true);
4345 if (gimple_code (stmt
) != GIMPLE_COND
)
4347 use_operand_p use_p
;
4352 lhs
= gimple_assign_lhs (stmt
);
4353 rhs
= gimple_assign_rhs1 (stmt
);
4354 gcc_assert (bb
== last_bb
);
4363 # _345 = PHI <_123(N), 1(...), 1(...)>
4365 or 0 instead of 1. If it is 0, the _234
4366 range test is anded together with all the
4367 other range tests, if it is 1, it is ored with
4369 single_imm_use (lhs
, &use_p
, &phi
);
4370 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
4371 e2
= find_edge (first_bb
, other_bb
);
4373 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
4374 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
4375 code
= BIT_AND_EXPR
;
4378 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
4379 code
= BIT_IOR_EXPR
;
4382 /* If _234 SSA_NAME_DEF_STMT is
4384 (or &, corresponding to 1/0 in the phi arguments,
4385 push into ops the individual range test arguments
4386 of the bitwise or resp. and, recursively. */
4387 if (TREE_CODE (rhs
) == SSA_NAME
4388 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4390 && !get_ops (rhs
, code
, &ops
,
4391 loop_containing_stmt (stmt
))
4392 && has_single_use (rhs
))
4394 /* Otherwise, push the _234 range test itself. */
4395 operand_entry
*oe
= operand_entry_pool
.allocate ();
4401 oe
->stmt_to_insert
= NULL
;
4406 else if (is_gimple_assign (stmt
)
4407 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4409 && !get_ops (lhs
, code
, &ops
,
4410 loop_containing_stmt (stmt
))
4411 && has_single_use (lhs
))
4413 operand_entry
*oe
= operand_entry_pool
.allocate ();
4424 bb_ent
.last_idx
= ops
.length ();
4427 bbinfo
.safe_push (bb_ent
);
4430 /* Otherwise stmt is GIMPLE_COND. */
4431 code
= gimple_cond_code (stmt
);
4432 lhs
= gimple_cond_lhs (stmt
);
4433 rhs
= gimple_cond_rhs (stmt
);
4434 if (TREE_CODE (lhs
) == SSA_NAME
4435 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
4436 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
4437 || rhs
!= boolean_false_node
4438 /* Either push into ops the individual bitwise
4439 or resp. and operands, depending on which
4440 edge is other_bb. */
4441 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
4442 ^ (code
== EQ_EXPR
))
4443 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
4444 loop_containing_stmt (stmt
))))
4446 /* Or push the GIMPLE_COND stmt itself. */
4447 operand_entry
*oe
= operand_entry_pool
.allocate ();
4450 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
4451 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
4452 /* oe->op = NULL signs that there is no SSA_NAME
4453 for the range test, and oe->id instead is the
4454 basic block number, at which's end the GIMPLE_COND
4458 oe
->stmt_to_insert
= NULL
;
4463 else if (ops
.length () > bb_ent
.first_idx
)
4466 bb_ent
.last_idx
= ops
.length ();
4468 bbinfo
.safe_push (bb_ent
);
4472 if (ops
.length () > 1)
4473 any_changes
= optimize_range_tests (ERROR_MARK
, &ops
, first_bb
);
4476 unsigned int idx
, max_idx
= 0;
4477 /* update_ops relies on has_single_use predicates returning the
4478 same values as it did during get_ops earlier. Additionally it
4479 never removes statements, only adds new ones and it should walk
4480 from the single imm use and check the predicate already before
4481 making those changes.
4482 On the other side, the handling of GIMPLE_COND directly can turn
4483 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
4484 it needs to be done in a separate loop afterwards. */
4485 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
4487 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
4488 && bbinfo
[idx
].op
!= NULL_TREE
)
4493 stmt
= last_stmt (bb
);
4494 new_op
= update_ops (bbinfo
[idx
].op
,
4496 ops
[bbinfo
[idx
].first_idx
]->rank
,
4497 ops
, &bbinfo
[idx
].first_idx
,
4498 loop_containing_stmt (stmt
));
4499 if (new_op
== NULL_TREE
)
4501 gcc_assert (bb
== last_bb
);
4502 new_op
= ops
[bbinfo
[idx
].first_idx
++]->op
;
4504 if (bbinfo
[idx
].op
!= new_op
)
4506 imm_use_iterator iter
;
4507 use_operand_p use_p
;
4508 gimple
*use_stmt
, *cast_or_tcc_cmp_stmt
= NULL
;
4510 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, bbinfo
[idx
].op
)
4511 if (is_gimple_debug (use_stmt
))
4513 else if (gimple_code (use_stmt
) == GIMPLE_COND
4514 || gimple_code (use_stmt
) == GIMPLE_PHI
)
4515 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
4516 SET_USE (use_p
, new_op
);
4517 else if ((is_gimple_assign (use_stmt
)
4519 (gimple_assign_rhs_code (use_stmt
))
4520 == tcc_comparison
)))
4521 cast_or_tcc_cmp_stmt
= use_stmt
;
4522 else if (gimple_assign_cast_p (use_stmt
))
4523 cast_or_tcc_cmp_stmt
= use_stmt
;
4527 if (cast_or_tcc_cmp_stmt
)
4529 gcc_assert (bb
== last_bb
);
4530 tree lhs
= gimple_assign_lhs (cast_or_tcc_cmp_stmt
);
4531 tree new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
4532 enum tree_code rhs_code
4533 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt
)
4534 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt
)
4537 if (is_gimple_min_invariant (new_op
))
4539 new_op
= fold_convert (TREE_TYPE (lhs
), new_op
);
4540 g
= gimple_build_assign (new_lhs
, new_op
);
4543 g
= gimple_build_assign (new_lhs
, rhs_code
, new_op
);
4544 gimple_stmt_iterator gsi
4545 = gsi_for_stmt (cast_or_tcc_cmp_stmt
);
4546 gimple_set_uid (g
, gimple_uid (cast_or_tcc_cmp_stmt
));
4547 gimple_set_visited (g
, true);
4548 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4549 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
4550 if (is_gimple_debug (use_stmt
))
4552 else if (gimple_code (use_stmt
) == GIMPLE_COND
4553 || gimple_code (use_stmt
) == GIMPLE_PHI
)
4554 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
4555 SET_USE (use_p
, new_lhs
);
4564 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
4566 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
4567 && bbinfo
[idx
].op
== NULL_TREE
4568 && ops
[bbinfo
[idx
].first_idx
]->op
!= NULL_TREE
)
4570 gcond
*cond_stmt
= as_a
<gcond
*> (last_stmt (bb
));
4575 /* If we collapse the conditional to a true/false
4576 condition, then bubble that knowledge up to our caller. */
4577 if (integer_zerop (ops
[bbinfo
[idx
].first_idx
]->op
))
4579 gimple_cond_make_false (cond_stmt
);
4580 cfg_cleanup_needed
= true;
4582 else if (integer_onep (ops
[bbinfo
[idx
].first_idx
]->op
))
4584 gimple_cond_make_true (cond_stmt
);
4585 cfg_cleanup_needed
= true;
4589 gimple_cond_set_code (cond_stmt
, NE_EXPR
);
4590 gimple_cond_set_lhs (cond_stmt
,
4591 ops
[bbinfo
[idx
].first_idx
]->op
);
4592 gimple_cond_set_rhs (cond_stmt
, boolean_false_node
);
4594 update_stmt (cond_stmt
);
4600 /* The above changes could result in basic blocks after the first
4601 modified one, up to and including last_bb, to be executed even if
4602 they would not be in the original program. If the value ranges of
4603 assignment lhs' in those bbs were dependent on the conditions
4604 guarding those basic blocks which now can change, the VRs might
4605 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
4606 are only used within the same bb, it should be not a big deal if
4607 we just reset all the VRs in those bbs. See PR68671. */
4608 for (bb
= last_bb
, idx
= 0; idx
< max_idx
; bb
= single_pred (bb
), idx
++)
4609 reset_flow_sensitive_info_in_bb (bb
);
4611 return cfg_cleanup_needed
;
4614 /* Return true if OPERAND is defined by a PHI node which uses the LHS
4615 of STMT in it's operands. This is also known as a "destructive
4616 update" operation. */
4619 is_phi_for_stmt (gimple
*stmt
, tree operand
)
4624 use_operand_p arg_p
;
4627 if (TREE_CODE (operand
) != SSA_NAME
)
4630 lhs
= gimple_assign_lhs (stmt
);
4632 def_stmt
= SSA_NAME_DEF_STMT (operand
);
4633 def_phi
= dyn_cast
<gphi
*> (def_stmt
);
4637 FOR_EACH_PHI_ARG (arg_p
, def_phi
, i
, SSA_OP_USE
)
4638 if (lhs
== USE_FROM_PTR (arg_p
))
4643 /* Remove def stmt of VAR if VAR has zero uses and recurse
4644 on rhs1 operand if so. */
4647 remove_visited_stmt_chain (tree var
)
4650 gimple_stmt_iterator gsi
;
4654 if (TREE_CODE (var
) != SSA_NAME
|| !has_zero_uses (var
))
4656 stmt
= SSA_NAME_DEF_STMT (var
);
4657 if (is_gimple_assign (stmt
) && gimple_visited_p (stmt
))
4659 var
= gimple_assign_rhs1 (stmt
);
4660 gsi
= gsi_for_stmt (stmt
);
4661 reassoc_remove_stmt (&gsi
);
4662 release_defs (stmt
);
4669 /* This function checks three consequtive operands in
4670 passed operands vector OPS starting from OPINDEX and
4671 swaps two operands if it is profitable for binary operation
4672 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
4674 We pair ops with the same rank if possible.
4676 The alternative we try is to see if STMT is a destructive
4677 update style statement, which is like:
4680 In that case, we want to use the destructive update form to
4681 expose the possible vectorizer sum reduction opportunity.
4682 In that case, the third operand will be the phi node. This
4683 check is not performed if STMT is null.
4685 We could, of course, try to be better as noted above, and do a
4686 lot of work to try to find these opportunities in >3 operand
4687 cases, but it is unlikely to be worth it. */
4690 swap_ops_for_binary_stmt (vec
<operand_entry
*> ops
,
4691 unsigned int opindex
, gimple
*stmt
)
4693 operand_entry
*oe1
, *oe2
, *oe3
;
4696 oe2
= ops
[opindex
+ 1];
4697 oe3
= ops
[opindex
+ 2];
4699 if ((oe1
->rank
== oe2
->rank
4700 && oe2
->rank
!= oe3
->rank
)
4701 || (stmt
&& is_phi_for_stmt (stmt
, oe3
->op
)
4702 && !is_phi_for_stmt (stmt
, oe1
->op
)
4703 && !is_phi_for_stmt (stmt
, oe2
->op
)))
4704 std::swap (*oe1
, *oe3
);
4705 else if ((oe1
->rank
== oe3
->rank
4706 && oe2
->rank
!= oe3
->rank
)
4707 || (stmt
&& is_phi_for_stmt (stmt
, oe2
->op
)
4708 && !is_phi_for_stmt (stmt
, oe1
->op
)
4709 && !is_phi_for_stmt (stmt
, oe3
->op
)))
4710 std::swap (*oe1
, *oe2
);
4713 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
4714 two definitions, otherwise return STMT. */
4716 static inline gimple
*
4717 find_insert_point (gimple
*stmt
, tree rhs1
, tree rhs2
)
4719 if (TREE_CODE (rhs1
) == SSA_NAME
4720 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs1
)))
4721 stmt
= SSA_NAME_DEF_STMT (rhs1
);
4722 if (TREE_CODE (rhs2
) == SSA_NAME
4723 && reassoc_stmt_dominates_stmt_p (stmt
, SSA_NAME_DEF_STMT (rhs2
)))
4724 stmt
= SSA_NAME_DEF_STMT (rhs2
);
4728 /* If the stmt that defines operand has to be inserted, insert it
4731 insert_stmt_before_use (gimple
*stmt
, gimple
*stmt_to_insert
)
4733 gcc_assert (is_gimple_assign (stmt_to_insert
));
4734 tree rhs1
= gimple_assign_rhs1 (stmt_to_insert
);
4735 tree rhs2
= gimple_assign_rhs2 (stmt_to_insert
);
4736 gimple
*insert_point
= find_insert_point (stmt
, rhs1
, rhs2
);
4737 gimple_stmt_iterator gsi
= gsi_for_stmt (insert_point
);
4738 gimple_set_uid (stmt_to_insert
, gimple_uid (insert_point
));
4740 /* If the insert point is not stmt, then insert_point would be
4741 the point where operand rhs1 or rhs2 is defined. In this case,
4742 stmt_to_insert has to be inserted afterwards. This would
4743 only happen when the stmt insertion point is flexible. */
4744 if (stmt
== insert_point
)
4745 gsi_insert_before (&gsi
, stmt_to_insert
, GSI_NEW_STMT
);
4747 insert_stmt_after (stmt_to_insert
, insert_point
);
4751 /* Recursively rewrite our linearized statements so that the operators
4752 match those in OPS[OPINDEX], putting the computation in rank
4753 order. Return new lhs.
4754 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
4755 the current stmt and during recursive invocations.
4756 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
4757 recursive invocations. */
4760 rewrite_expr_tree (gimple
*stmt
, unsigned int opindex
,
4761 vec
<operand_entry
*> ops
, bool changed
, bool next_changed
)
4763 tree rhs1
= gimple_assign_rhs1 (stmt
);
4764 tree rhs2
= gimple_assign_rhs2 (stmt
);
4765 tree lhs
= gimple_assign_lhs (stmt
);
4768 /* The final recursion case for this function is that you have
4769 exactly two operations left.
4770 If we had exactly one op in the entire list to start with, we
4771 would have never called this function, and the tail recursion
4772 rewrites them one at a time. */
4773 if (opindex
+ 2 == ops
.length ())
4775 operand_entry
*oe1
, *oe2
;
4778 oe2
= ops
[opindex
+ 1];
4780 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
4782 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4783 unsigned int uid
= gimple_uid (stmt
);
4785 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4787 fprintf (dump_file
, "Transforming ");
4788 print_gimple_stmt (dump_file
, stmt
, 0);
4791 /* If the stmt that defines operand has to be inserted, insert it
4793 if (oe1
->stmt_to_insert
)
4794 insert_stmt_before_use (stmt
, oe1
->stmt_to_insert
);
4795 if (oe2
->stmt_to_insert
)
4796 insert_stmt_before_use (stmt
, oe2
->stmt_to_insert
);
4797 /* Even when changed is false, reassociation could have e.g. removed
4798 some redundant operations, so unless we are just swapping the
4799 arguments or unless there is no change at all (then we just
4800 return lhs), force creation of a new SSA_NAME. */
4801 if (changed
|| ((rhs1
!= oe2
->op
|| rhs2
!= oe1
->op
) && opindex
))
4803 gimple
*insert_point
4804 = find_insert_point (stmt
, oe1
->op
, oe2
->op
);
4805 lhs
= make_ssa_name (TREE_TYPE (lhs
));
4807 = gimple_build_assign (lhs
, gimple_assign_rhs_code (stmt
),
4809 gimple_set_uid (stmt
, uid
);
4810 gimple_set_visited (stmt
, true);
4811 if (insert_point
== gsi_stmt (gsi
))
4812 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
4814 insert_stmt_after (stmt
, insert_point
);
4818 gcc_checking_assert (find_insert_point (stmt
, oe1
->op
, oe2
->op
)
4820 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
4821 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
4825 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
4826 remove_visited_stmt_chain (rhs1
);
4828 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4830 fprintf (dump_file
, " into ");
4831 print_gimple_stmt (dump_file
, stmt
, 0);
4837 /* If we hit here, we should have 3 or more ops left. */
4838 gcc_assert (opindex
+ 2 < ops
.length ());
4840 /* Rewrite the next operator. */
4843 /* If the stmt that defines operand has to be inserted, insert it
4845 if (oe
->stmt_to_insert
)
4846 insert_stmt_before_use (stmt
, oe
->stmt_to_insert
);
4848 /* Recurse on the LHS of the binary operator, which is guaranteed to
4849 be the non-leaf side. */
4851 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), opindex
+ 1, ops
,
4852 changed
|| oe
->op
!= rhs2
|| next_changed
,
4855 if (oe
->op
!= rhs2
|| new_rhs1
!= rhs1
)
4857 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4859 fprintf (dump_file
, "Transforming ");
4860 print_gimple_stmt (dump_file
, stmt
, 0);
4863 /* If changed is false, this is either opindex == 0
4864 or all outer rhs2's were equal to corresponding oe->op,
4865 and powi_result is NULL.
4866 That means lhs is equivalent before and after reassociation.
4867 Otherwise ensure the old lhs SSA_NAME is not reused and
4868 create a new stmt as well, so that any debug stmts will be
4869 properly adjusted. */
4872 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4873 unsigned int uid
= gimple_uid (stmt
);
4874 gimple
*insert_point
= find_insert_point (stmt
, new_rhs1
, oe
->op
);
4876 lhs
= make_ssa_name (TREE_TYPE (lhs
));
4877 stmt
= gimple_build_assign (lhs
, gimple_assign_rhs_code (stmt
),
4879 gimple_set_uid (stmt
, uid
);
4880 gimple_set_visited (stmt
, true);
4881 if (insert_point
== gsi_stmt (gsi
))
4882 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
4884 insert_stmt_after (stmt
, insert_point
);
4888 gcc_checking_assert (find_insert_point (stmt
, new_rhs1
, oe
->op
)
4890 gimple_assign_set_rhs1 (stmt
, new_rhs1
);
4891 gimple_assign_set_rhs2 (stmt
, oe
->op
);
4895 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4897 fprintf (dump_file
, " into ");
4898 print_gimple_stmt (dump_file
, stmt
, 0);
4904 /* Find out how many cycles we need to compute statements chain.
4905 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
4906 maximum number of independent statements we may execute per cycle. */
4909 get_required_cycles (int ops_num
, int cpu_width
)
4915 /* While we have more than 2 * cpu_width operands
4916 we may reduce number of operands by cpu_width
4918 res
= ops_num
/ (2 * cpu_width
);
4920 /* Remained operands count may be reduced twice per cycle
4921 until we have only one operand. */
4922 rest
= (unsigned)(ops_num
- res
* cpu_width
);
4923 elog
= exact_log2 (rest
);
4927 res
+= floor_log2 (rest
) + 1;
4932 /* Returns an optimal number of registers to use for computation of
4933 given statements. */
4936 get_reassociation_width (int ops_num
, enum tree_code opc
,
4939 int param_width
= PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH
);
4944 if (param_width
> 0)
4945 width
= param_width
;
4947 width
= targetm
.sched
.reassociation_width (opc
, mode
);
4952 /* Get the minimal time required for sequence computation. */
4953 cycles_best
= get_required_cycles (ops_num
, width
);
4955 /* Check if we may use less width and still compute sequence for
4956 the same time. It will allow us to reduce registers usage.
4957 get_required_cycles is monotonically increasing with lower width
4958 so we can perform a binary search for the minimal width that still
4959 results in the optimal cycle count. */
4961 while (width
> width_min
)
4963 int width_mid
= (width
+ width_min
) / 2;
4965 if (get_required_cycles (ops_num
, width_mid
) == cycles_best
)
4967 else if (width_min
< width_mid
)
4968 width_min
= width_mid
;
4976 /* Recursively rewrite our linearized statements so that the operators
4977 match those in OPS[OPINDEX], putting the computation in rank
4978 order and trying to allow operations to be executed in
4982 rewrite_expr_tree_parallel (gassign
*stmt
, int width
,
4983 vec
<operand_entry
*> ops
)
4985 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
4986 int op_num
= ops
.length ();
4987 gcc_assert (op_num
> 0);
4988 int stmt_num
= op_num
- 1;
4989 gimple
**stmts
= XALLOCAVEC (gimple
*, stmt_num
);
4990 int op_index
= op_num
- 1;
4992 int ready_stmts_end
= 0;
4994 gimple
*stmt1
= NULL
, *stmt2
= NULL
;
4995 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
4997 /* We start expression rewriting from the top statements.
4998 So, in this loop we create a full list of statements
4999 we will work with. */
5000 stmts
[stmt_num
- 1] = stmt
;
5001 for (i
= stmt_num
- 2; i
>= 0; i
--)
5002 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
5004 for (i
= 0; i
< stmt_num
; i
++)
5008 /* Determine whether we should use results of
5009 already handled statements or not. */
5010 if (ready_stmts_end
== 0
5011 && (i
- stmt_index
>= width
|| op_index
< 1))
5012 ready_stmts_end
= i
;
5014 /* Now we choose operands for the next statement. Non zero
5015 value in ready_stmts_end means here that we should use
5016 the result of already generated statements as new operand. */
5017 if (ready_stmts_end
> 0)
5019 op1
= gimple_assign_lhs (stmts
[stmt_index
++]);
5020 if (ready_stmts_end
> stmt_index
)
5021 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
5022 else if (op_index
>= 0)
5024 operand_entry
*oe
= ops
[op_index
--];
5025 stmt2
= oe
->stmt_to_insert
;
5030 gcc_assert (stmt_index
< i
);
5031 op2
= gimple_assign_lhs (stmts
[stmt_index
++]);
5034 if (stmt_index
>= ready_stmts_end
)
5035 ready_stmts_end
= 0;
5040 swap_ops_for_binary_stmt (ops
, op_index
- 2, NULL
);
5041 operand_entry
*oe2
= ops
[op_index
--];
5042 operand_entry
*oe1
= ops
[op_index
--];
5044 stmt2
= oe2
->stmt_to_insert
;
5046 stmt1
= oe1
->stmt_to_insert
;
5049 /* If we emit the last statement then we should put
5050 operands into the last statement. It will also
5052 if (op_index
< 0 && stmt_index
== i
)
5055 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5057 fprintf (dump_file
, "Transforming ");
5058 print_gimple_stmt (dump_file
, stmts
[i
], 0);
5061 /* If the stmt that defines operand has to be inserted, insert it
5064 insert_stmt_before_use (stmts
[i
], stmt1
);
5066 insert_stmt_before_use (stmts
[i
], stmt2
);
5067 stmt1
= stmt2
= NULL
;
5069 /* We keep original statement only for the last one. All
5070 others are recreated. */
5071 if (i
== stmt_num
- 1)
5073 gimple_assign_set_rhs1 (stmts
[i
], op1
);
5074 gimple_assign_set_rhs2 (stmts
[i
], op2
);
5075 update_stmt (stmts
[i
]);
5079 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
), op1
, op2
, opcode
);
5080 gimple_set_visited (stmts
[i
], true);
5082 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5084 fprintf (dump_file
, " into ");
5085 print_gimple_stmt (dump_file
, stmts
[i
], 0);
5089 remove_visited_stmt_chain (last_rhs1
);
5092 /* Transform STMT, which is really (A +B) + (C + D) into the left
5093 linear form, ((A+B)+C)+D.
5094 Recurse on D if necessary. */
5097 linearize_expr (gimple
*stmt
)
5099 gimple_stmt_iterator gsi
;
5100 gimple
*binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
5101 gimple
*binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
5102 gimple
*oldbinrhs
= binrhs
;
5103 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
5104 gimple
*newbinrhs
= NULL
;
5105 class loop
*loop
= loop_containing_stmt (stmt
);
5106 tree lhs
= gimple_assign_lhs (stmt
);
5108 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
5109 && is_reassociable_op (binrhs
, rhscode
, loop
));
5111 gsi
= gsi_for_stmt (stmt
);
5113 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
5114 binrhs
= gimple_build_assign (make_ssa_name (TREE_TYPE (lhs
)),
5115 gimple_assign_rhs_code (binrhs
),
5116 gimple_assign_lhs (binlhs
),
5117 gimple_assign_rhs2 (binrhs
));
5118 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
5119 gsi_insert_before (&gsi
, binrhs
, GSI_SAME_STMT
);
5120 gimple_set_uid (binrhs
, gimple_uid (stmt
));
5122 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
5123 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
5125 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5127 fprintf (dump_file
, "Linearized: ");
5128 print_gimple_stmt (dump_file
, stmt
, 0);
5131 reassociate_stats
.linearized
++;
5134 gsi
= gsi_for_stmt (oldbinrhs
);
5135 reassoc_remove_stmt (&gsi
);
5136 release_defs (oldbinrhs
);
5138 gimple_set_visited (stmt
, true);
5139 gimple_set_visited (binlhs
, true);
5140 gimple_set_visited (binrhs
, true);
5142 /* Tail recurse on the new rhs if it still needs reassociation. */
5143 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
5144 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5145 want to change the algorithm while converting to tuples. */
5146 linearize_expr (stmt
);
5149 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5150 it. Otherwise, return NULL. */
5153 get_single_immediate_use (tree lhs
)
5155 use_operand_p immuse
;
5158 if (TREE_CODE (lhs
) == SSA_NAME
5159 && single_imm_use (lhs
, &immuse
, &immusestmt
)
5160 && is_gimple_assign (immusestmt
))
5166 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5167 representing the negated value. Insertions of any necessary
5168 instructions go before GSI.
5169 This function is recursive in that, if you hand it "a_5" as the
5170 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5171 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5174 negate_value (tree tonegate
, gimple_stmt_iterator
*gsip
)
5176 gimple
*negatedefstmt
= NULL
;
5177 tree resultofnegate
;
5178 gimple_stmt_iterator gsi
;
5181 /* If we are trying to negate a name, defined by an add, negate the
5182 add operands instead. */
5183 if (TREE_CODE (tonegate
) == SSA_NAME
)
5184 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
5185 if (TREE_CODE (tonegate
) == SSA_NAME
5186 && is_gimple_assign (negatedefstmt
)
5187 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
5188 && has_single_use (gimple_assign_lhs (negatedefstmt
))
5189 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
5191 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
5192 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
5193 tree lhs
= gimple_assign_lhs (negatedefstmt
);
5196 gsi
= gsi_for_stmt (negatedefstmt
);
5197 rhs1
= negate_value (rhs1
, &gsi
);
5199 gsi
= gsi_for_stmt (negatedefstmt
);
5200 rhs2
= negate_value (rhs2
, &gsi
);
5202 gsi
= gsi_for_stmt (negatedefstmt
);
5203 lhs
= make_ssa_name (TREE_TYPE (lhs
));
5204 gimple_set_visited (negatedefstmt
, true);
5205 g
= gimple_build_assign (lhs
, PLUS_EXPR
, rhs1
, rhs2
);
5206 gimple_set_uid (g
, gimple_uid (negatedefstmt
));
5207 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
5211 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
5212 resultofnegate
= force_gimple_operand_gsi (gsip
, tonegate
, true,
5213 NULL_TREE
, true, GSI_SAME_STMT
);
5215 uid
= gimple_uid (gsi_stmt (gsi
));
5216 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
5218 gimple
*stmt
= gsi_stmt (gsi
);
5219 if (gimple_uid (stmt
) != 0)
5221 gimple_set_uid (stmt
, uid
);
5223 return resultofnegate
;
5226 /* Return true if we should break up the subtract in STMT into an add
5227 with negate. This is true when we the subtract operands are really
5228 adds, or the subtract itself is used in an add expression. In
5229 either case, breaking up the subtract into an add with negate
5230 exposes the adds to reassociation. */
5233 should_break_up_subtract (gimple
*stmt
)
5235 tree lhs
= gimple_assign_lhs (stmt
);
5236 tree binlhs
= gimple_assign_rhs1 (stmt
);
5237 tree binrhs
= gimple_assign_rhs2 (stmt
);
5239 class loop
*loop
= loop_containing_stmt (stmt
);
5241 if (TREE_CODE (binlhs
) == SSA_NAME
5242 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs
), PLUS_EXPR
, loop
))
5245 if (TREE_CODE (binrhs
) == SSA_NAME
5246 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
), PLUS_EXPR
, loop
))
5249 if (TREE_CODE (lhs
) == SSA_NAME
5250 && (immusestmt
= get_single_immediate_use (lhs
))
5251 && is_gimple_assign (immusestmt
)
5252 && (gimple_assign_rhs_code (immusestmt
) == PLUS_EXPR
5253 || (gimple_assign_rhs_code (immusestmt
) == MINUS_EXPR
5254 && gimple_assign_rhs1 (immusestmt
) == lhs
)
5255 || gimple_assign_rhs_code (immusestmt
) == MULT_EXPR
))
5260 /* Transform STMT from A - B into A + -B. */
5263 break_up_subtract (gimple
*stmt
, gimple_stmt_iterator
*gsip
)
5265 tree rhs1
= gimple_assign_rhs1 (stmt
);
5266 tree rhs2
= gimple_assign_rhs2 (stmt
);
5268 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5270 fprintf (dump_file
, "Breaking up subtract ");
5271 print_gimple_stmt (dump_file
, stmt
, 0);
5274 rhs2
= negate_value (rhs2
, gsip
);
5275 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
5279 /* Determine whether STMT is a builtin call that raises an SSA name
5280 to an integer power and has only one use. If so, and this is early
5281 reassociation and unsafe math optimizations are permitted, place
5282 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
5283 If any of these conditions does not hold, return FALSE. */
5286 acceptable_pow_call (gcall
*stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
5289 REAL_VALUE_TYPE c
, cint
;
5291 switch (gimple_call_combined_fn (stmt
))
5294 if (flag_errno_math
)
5297 *base
= gimple_call_arg (stmt
, 0);
5298 arg1
= gimple_call_arg (stmt
, 1);
5300 if (TREE_CODE (arg1
) != REAL_CST
)
5303 c
= TREE_REAL_CST (arg1
);
5305 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
5308 *exponent
= real_to_integer (&c
);
5309 real_from_integer (&cint
, VOIDmode
, *exponent
, SIGNED
);
5310 if (!real_identical (&c
, &cint
))
5316 *base
= gimple_call_arg (stmt
, 0);
5317 arg1
= gimple_call_arg (stmt
, 1);
5319 if (!tree_fits_shwi_p (arg1
))
5322 *exponent
= tree_to_shwi (arg1
);
5329 /* Expanding negative exponents is generally unproductive, so we don't
5330 complicate matters with those. Exponents of zero and one should
5331 have been handled by expression folding. */
5332 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
5338 /* Try to derive and add operand entry for OP to *OPS. Return false if
5342 try_special_add_to_ops (vec
<operand_entry
*> *ops
,
5343 enum tree_code code
,
5344 tree op
, gimple
* def_stmt
)
5346 tree base
= NULL_TREE
;
5347 HOST_WIDE_INT exponent
= 0;
5349 if (TREE_CODE (op
) != SSA_NAME
5350 || ! has_single_use (op
))
5353 if (code
== MULT_EXPR
5354 && reassoc_insert_powi_p
5355 && flag_unsafe_math_optimizations
5356 && is_gimple_call (def_stmt
)
5357 && acceptable_pow_call (as_a
<gcall
*> (def_stmt
), &base
, &exponent
))
5359 add_repeat_to_ops_vec (ops
, base
, exponent
);
5360 gimple_set_visited (def_stmt
, true);
5363 else if (code
== MULT_EXPR
5364 && is_gimple_assign (def_stmt
)
5365 && gimple_assign_rhs_code (def_stmt
) == NEGATE_EXPR
5366 && !HONOR_SNANS (TREE_TYPE (op
))
5367 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op
))
5368 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op
))))
5370 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
5371 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
5372 add_to_ops_vec (ops
, rhs1
);
5373 add_to_ops_vec (ops
, cst
);
5374 gimple_set_visited (def_stmt
, true);
5381 /* Recursively linearize a binary expression that is the RHS of STMT.
5382 Place the operands of the expression tree in the vector named OPS. */
5385 linearize_expr_tree (vec
<operand_entry
*> *ops
, gimple
*stmt
,
5386 bool is_associative
, bool set_visited
)
5388 tree binlhs
= gimple_assign_rhs1 (stmt
);
5389 tree binrhs
= gimple_assign_rhs2 (stmt
);
5390 gimple
*binlhsdef
= NULL
, *binrhsdef
= NULL
;
5391 bool binlhsisreassoc
= false;
5392 bool binrhsisreassoc
= false;
5393 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
5394 class loop
*loop
= loop_containing_stmt (stmt
);
5397 gimple_set_visited (stmt
, true);
5399 if (TREE_CODE (binlhs
) == SSA_NAME
)
5401 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
5402 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
5403 && !stmt_could_throw_p (cfun
, binlhsdef
));
5406 if (TREE_CODE (binrhs
) == SSA_NAME
)
5408 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
5409 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
5410 && !stmt_could_throw_p (cfun
, binrhsdef
));
5413 /* If the LHS is not reassociable, but the RHS is, we need to swap
5414 them. If neither is reassociable, there is nothing we can do, so
5415 just put them in the ops vector. If the LHS is reassociable,
5416 linearize it. If both are reassociable, then linearize the RHS
5419 if (!binlhsisreassoc
)
5421 /* If this is not a associative operation like division, give up. */
5422 if (!is_associative
)
5424 add_to_ops_vec (ops
, binrhs
);
5428 if (!binrhsisreassoc
)
5430 if (!try_special_add_to_ops (ops
, rhscode
, binrhs
, binrhsdef
))
5431 add_to_ops_vec (ops
, binrhs
);
5433 if (!try_special_add_to_ops (ops
, rhscode
, binlhs
, binlhsdef
))
5434 add_to_ops_vec (ops
, binlhs
);
5439 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5441 fprintf (dump_file
, "swapping operands of ");
5442 print_gimple_stmt (dump_file
, stmt
, 0);
5445 swap_ssa_operands (stmt
,
5446 gimple_assign_rhs1_ptr (stmt
),
5447 gimple_assign_rhs2_ptr (stmt
));
5450 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5452 fprintf (dump_file
, " is now ");
5453 print_gimple_stmt (dump_file
, stmt
, 0);
5456 /* We want to make it so the lhs is always the reassociative op,
5458 std::swap (binlhs
, binrhs
);
5460 else if (binrhsisreassoc
)
5462 linearize_expr (stmt
);
5463 binlhs
= gimple_assign_rhs1 (stmt
);
5464 binrhs
= gimple_assign_rhs2 (stmt
);
5467 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
5468 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
5470 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
5471 is_associative
, set_visited
);
5473 if (!try_special_add_to_ops (ops
, rhscode
, binrhs
, binrhsdef
))
5474 add_to_ops_vec (ops
, binrhs
);
5477 /* Repropagate the negates back into subtracts, since no other pass
5478 currently does it. */
5481 repropagate_negates (void)
5486 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
5488 gimple
*user
= get_single_immediate_use (negate
);
5490 if (!user
|| !is_gimple_assign (user
))
5493 /* The negate operand can be either operand of a PLUS_EXPR
5494 (it can be the LHS if the RHS is a constant for example).
5496 Force the negate operand to the RHS of the PLUS_EXPR, then
5497 transform the PLUS_EXPR into a MINUS_EXPR. */
5498 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
5500 /* If the negated operand appears on the LHS of the
5501 PLUS_EXPR, exchange the operands of the PLUS_EXPR
5502 to force the negated operand to the RHS of the PLUS_EXPR. */
5503 if (gimple_assign_rhs1 (user
) == negate
)
5505 swap_ssa_operands (user
,
5506 gimple_assign_rhs1_ptr (user
),
5507 gimple_assign_rhs2_ptr (user
));
5510 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
5511 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
5512 if (gimple_assign_rhs2 (user
) == negate
)
5514 tree rhs1
= gimple_assign_rhs1 (user
);
5515 tree rhs2
= gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate
));
5516 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
5517 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
, rhs2
);
5521 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
5523 if (gimple_assign_rhs1 (user
) == negate
)
5528 which we transform into
5531 This pushes down the negate which we possibly can merge
5532 into some other operation, hence insert it into the
5533 plus_negates vector. */
5534 gimple
*feed
= SSA_NAME_DEF_STMT (negate
);
5535 tree a
= gimple_assign_rhs1 (feed
);
5536 tree b
= gimple_assign_rhs2 (user
);
5537 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
);
5538 gimple_stmt_iterator gsi2
= gsi_for_stmt (user
);
5539 tree x
= make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed
)));
5540 gimple
*g
= gimple_build_assign (x
, PLUS_EXPR
, a
, b
);
5541 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
5542 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, x
);
5543 user
= gsi_stmt (gsi2
);
5545 reassoc_remove_stmt (&gsi
);
5546 release_defs (feed
);
5547 plus_negates
.safe_push (gimple_assign_lhs (user
));
5551 /* Transform "x = -a; y = b - x" into "y = b + a", getting
5552 rid of one operation. */
5553 gimple
*feed
= SSA_NAME_DEF_STMT (negate
);
5554 tree a
= gimple_assign_rhs1 (feed
);
5555 tree rhs1
= gimple_assign_rhs1 (user
);
5556 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
5557 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, a
);
5558 update_stmt (gsi_stmt (gsi
));
5564 /* Returns true if OP is of a type for which we can do reassociation.
5565 That is for integral or non-saturating fixed-point types, and for
5566 floating point type when associative-math is enabled. */
5569 can_reassociate_p (tree op
)
5571 tree type
= TREE_TYPE (op
);
5572 if (TREE_CODE (op
) == SSA_NAME
&& SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op
))
5574 if ((ANY_INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_WRAPS (type
))
5575 || NON_SAT_FIXED_POINT_TYPE_P (type
)
5576 || (flag_associative_math
&& FLOAT_TYPE_P (type
)))
5581 /* Break up subtract operations in block BB.
5583 We do this top down because we don't know whether the subtract is
5584 part of a possible chain of reassociation except at the top.
5593 we want to break up k = t - q, but we won't until we've transformed q
5594 = b - r, which won't be broken up until we transform b = c - d.
5596 En passant, clear the GIMPLE visited flag on every statement
5597 and set UIDs within each basic block. */
5600 break_up_subtract_bb (basic_block bb
)
5602 gimple_stmt_iterator gsi
;
5604 unsigned int uid
= 1;
5606 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
5608 gimple
*stmt
= gsi_stmt (gsi
);
5609 gimple_set_visited (stmt
, false);
5610 gimple_set_uid (stmt
, uid
++);
5612 if (!is_gimple_assign (stmt
)
5613 || !can_reassociate_p (gimple_assign_lhs (stmt
)))
5616 /* Look for simple gimple subtract operations. */
5617 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
5619 if (!can_reassociate_p (gimple_assign_rhs1 (stmt
))
5620 || !can_reassociate_p (gimple_assign_rhs2 (stmt
)))
5623 /* Check for a subtract used only in an addition. If this
5624 is the case, transform it into add of a negate for better
5625 reassociation. IE transform C = A-B into C = A + -B if C
5626 is only used in an addition. */
5627 if (should_break_up_subtract (stmt
))
5628 break_up_subtract (stmt
, &gsi
);
5630 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
5631 && can_reassociate_p (gimple_assign_rhs1 (stmt
)))
5632 plus_negates
.safe_push (gimple_assign_lhs (stmt
));
5634 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
5636 son
= next_dom_son (CDI_DOMINATORS
, son
))
5637 break_up_subtract_bb (son
);
5640 /* Used for repeated factor analysis. */
5641 struct repeat_factor
5643 /* An SSA name that occurs in a multiply chain. */
5646 /* Cached rank of the factor. */
5649 /* Number of occurrences of the factor in the chain. */
5650 HOST_WIDE_INT count
;
5652 /* An SSA name representing the product of this factor and
5653 all factors appearing later in the repeated factor vector. */
5658 static vec
<repeat_factor
> repeat_factor_vec
;
5660 /* Used for sorting the repeat factor vector. Sort primarily by
5661 ascending occurrence count, secondarily by descending rank. */
5664 compare_repeat_factors (const void *x1
, const void *x2
)
5666 const repeat_factor
*rf1
= (const repeat_factor
*) x1
;
5667 const repeat_factor
*rf2
= (const repeat_factor
*) x2
;
5669 if (rf1
->count
!= rf2
->count
)
5670 return rf1
->count
- rf2
->count
;
5672 return rf2
->rank
- rf1
->rank
;
5675 /* Look for repeated operands in OPS in the multiply tree rooted at
5676 STMT. Replace them with an optimal sequence of multiplies and powi
5677 builtin calls, and remove the used operands from OPS. Return an
5678 SSA name representing the value of the replacement sequence. */
5681 attempt_builtin_powi (gimple
*stmt
, vec
<operand_entry
*> *ops
)
5683 unsigned i
, j
, vec_len
;
5686 repeat_factor
*rf1
, *rf2
;
5687 repeat_factor rfnew
;
5688 tree result
= NULL_TREE
;
5689 tree target_ssa
, iter_result
;
5690 tree type
= TREE_TYPE (gimple_get_lhs (stmt
));
5691 tree powi_fndecl
= mathfn_built_in (type
, BUILT_IN_POWI
);
5692 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
5693 gimple
*mul_stmt
, *pow_stmt
;
5695 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
5700 /* Allocate the repeated factor vector. */
5701 repeat_factor_vec
.create (10);
5703 /* Scan the OPS vector for all SSA names in the product and build
5704 up a vector of occurrence counts for each factor. */
5705 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
5707 if (TREE_CODE (oe
->op
) == SSA_NAME
)
5709 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
5711 if (rf1
->factor
== oe
->op
)
5713 rf1
->count
+= oe
->count
;
5718 if (j
>= repeat_factor_vec
.length ())
5720 rfnew
.factor
= oe
->op
;
5721 rfnew
.rank
= oe
->rank
;
5722 rfnew
.count
= oe
->count
;
5723 rfnew
.repr
= NULL_TREE
;
5724 repeat_factor_vec
.safe_push (rfnew
);
5729 /* Sort the repeated factor vector by (a) increasing occurrence count,
5730 and (b) decreasing rank. */
5731 repeat_factor_vec
.qsort (compare_repeat_factors
);
5733 /* It is generally best to combine as many base factors as possible
5734 into a product before applying __builtin_powi to the result.
5735 However, the sort order chosen for the repeated factor vector
5736 allows us to cache partial results for the product of the base
5737 factors for subsequent use. When we already have a cached partial
5738 result from a previous iteration, it is best to make use of it
5739 before looking for another __builtin_pow opportunity.
5741 As an example, consider x * x * y * y * y * z * z * z * z.
5742 We want to first compose the product x * y * z, raise it to the
5743 second power, then multiply this by y * z, and finally multiply
5744 by z. This can be done in 5 multiplies provided we cache y * z
5745 for use in both expressions:
5753 If we instead ignored the cached y * z and first multiplied by
5754 the __builtin_pow opportunity z * z, we would get the inferior:
5763 vec_len
= repeat_factor_vec
.length ();
5765 /* Repeatedly look for opportunities to create a builtin_powi call. */
5768 HOST_WIDE_INT power
;
5770 /* First look for the largest cached product of factors from
5771 preceding iterations. If found, create a builtin_powi for
5772 it if the minimum occurrence count for its factors is at
5773 least 2, or just use this cached product as our next
5774 multiplicand if the minimum occurrence count is 1. */
5775 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
5777 if (rf1
->repr
&& rf1
->count
> 0)
5787 iter_result
= rf1
->repr
;
5789 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5793 fputs ("Multiplying by cached product ", dump_file
);
5794 for (elt
= j
; elt
< vec_len
; elt
++)
5796 rf
= &repeat_factor_vec
[elt
];
5797 print_generic_expr (dump_file
, rf
->factor
);
5798 if (elt
< vec_len
- 1)
5799 fputs (" * ", dump_file
);
5801 fputs ("\n", dump_file
);
5806 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
5807 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
5808 build_int_cst (integer_type_node
,
5810 gimple_call_set_lhs (pow_stmt
, iter_result
);
5811 gimple_set_location (pow_stmt
, gimple_location (stmt
));
5812 gimple_set_uid (pow_stmt
, gimple_uid (stmt
));
5813 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
5815 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5819 fputs ("Building __builtin_pow call for cached product (",
5821 for (elt
= j
; elt
< vec_len
; elt
++)
5823 rf
= &repeat_factor_vec
[elt
];
5824 print_generic_expr (dump_file
, rf
->factor
);
5825 if (elt
< vec_len
- 1)
5826 fputs (" * ", dump_file
);
5828 fprintf (dump_file
, ")^" HOST_WIDE_INT_PRINT_DEC
"\n",
5835 /* Otherwise, find the first factor in the repeated factor
5836 vector whose occurrence count is at least 2. If no such
5837 factor exists, there are no builtin_powi opportunities
5839 FOR_EACH_VEC_ELT (repeat_factor_vec
, j
, rf1
)
5841 if (rf1
->count
>= 2)
5850 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5854 fputs ("Building __builtin_pow call for (", dump_file
);
5855 for (elt
= j
; elt
< vec_len
; elt
++)
5857 rf
= &repeat_factor_vec
[elt
];
5858 print_generic_expr (dump_file
, rf
->factor
);
5859 if (elt
< vec_len
- 1)
5860 fputs (" * ", dump_file
);
5862 fprintf (dump_file
, ")^" HOST_WIDE_INT_PRINT_DEC
"\n", power
);
5865 reassociate_stats
.pows_created
++;
5867 /* Visit each element of the vector in reverse order (so that
5868 high-occurrence elements are visited first, and within the
5869 same occurrence count, lower-ranked elements are visited
5870 first). Form a linear product of all elements in this order
5871 whose occurrencce count is at least that of element J.
5872 Record the SSA name representing the product of each element
5873 with all subsequent elements in the vector. */
5874 if (j
== vec_len
- 1)
5875 rf1
->repr
= rf1
->factor
;
5878 for (ii
= vec_len
- 2; ii
>= (int)j
; ii
--)
5882 rf1
= &repeat_factor_vec
[ii
];
5883 rf2
= &repeat_factor_vec
[ii
+ 1];
5885 /* Init the last factor's representative to be itself. */
5887 rf2
->repr
= rf2
->factor
;
5892 target_ssa
= make_temp_ssa_name (type
, NULL
, "reassocpow");
5893 mul_stmt
= gimple_build_assign (target_ssa
, MULT_EXPR
,
5895 gimple_set_location (mul_stmt
, gimple_location (stmt
));
5896 gimple_set_uid (mul_stmt
, gimple_uid (stmt
));
5897 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
5898 rf1
->repr
= target_ssa
;
5900 /* Don't reprocess the multiply we just introduced. */
5901 gimple_set_visited (mul_stmt
, true);
5905 /* Form a call to __builtin_powi for the maximum product
5906 just formed, raised to the power obtained earlier. */
5907 rf1
= &repeat_factor_vec
[j
];
5908 iter_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
5909 pow_stmt
= gimple_build_call (powi_fndecl
, 2, rf1
->repr
,
5910 build_int_cst (integer_type_node
,
5912 gimple_call_set_lhs (pow_stmt
, iter_result
);
5913 gimple_set_location (pow_stmt
, gimple_location (stmt
));
5914 gimple_set_uid (pow_stmt
, gimple_uid (stmt
));
5915 gsi_insert_before (&gsi
, pow_stmt
, GSI_SAME_STMT
);
5918 /* If we previously formed at least one other builtin_powi call,
5919 form the product of this one and those others. */
5922 tree new_result
= make_temp_ssa_name (type
, NULL
, "reassocpow");
5923 mul_stmt
= gimple_build_assign (new_result
, MULT_EXPR
,
5924 result
, iter_result
);
5925 gimple_set_location (mul_stmt
, gimple_location (stmt
));
5926 gimple_set_uid (mul_stmt
, gimple_uid (stmt
));
5927 gsi_insert_before (&gsi
, mul_stmt
, GSI_SAME_STMT
);
5928 gimple_set_visited (mul_stmt
, true);
5929 result
= new_result
;
5932 result
= iter_result
;
5934 /* Decrement the occurrence count of each element in the product
5935 by the count found above, and remove this many copies of each
5937 for (i
= j
; i
< vec_len
; i
++)
5942 rf1
= &repeat_factor_vec
[i
];
5943 rf1
->count
-= power
;
5945 FOR_EACH_VEC_ELT_REVERSE (*ops
, n
, oe
)
5947 if (oe
->op
== rf1
->factor
)
5951 ops
->ordered_remove (n
);
5967 /* At this point all elements in the repeated factor vector have a
5968 remaining occurrence count of 0 or 1, and those with a count of 1
5969 don't have cached representatives. Re-sort the ops vector and
5971 ops
->qsort (sort_by_operand_rank
);
5972 repeat_factor_vec
.release ();
5974 /* Return the final product computed herein. Note that there may
5975 still be some elements with single occurrence count left in OPS;
5976 those will be handled by the normal reassociation logic. */
5980 /* Attempt to optimize
5981 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5982 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
5985 attempt_builtin_copysign (vec
<operand_entry
*> *ops
)
5989 unsigned int length
= ops
->length ();
5990 tree cst
= ops
->last ()->op
;
5992 if (length
== 1 || TREE_CODE (cst
) != REAL_CST
)
5995 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
5997 if (TREE_CODE (oe
->op
) == SSA_NAME
5998 && has_single_use (oe
->op
))
6000 gimple
*def_stmt
= SSA_NAME_DEF_STMT (oe
->op
);
6001 if (gcall
*old_call
= dyn_cast
<gcall
*> (def_stmt
))
6004 switch (gimple_call_combined_fn (old_call
))
6007 CASE_CFN_COPYSIGN_FN
:
6008 arg0
= gimple_call_arg (old_call
, 0);
6009 arg1
= gimple_call_arg (old_call
, 1);
6010 /* The first argument of copysign must be a constant,
6011 otherwise there's nothing to do. */
6012 if (TREE_CODE (arg0
) == REAL_CST
)
6014 tree type
= TREE_TYPE (arg0
);
6015 tree mul
= const_binop (MULT_EXPR
, type
, cst
, arg0
);
6016 /* If we couldn't fold to a single constant, skip it.
6017 That happens e.g. for inexact multiplication when
6019 if (mul
== NULL_TREE
)
6021 /* Instead of adjusting OLD_CALL, let's build a new
6022 call to not leak the LHS and prevent keeping bogus
6023 debug statements. DCE will clean up the old call. */
6025 if (gimple_call_internal_p (old_call
))
6026 new_call
= gimple_build_call_internal
6027 (IFN_COPYSIGN
, 2, mul
, arg1
);
6029 new_call
= gimple_build_call
6030 (gimple_call_fndecl (old_call
), 2, mul
, arg1
);
6031 tree lhs
= make_ssa_name (type
);
6032 gimple_call_set_lhs (new_call
, lhs
);
6033 gimple_set_location (new_call
,
6034 gimple_location (old_call
));
6035 insert_stmt_after (new_call
, old_call
);
6036 /* We've used the constant, get rid of it. */
6038 bool cst1_neg
= real_isneg (TREE_REAL_CST_PTR (cst
));
6039 /* Handle the CST1 < 0 case by negating the result. */
6042 tree negrhs
= make_ssa_name (TREE_TYPE (lhs
));
6044 = gimple_build_assign (negrhs
, NEGATE_EXPR
, lhs
);
6045 insert_stmt_after (negate_stmt
, new_call
);
6050 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6052 fprintf (dump_file
, "Optimizing copysign: ");
6053 print_generic_expr (dump_file
, cst
);
6054 fprintf (dump_file
, " * COPYSIGN (");
6055 print_generic_expr (dump_file
, arg0
);
6056 fprintf (dump_file
, ", ");
6057 print_generic_expr (dump_file
, arg1
);
6058 fprintf (dump_file
, ") into %sCOPYSIGN (",
6059 cst1_neg
? "-" : "");
6060 print_generic_expr (dump_file
, mul
);
6061 fprintf (dump_file
, ", ");
6062 print_generic_expr (dump_file
, arg1
);
6063 fprintf (dump_file
, "\n");
6076 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6079 transform_stmt_to_copy (gimple_stmt_iterator
*gsi
, gimple
*stmt
, tree new_rhs
)
6083 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6085 fprintf (dump_file
, "Transforming ");
6086 print_gimple_stmt (dump_file
, stmt
, 0);
6089 rhs1
= gimple_assign_rhs1 (stmt
);
6090 gimple_assign_set_rhs_from_tree (gsi
, new_rhs
);
6092 remove_visited_stmt_chain (rhs1
);
6094 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6096 fprintf (dump_file
, " into ");
6097 print_gimple_stmt (dump_file
, stmt
, 0);
6101 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6104 transform_stmt_to_multiply (gimple_stmt_iterator
*gsi
, gimple
*stmt
,
6105 tree rhs1
, tree rhs2
)
6107 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6109 fprintf (dump_file
, "Transforming ");
6110 print_gimple_stmt (dump_file
, stmt
, 0);
6113 gimple_assign_set_rhs_with_ops (gsi
, MULT_EXPR
, rhs1
, rhs2
);
6114 update_stmt (gsi_stmt (*gsi
));
6115 remove_visited_stmt_chain (rhs1
);
6117 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6119 fprintf (dump_file
, " into ");
6120 print_gimple_stmt (dump_file
, stmt
, 0);
6124 /* Reassociate expressions in basic block BB and its post-dominator as
6127 Bubble up return status from maybe_optimize_range_tests. */
6130 reassociate_bb (basic_block bb
)
6132 gimple_stmt_iterator gsi
;
6134 gimple
*stmt
= last_stmt (bb
);
6135 bool cfg_cleanup_needed
= false;
6137 if (stmt
&& !gimple_visited_p (stmt
))
6138 cfg_cleanup_needed
|= maybe_optimize_range_tests (stmt
);
6140 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
6142 stmt
= gsi_stmt (gsi
);
6144 if (is_gimple_assign (stmt
)
6145 && !stmt_could_throw_p (cfun
, stmt
))
6147 tree lhs
, rhs1
, rhs2
;
6148 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
6150 /* If this was part of an already processed statement,
6151 we don't need to touch it again. */
6152 if (gimple_visited_p (stmt
))
6154 /* This statement might have become dead because of previous
6156 if (has_zero_uses (gimple_get_lhs (stmt
)))
6158 reassoc_remove_stmt (&gsi
);
6159 release_defs (stmt
);
6160 /* We might end up removing the last stmt above which
6161 places the iterator to the end of the sequence.
6162 Reset it to the last stmt in this case which might
6163 be the end of the sequence as well if we removed
6164 the last statement of the sequence. In which case
6165 we need to bail out. */
6166 if (gsi_end_p (gsi
))
6168 gsi
= gsi_last_bb (bb
);
6169 if (gsi_end_p (gsi
))
6176 /* If this is not a gimple binary expression, there is
6177 nothing for us to do with it. */
6178 if (get_gimple_rhs_class (rhs_code
) != GIMPLE_BINARY_RHS
)
6181 lhs
= gimple_assign_lhs (stmt
);
6182 rhs1
= gimple_assign_rhs1 (stmt
);
6183 rhs2
= gimple_assign_rhs2 (stmt
);
6185 /* For non-bit or min/max operations we can't associate
6186 all types. Verify that here. */
6187 if (rhs_code
!= BIT_IOR_EXPR
6188 && rhs_code
!= BIT_AND_EXPR
6189 && rhs_code
!= BIT_XOR_EXPR
6190 && rhs_code
!= MIN_EXPR
6191 && rhs_code
!= MAX_EXPR
6192 && (!can_reassociate_p (lhs
)
6193 || !can_reassociate_p (rhs1
)
6194 || !can_reassociate_p (rhs2
)))
6197 if (associative_tree_code (rhs_code
))
6199 auto_vec
<operand_entry
*> ops
;
6200 tree powi_result
= NULL_TREE
;
6201 bool is_vector
= VECTOR_TYPE_P (TREE_TYPE (lhs
));
6203 /* There may be no immediate uses left by the time we
6204 get here because we may have eliminated them all. */
6205 if (TREE_CODE (lhs
) == SSA_NAME
&& has_zero_uses (lhs
))
6208 gimple_set_visited (stmt
, true);
6209 linearize_expr_tree (&ops
, stmt
, true, true);
6210 ops
.qsort (sort_by_operand_rank
);
6211 int orig_len
= ops
.length ();
6212 optimize_ops_list (rhs_code
, &ops
);
6213 if (undistribute_ops_list (rhs_code
, &ops
,
6214 loop_containing_stmt (stmt
)))
6216 ops
.qsort (sort_by_operand_rank
);
6217 optimize_ops_list (rhs_code
, &ops
);
6219 if (undistribute_bitref_for_vector (rhs_code
, &ops
,
6220 loop_containing_stmt (stmt
)))
6222 ops
.qsort (sort_by_operand_rank
);
6223 optimize_ops_list (rhs_code
, &ops
);
6225 if (rhs_code
== PLUS_EXPR
6226 && transform_add_to_multiply (&ops
))
6227 ops
.qsort (sort_by_operand_rank
);
6229 if (rhs_code
== BIT_IOR_EXPR
|| rhs_code
== BIT_AND_EXPR
)
6232 optimize_vec_cond_expr (rhs_code
, &ops
);
6234 optimize_range_tests (rhs_code
, &ops
, NULL
);
6237 if (rhs_code
== MULT_EXPR
&& !is_vector
)
6239 attempt_builtin_copysign (&ops
);
6241 if (reassoc_insert_powi_p
6242 && flag_unsafe_math_optimizations
)
6243 powi_result
= attempt_builtin_powi (stmt
, &ops
);
6246 operand_entry
*last
;
6247 bool negate_result
= false;
6248 if (ops
.length () > 1
6249 && rhs_code
== MULT_EXPR
)
6252 if ((integer_minus_onep (last
->op
)
6253 || real_minus_onep (last
->op
))
6254 && !HONOR_SNANS (TREE_TYPE (lhs
))
6255 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs
))
6256 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs
))))
6259 negate_result
= true;
6264 /* If the operand vector is now empty, all operands were
6265 consumed by the __builtin_powi optimization. */
6266 if (ops
.length () == 0)
6267 transform_stmt_to_copy (&gsi
, stmt
, powi_result
);
6268 else if (ops
.length () == 1)
6270 tree last_op
= ops
.last ()->op
;
6272 /* If the stmt that defines operand has to be inserted, insert it
6274 if (ops
.last ()->stmt_to_insert
)
6275 insert_stmt_before_use (stmt
, ops
.last ()->stmt_to_insert
);
6277 transform_stmt_to_multiply (&gsi
, stmt
, last_op
,
6280 transform_stmt_to_copy (&gsi
, stmt
, last_op
);
6284 machine_mode mode
= TYPE_MODE (TREE_TYPE (lhs
));
6285 int ops_num
= ops
.length ();
6288 /* For binary bit operations, if there are at least 3
6289 operands and the last last operand in OPS is a constant,
6290 move it to the front. This helps ensure that we generate
6291 (X & Y) & C rather than (X & C) & Y. The former will
6292 often match a canonical bit test when we get to RTL. */
6293 if (ops
.length () > 2
6294 && (rhs_code
== BIT_AND_EXPR
6295 || rhs_code
== BIT_IOR_EXPR
6296 || rhs_code
== BIT_XOR_EXPR
)
6297 && TREE_CODE (ops
.last ()->op
) == INTEGER_CST
)
6298 std::swap (*ops
[0], *ops
[ops_num
- 1]);
6300 /* Only rewrite the expression tree to parallel in the
6301 last reassoc pass to avoid useless work back-and-forth
6302 with initial linearization. */
6303 if (!reassoc_insert_powi_p
6304 && ops
.length () > 3
6305 && (width
= get_reassociation_width (ops_num
, rhs_code
,
6308 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6310 "Width = %d was chosen for reassociation\n",
6312 rewrite_expr_tree_parallel (as_a
<gassign
*> (stmt
),
6317 /* When there are three operands left, we want
6318 to make sure the ones that get the double
6319 binary op are chosen wisely. */
6320 int len
= ops
.length ();
6322 swap_ops_for_binary_stmt (ops
, len
- 3, stmt
);
6324 new_lhs
= rewrite_expr_tree (stmt
, 0, ops
,
6330 /* If we combined some repeated factors into a
6331 __builtin_powi call, multiply that result by the
6332 reassociated operands. */
6335 gimple
*mul_stmt
, *lhs_stmt
= SSA_NAME_DEF_STMT (lhs
);
6336 tree type
= TREE_TYPE (lhs
);
6337 tree target_ssa
= make_temp_ssa_name (type
, NULL
,
6339 gimple_set_lhs (lhs_stmt
, target_ssa
);
6340 update_stmt (lhs_stmt
);
6343 target_ssa
= new_lhs
;
6346 mul_stmt
= gimple_build_assign (lhs
, MULT_EXPR
,
6347 powi_result
, target_ssa
);
6348 gimple_set_location (mul_stmt
, gimple_location (stmt
));
6349 gimple_set_uid (mul_stmt
, gimple_uid (stmt
));
6350 gsi_insert_after (&gsi
, mul_stmt
, GSI_NEW_STMT
);
6356 stmt
= SSA_NAME_DEF_STMT (lhs
);
6357 tree tmp
= make_ssa_name (TREE_TYPE (lhs
));
6358 gimple_set_lhs (stmt
, tmp
);
6361 gassign
*neg_stmt
= gimple_build_assign (lhs
, NEGATE_EXPR
,
6363 gimple_set_uid (neg_stmt
, gimple_uid (stmt
));
6364 gsi_insert_after (&gsi
, neg_stmt
, GSI_NEW_STMT
);
6370 for (son
= first_dom_son (CDI_POST_DOMINATORS
, bb
);
6372 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
6373 cfg_cleanup_needed
|= reassociate_bb (son
);
6375 return cfg_cleanup_needed
;
6378 /* Add jumps around shifts for range tests turned into bit tests.
6379 For each SSA_NAME VAR we have code like:
6380 VAR = ...; // final stmt of range comparison
6381 // bit test here...;
6382 OTHERVAR = ...; // final stmt of the bit test sequence
6383 RES = VAR | OTHERVAR;
6384 Turn the above into:
6391 // bit test here...;
6394 # RES = PHI<1(l1), OTHERVAR(l2)>; */
6402 FOR_EACH_VEC_ELT (reassoc_branch_fixups
, i
, var
)
6404 gimple
*def_stmt
= SSA_NAME_DEF_STMT (var
);
6407 bool ok
= single_imm_use (var
, &use
, &use_stmt
);
6409 && is_gimple_assign (use_stmt
)
6410 && gimple_assign_rhs_code (use_stmt
) == BIT_IOR_EXPR
6411 && gimple_bb (def_stmt
) == gimple_bb (use_stmt
));
6413 basic_block cond_bb
= gimple_bb (def_stmt
);
6414 basic_block then_bb
= split_block (cond_bb
, def_stmt
)->dest
;
6415 basic_block merge_bb
= split_block (then_bb
, use_stmt
)->dest
;
6417 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
6418 gimple
*g
= gimple_build_cond (NE_EXPR
, var
,
6419 build_zero_cst (TREE_TYPE (var
)),
6420 NULL_TREE
, NULL_TREE
);
6421 location_t loc
= gimple_location (use_stmt
);
6422 gimple_set_location (g
, loc
);
6423 gsi_insert_after (&gsi
, g
, GSI_NEW_STMT
);
6425 edge etrue
= make_edge (cond_bb
, merge_bb
, EDGE_TRUE_VALUE
);
6426 etrue
->probability
= profile_probability::even ();
6427 edge efalse
= find_edge (cond_bb
, then_bb
);
6428 efalse
->flags
= EDGE_FALSE_VALUE
;
6429 efalse
->probability
-= etrue
->probability
;
6430 then_bb
->count
-= etrue
->count ();
6432 tree othervar
= NULL_TREE
;
6433 if (gimple_assign_rhs1 (use_stmt
) == var
)
6434 othervar
= gimple_assign_rhs2 (use_stmt
);
6435 else if (gimple_assign_rhs2 (use_stmt
) == var
)
6436 othervar
= gimple_assign_rhs1 (use_stmt
);
6439 tree lhs
= gimple_assign_lhs (use_stmt
);
6440 gphi
*phi
= create_phi_node (lhs
, merge_bb
);
6441 add_phi_arg (phi
, build_one_cst (TREE_TYPE (lhs
)), etrue
, loc
);
6442 add_phi_arg (phi
, othervar
, single_succ_edge (then_bb
), loc
);
6443 gsi
= gsi_for_stmt (use_stmt
);
6444 gsi_remove (&gsi
, true);
6446 set_immediate_dominator (CDI_DOMINATORS
, merge_bb
, cond_bb
);
6447 set_immediate_dominator (CDI_POST_DOMINATORS
, cond_bb
, merge_bb
);
6449 reassoc_branch_fixups
.release ();
6452 void dump_ops_vector (FILE *file
, vec
<operand_entry
*> ops
);
6453 void debug_ops_vector (vec
<operand_entry
*> ops
);
6455 /* Dump the operand entry vector OPS to FILE. */
6458 dump_ops_vector (FILE *file
, vec
<operand_entry
*> ops
)
6463 FOR_EACH_VEC_ELT (ops
, i
, oe
)
6465 fprintf (file
, "Op %d -> rank: %d, tree: ", i
, oe
->rank
);
6466 print_generic_expr (file
, oe
->op
);
6467 fprintf (file
, "\n");
6471 /* Dump the operand entry vector OPS to STDERR. */
6474 debug_ops_vector (vec
<operand_entry
*> ops
)
6476 dump_ops_vector (stderr
, ops
);
6479 /* Bubble up return status from reassociate_bb. */
6484 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
6485 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun
));
6488 /* Initialize the reassociation pass. */
6495 int *bbs
= XNEWVEC (int, n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
);
6497 /* Find the loops, so that we can prevent moving calculations in
6499 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
6501 memset (&reassociate_stats
, 0, sizeof (reassociate_stats
));
6503 next_operand_entry_id
= 0;
6505 /* Reverse RPO (Reverse Post Order) will give us something where
6506 deeper loops come later. */
6507 pre_and_rev_post_order_compute (NULL
, bbs
, false);
6508 bb_rank
= XCNEWVEC (long, last_basic_block_for_fn (cfun
));
6509 operand_rank
= new hash_map
<tree
, long>;
6511 /* Give each default definition a distinct rank. This includes
6512 parameters and the static chain. Walk backwards over all
6513 SSA names so that we get proper rank ordering according
6514 to tree_swap_operands_p. */
6515 for (i
= num_ssa_names
- 1; i
> 0; --i
)
6517 tree name
= ssa_name (i
);
6518 if (name
&& SSA_NAME_IS_DEFAULT_DEF (name
))
6519 insert_operand_rank (name
, ++rank
);
6522 /* Set up rank for each BB */
6523 for (i
= 0; i
< n_basic_blocks_for_fn (cfun
) - NUM_FIXED_BLOCKS
; i
++)
6524 bb_rank
[bbs
[i
]] = ++rank
<< 16;
6527 calculate_dominance_info (CDI_POST_DOMINATORS
);
6528 plus_negates
= vNULL
;
6531 /* Cleanup after the reassociation pass, and print stats if
6537 statistics_counter_event (cfun
, "Linearized",
6538 reassociate_stats
.linearized
);
6539 statistics_counter_event (cfun
, "Constants eliminated",
6540 reassociate_stats
.constants_eliminated
);
6541 statistics_counter_event (cfun
, "Ops eliminated",
6542 reassociate_stats
.ops_eliminated
);
6543 statistics_counter_event (cfun
, "Statements rewritten",
6544 reassociate_stats
.rewritten
);
6545 statistics_counter_event (cfun
, "Built-in pow[i] calls encountered",
6546 reassociate_stats
.pows_encountered
);
6547 statistics_counter_event (cfun
, "Built-in powi calls created",
6548 reassociate_stats
.pows_created
);
6550 delete operand_rank
;
6551 operand_entry_pool
.release ();
6553 plus_negates
.release ();
6554 free_dominance_info (CDI_POST_DOMINATORS
);
6555 loop_optimizer_finalize ();
6558 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
6559 insertion of __builtin_powi calls.
6561 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
6562 optimization of a gimple conditional. Otherwise returns zero. */
6565 execute_reassoc (bool insert_powi_p
)
6567 reassoc_insert_powi_p
= insert_powi_p
;
6571 bool cfg_cleanup_needed
;
6572 cfg_cleanup_needed
= do_reassoc ();
6573 repropagate_negates ();
6577 return cfg_cleanup_needed
? TODO_cleanup_cfg
: 0;
6582 const pass_data pass_data_reassoc
=
6584 GIMPLE_PASS
, /* type */
6585 "reassoc", /* name */
6586 OPTGROUP_NONE
, /* optinfo_flags */
6587 TV_TREE_REASSOC
, /* tv_id */
6588 ( PROP_cfg
| PROP_ssa
), /* properties_required */
6589 0, /* properties_provided */
6590 0, /* properties_destroyed */
6591 0, /* todo_flags_start */
6592 TODO_update_ssa_only_virtuals
, /* todo_flags_finish */
6595 class pass_reassoc
: public gimple_opt_pass
6598 pass_reassoc (gcc::context
*ctxt
)
6599 : gimple_opt_pass (pass_data_reassoc
, ctxt
), insert_powi_p (false)
6602 /* opt_pass methods: */
6603 opt_pass
* clone () { return new pass_reassoc (m_ctxt
); }
6604 void set_pass_param (unsigned int n
, bool param
)
6606 gcc_assert (n
== 0);
6607 insert_powi_p
= param
;
6609 virtual bool gate (function
*) { return flag_tree_reassoc
!= 0; }
6610 virtual unsigned int execute (function
*)
6611 { return execute_reassoc (insert_powi_p
); }
6614 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
6615 point 3a in the pass header comment. */
6617 }; // class pass_reassoc
6622 make_pass_reassoc (gcc::context
*ctxt
)
6624 return new pass_reassoc (ctxt
);