1 /* Reassociation for trees.
2 Copyright (C) 2005-2014 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
24 #include "hash-table.h"
29 #include "stor-layout.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-inline.h"
34 #include "tree-ssa-alias.h"
35 #include "internal-fn.h"
36 #include "gimple-fold.h"
38 #include "gimple-expr.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "gimple-ssa.h"
45 #include "tree-phinodes.h"
46 #include "ssa-iterators.h"
47 #include "stringpool.h"
48 #include "tree-ssanames.h"
49 #include "tree-ssa-loop-niter.h"
50 #include "tree-ssa-loop.h"
54 #include "tree-iterator.h"
55 #include "tree-pass.h"
56 #include "alloc-pool.h"
57 #include "langhooks.h"
62 #include "diagnostic-core.h"
65 /* This is a simple global reassociation pass. It is, in part, based
66 on the LLVM pass of the same name (They do some things more/less
67 than we do, in different orders, etc).
69 It consists of five steps:
71 1. Breaking up subtract operations into addition + negate, where
72 it would promote the reassociation of adds.
74 2. Left linearization of the expression trees, so that (A+B)+(C+D)
75 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
76 During linearization, we place the operands of the binary
77 expressions into a vector of operand_entry_t
79 3. Optimization of the operand lists, eliminating things like a + -a, a & a, etc.
82 3a. Combine repeated factors with the same occurrence counts
83 into a __builtin_powi call that will later be optimized into
84 an optimal number of multiplies.
86 4. Rewrite the expression trees we linearized and optimized so
87 they are in proper rank order.
89 5. Repropagate negates, as nothing else will clean it up ATM.
91 A bit of theory on #4, since nobody seems to write anything down
92 about why it makes sense to do it the way they do it:
94 We could do this much nicer theoretically, but don't (for reasons
95 explained after how to do it theoretically nice :P).
97 In order to promote the most redundancy elimination, you want
98 binary expressions whose operands are the same rank (or
99 preferably, the same value) exposed to the redundancy eliminator,
100 for possible elimination.
102 So the way to do this if we really cared, is to build the new op
103 tree from the leaves to the roots, merging as you go, and putting the
104 new op on the end of the worklist, until you are left with one
105 thing on the worklist.
107 IE if you have to rewrite the following set of operands (listed with
108 rank in parentheses), with opcode PLUS_EXPR:
110 a (1), b (1), c (1), d (2), e (2)
113 We start with our merge worklist empty, and the ops list with all of the ops on it.
116 You want to first merge all leaves of the same rank, as much as possible.
119 So first build a binary op of
121 mergetmp = a + b, and put "mergetmp" on the merge worklist.
123 Because there is no three operand form of PLUS_EXPR, c is not going to
124 be exposed to redundancy elimination as a rank 1 operand.
126 So you might as well throw it on the merge worklist (you could also
127 consider it to now be a rank two operand, and merge it with d and e,
128 but in this case, you then have evicted e from a binary op. So at
129 least in this situation, you can't win.)
131 Then build a binary op of d + e (mergetmp2 = d + e)
134 and put mergetmp2 on the merge worklist.
136 so merge worklist = {mergetmp, c, mergetmp2}
138 Continue building binary ops of these operations until you have only
139 one operation left on the worklist.
144 mergetmp3 = mergetmp + c
146 worklist = {mergetmp2, mergetmp3}
148 mergetmp4 = mergetmp2 + mergetmp3
150 worklist = {mergetmp4}
152 because we have one operation left, we can now just set the original
153 statement equal to the result of that operation.
155 This will at least expose a + b and d + e to redundancy elimination
156 as binary operations.
158 For extra points, you can reuse the old statements to build the
159 mergetmps, since you shouldn't run out.
161 So why don't we do this?
163 Because it's expensive, and rarely will help. Most trees we are
164 reassociating have 3 or fewer ops. If they have 2 ops, they already
165 will be written into a nice single binary op. If you have 3 ops, a
166 single simple check suffices to tell you whether the first two are of the
167 same rank. If so, you know to order it as mergetmp = op1 + op2,
170 newstmt = mergetmp + op3, instead of mergetmp = op2 + op3,
174 newstmt = mergetmp + op1.
176 If all three are of the same rank, you can't expose them all in a
177 single binary operator anyway, so the above is *still* the best you can do.
180 Thus, this is what we do. When we have three ops left, we check to see
181 what order to put them in, and call it a day. As a nod to vector sum
182 reduction, we check if any of the ops are really a phi node that is a
183 destructive update for the associating op, and keep the destructive
184 update together for vector sum reduction recognition. */
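/* A minimal illustrative sketch (hypothetical, not part of the pass):
   what steps 2 and 4 above amount to at the C source level.  All names
   below are made up for the example.  */
static int
reassoc_linearize_example (int a, int b, int c, int d)
{
  int as_written = (a + b) + (c + d);    /* the tree as the user wrote it */
  int linearized = (((a + b) + c) + d);  /* left-linearized, rank-ordered */
  return as_written == linearized;       /* same value, different shape   */
}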
191   int constants_eliminated;
194   int pows_encountered;
198 /* Operator, rank pair. */
199 typedef struct operand_entry
207 static alloc_pool operand_entry_pool;
209 /* This is used to assign a unique ID to each struct operand_entry
210 so that qsort results are identical on different hosts. */
211 static int next_operand_entry_id;
213 /* Starting rank number for a given basic block, so that we can rank
214    operations using unmovable instructions in that BB based on the bb depth.  */
216 static long *bb_rank;
218 /* Operand->rank hashtable. */
219 static hash_map <tree, long> *operand_rank;
222 static long get_rank (tree);
223 static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
225 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
226 possibly added by gsi_remove. */
229 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
231   gimple stmt = gsi_stmt (*gsi);
233   if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
234     return gsi_remove (gsi, true);
236   gimple_stmt_iterator prev = *gsi;
238   unsigned uid = gimple_uid (stmt);
239   basic_block bb = gimple_bb (stmt);
240   bool ret = gsi_remove (gsi, true);
241   if (!gsi_end_p (prev))
244     prev = gsi_start_bb (bb);
245   gimple end_stmt = gsi_stmt (*gsi);
246   while ((stmt = gsi_stmt (prev)) != end_stmt)
248       gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
249       gimple_set_uid (stmt, uid);
255 /* Bias amount for loop-carried phis. We want this to be larger than
256 the depth of any reassociation tree we can see, but not larger than
257 the rank difference between two blocks. */
258 #define PHI_LOOP_BIAS (1 << 15)
260 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
261 an innermost loop, and the phi has only a single use which is inside
262 the loop, then the rank is the block rank of the loop latch plus an
263 extra bias for the loop-carried dependence. This causes expressions
264 calculated into an accumulator variable to be independent for each
265 iteration of the loop. If STMT is some other phi, the rank is the
266 block rank of its containing block. */
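/* A minimal illustrative sketch (hypothetical, not part of the pass):
   the accumulator pattern the function below is meant to recognize.  In
   GIMPLE, `sum' becomes a loop-carried PHI in the loop header; biasing
   that PHI's rank makes it the last operand added, so each iteration's
   other work stays independent of the previous iteration.  */
static int
phi_bias_example (const int *a, const int *b, int n)
{
  int sum = 0;
  for (int i = 0; i < n; i++)
    /* Prefer (a[i] + b[i]) + sum over (sum + a[i]) + b[i].  */
    sum += a[i] + b[i];
  return sum;
}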
268 phi_rank (gimple stmt)
270   basic_block bb = gimple_bb (stmt);
271   struct loop *father = bb->loop_father;
277   /* We only care about real loops (those with a latch).  */
279     return bb_rank[bb->index];
281   /* Interesting phis must be in headers of innermost loops.  */
282   if (bb != father->header
284     return bb_rank[bb->index];
286   /* Ignore virtual SSA_NAMEs.  */
287   res = gimple_phi_result (stmt);
288   if (virtual_operand_p (res))
289     return bb_rank[bb->index];
291   /* The phi definition must have a single use, and that use must be
292      within the loop.  Otherwise this isn't an accumulator pattern.  */
293   if (!single_imm_use (res, &use, &use_stmt)
294       || gimple_bb (use_stmt)->loop_father != father)
295     return bb_rank[bb->index];
297   /* Look for phi arguments from within the loop.  If found, bias this phi.  */
298   for (i = 0; i < gimple_phi_num_args (stmt); i++)
300       tree arg = gimple_phi_arg_def (stmt, i);
301       if (TREE_CODE (arg) == SSA_NAME
302           && !SSA_NAME_IS_DEFAULT_DEF (arg))
304           gimple def_stmt = SSA_NAME_DEF_STMT (arg);
305           if (gimple_bb (def_stmt)->loop_father == father)
306             return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
310   /* Must be an uninteresting phi.  */
311   return bb_rank[bb->index];
314 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
315 loop-carried dependence of an innermost loop, return TRUE; else return FALSE.  */
318 loop_carried_phi (tree exp)
323   if (TREE_CODE (exp) != SSA_NAME
324       || SSA_NAME_IS_DEFAULT_DEF (exp))
327   phi_stmt = SSA_NAME_DEF_STMT (exp);
329   if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
332   /* Non-loop-carried phis have block rank.  Loop-carried phis have
333      an additional bias added in.  If this phi doesn't have block rank,
334      it's biased and should not be propagated.  */
335   block_rank = bb_rank[gimple_bb (phi_stmt)->index];
337   if (phi_rank (phi_stmt) != block_rank)
343 /* Return the maximum of RANK and the rank that should be propagated
344 from expression OP. For most operands, this is just the rank of OP.
345 For loop-carried phis, the value is zero to avoid undoing the bias
346 in favor of the phi. */
348 propagate_rank (long rank, tree op)
352   if (loop_carried_phi (op))
355   op_rank = get_rank (op);
357   return MAX (rank, op_rank);
360 /* Look up the operand rank structure for expression E. */
363 find_operand_rank (tree e)
365   long *slot = operand_rank->get (e);
366   return slot ? *slot : -1;
369 /* Insert {E,RANK} into the operand rank hashtable. */
372 insert_operand_rank (tree e, long rank)
374   gcc_assert (rank > 0);
375   gcc_assert (!operand_rank->put (e, rank));
378 /* Given an expression E, return the rank of the expression. */
383 /* Constants have rank 0. */
384   if (is_gimple_min_invariant (e))
387   /* SSA_NAME's have the rank of the expression they are the result of.
389      For globals and uninitialized values, the rank is 0.
390      For function arguments, use the pre-setup rank.
391      For PHI nodes, stores, asm statements, etc, we use the rank of the BB.
393      For simple operations, the rank is the maximum rank of any of
394      its operands, or the bb_rank, whichever is less.
395      I make no claims that this is optimal, however, it gives good
396      results.  */
398 /* We make an exception to the normal ranking system to break
399 dependences of accumulator variables in loops. Suppose we
400 	have a simple one-block loop containing:

	  x_1 = phi(x_0, x_2)
	  b = a + x_1
	  c = b + d
	  x_2 = c + e

407 	As shown, each iteration of the calculation into x is fully
408 	dependent upon the iteration before it.  We would prefer to
409 	see this in the form:

	  x_1 = phi(x_0, x_2)
	  b = a + d
	  c = b + e
	  x_2 = c + x_1
416 If the loop is unrolled, the calculations of b and c from
417 different iterations can be interleaved.
419 To obtain this result during reassociation, we bias the rank
420 of the phi definition x_1 upward, when it is recognized as an
421 accumulator pattern. The artificial rank causes it to be
422 added last, providing the desired independence. */
424   if (TREE_CODE (e) == SSA_NAME)
431       if (SSA_NAME_IS_DEFAULT_DEF (e))
432         return find_operand_rank (e);
434       stmt = SSA_NAME_DEF_STMT (e);
435       if (gimple_code (stmt) == GIMPLE_PHI)
436         return phi_rank (stmt);
438       if (!is_gimple_assign (stmt)
439           || gimple_vdef (stmt))
440         return bb_rank[gimple_bb (stmt)->index];
442       /* If we already have a rank for this expression, use that.  */
443       rank = find_operand_rank (e);
447       /* Otherwise, find the maximum rank for the operands.  As an
448          exception, remove the bias from loop-carried phis when propagating
449          the rank so that dependent operations are not also biased.  */
451       if (gimple_assign_single_p (stmt))
453           tree rhs = gimple_assign_rhs1 (stmt);
454           n = TREE_OPERAND_LENGTH (rhs);
456             rank = propagate_rank (rank, rhs);
459               for (i = 0; i < n; i++)
461                   op = TREE_OPERAND (rhs, i);
464                   rank = propagate_rank (rank, op);
470           n = gimple_num_ops (stmt);
471           for (i = 1; i < n; i++)
473               op = gimple_op (stmt, i);
475               rank = propagate_rank (rank, op);
479       if (dump_file && (dump_flags & TDF_DETAILS))
481           fprintf (dump_file, "Rank for ");
482           print_generic_expr (dump_file, e, 0);
483           fprintf (dump_file, " is %ld\n", (rank + 1));
486       /* Note the rank in the hashtable so we don't recompute it.  */
487       insert_operand_rank (e, (rank + 1));
491 /* Globals, etc, are rank 0 */
496 /* We want integer ones to end up last no matter what, since they are
497 the ones we can do the most with. */
498 #define INTEGER_CONST_TYPE (1 << 3)
499 #define FLOAT_CONST_TYPE (1 << 2)
500 #define OTHER_CONST_TYPE (1 << 1)
502 /* Classify an invariant tree into integer, float, or other, so that
503 we can sort them to be near other constants of the same type. */
505 constant_type (tree t)
507   if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
508     return INTEGER_CONST_TYPE;
509   else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
510     return FLOAT_CONST_TYPE;
512     return OTHER_CONST_TYPE;
515 /* qsort comparison function to sort operand entries PA and PB by rank
516 so that the sorted array is ordered by rank in decreasing order. */
518 sort_by_operand_rank (const void *pa
, const void *pb
)
520 const operand_entry_t oea
= *(const operand_entry_t
*)pa
;
521 const operand_entry_t oeb
= *(const operand_entry_t
*)pb
;
523 /* It's nicer for optimize_expression if constants that are likely
524 to fold when added/multiplied//whatever are put next to each
525 other. Since all constants have rank 0, order them by type. */
526 if (oeb
->rank
== 0 && oea
->rank
== 0)
528 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
529 return constant_type (oeb
->op
) - constant_type (oea
->op
);
531 /* To make sorting result stable, we use unique IDs to determine order.  */
533 return oeb
->id
- oea
->id
;
536 /* Lastly, make sure the versions that are the same go next to each other.  */
538 if ((oeb
->rank
- oea
->rank
== 0)
539 && TREE_CODE (oea
->op
) == SSA_NAME
540 && TREE_CODE (oeb
->op
) == SSA_NAME
)
542 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
543    versions of removed SSA_NAMEs, so if possible, prefer to sort
544    based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.  */
546 if (!SSA_NAME_IS_DEFAULT_DEF (oea
->op
)
547 && !SSA_NAME_IS_DEFAULT_DEF (oeb
->op
)
548 && SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
550 gimple stmta
= SSA_NAME_DEF_STMT (oea
->op
);
551 gimple stmtb
= SSA_NAME_DEF_STMT (oeb
->op
);
552 basic_block bba
= gimple_bb (stmta
);
553 basic_block bbb
= gimple_bb (stmtb
);
556 if (bb_rank
[bbb
->index
] != bb_rank
[bba
->index
])
557 return bb_rank
[bbb
->index
] - bb_rank
[bba
->index
];
561 bool da
= reassoc_stmt_dominates_stmt_p (stmta
, stmtb
);
562 bool db
= reassoc_stmt_dominates_stmt_p (stmtb
, stmta
);
568 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
569 return SSA_NAME_VERSION (oeb
->op
) - SSA_NAME_VERSION (oea
->op
);
571 return oeb
->id
- oea
->id
;
574 if (oeb
->rank
!= oea
->rank
)
575 return oeb
->rank
- oea
->rank
;
577 return oeb
->id
- oea
->id
;
580 /* Add an operand entry to *OPS for the tree operand OP. */
583 add_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
)
585 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
588 oe
->rank
= get_rank (op
);
589 oe
->id
= next_operand_entry_id
++;
594 /* Add an operand entry to *OPS for the tree operand OP with repeat
598 add_repeat_to_ops_vec (vec
<operand_entry_t
> *ops
, tree op
,
599 HOST_WIDE_INT repeat
)
601 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
604 oe
->rank
= get_rank (op
);
605 oe
->id
= next_operand_entry_id
++;
609 reassociate_stats
.pows_encountered
++;
612 /* Return true if STMT is a reassociable operation containing a binary
613    operation with tree code CODE, and is inside LOOP.  */
616 is_reassociable_op (gimple stmt
, enum tree_code code
, struct loop
*loop
)
618 basic_block bb
= gimple_bb (stmt
);
620 if (gimple_bb (stmt
) == NULL
)
623 if (!flow_bb_inside_loop_p (loop
, bb
))
626 if (is_gimple_assign (stmt
)
627 && gimple_assign_rhs_code (stmt
) == code
628 && has_single_use (gimple_assign_lhs (stmt
)))
635 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
636 operand of that operation.  Otherwise, return NULL.  */
639 get_unary_op (tree name
, enum tree_code opcode
)
641 gimple stmt
= SSA_NAME_DEF_STMT (name
);
643 if (!is_gimple_assign (stmt
))
646 if (gimple_assign_rhs_code (stmt
) == opcode
)
647 return gimple_assign_rhs1 (stmt
);
651 /* If CURR and LAST are a pair of ops that OPCODE allows us to
652 eliminate through equivalences, do so, remove them from OPS, and
653 return true. Otherwise, return false. */
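/* A minimal illustrative sketch (hypothetical, not part of the pass):
   the equivalences the routine below exploits on adjacent equal ops.  */
static int
duplicate_pair_example (int a, int b)
{
  int one_dropped  = b & a & a;   /* a & a == a: one copy is eliminated     */
  int both_dropped = b ^ a ^ a;   /* a ^ a == 0: both copies are eliminated */
  return one_dropped + both_dropped;
}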
656 eliminate_duplicate_pair (enum tree_code opcode
,
657 vec
<operand_entry_t
> *ops
,
660 operand_entry_t curr
,
661 operand_entry_t last
)
664 /* If we have two of the same op, and the opcode is & |, min, or max,
665 we can eliminate one of them.
666 If we have two of the same op, and the opcode is ^, we can
667 eliminate both of them. */
669 if (last
&& last
->op
== curr
->op
)
677 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
679 fprintf (dump_file
, "Equivalence: ");
680 print_generic_expr (dump_file
, curr
->op
, 0);
681 fprintf (dump_file
, " [&|minmax] ");
682 print_generic_expr (dump_file
, last
->op
, 0);
683 fprintf (dump_file
, " -> ");
684 print_generic_stmt (dump_file
, last
->op
, 0);
687 ops
->ordered_remove (i
);
688 reassociate_stats
.ops_eliminated
++;
693 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
695 fprintf (dump_file
, "Equivalence: ");
696 print_generic_expr (dump_file
, curr
->op
, 0);
697 fprintf (dump_file
, " ^ ");
698 print_generic_expr (dump_file
, last
->op
, 0);
699 fprintf (dump_file
, " -> nothing\n");
702 reassociate_stats
.ops_eliminated
+= 2;
704 if (ops
->length () == 2)
707 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
712 ops
->ordered_remove (i
-1);
713 ops
->ordered_remove (i
-1);
725 static vec
<tree
> plus_negates
;
727 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
728 expression, look in OPS for a corresponding positive operation to cancel
729 it out. If we find one, remove the other from OPS, replace
730 OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise, return false.  */
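/* A minimal illustrative sketch (hypothetical, not part of the pass):
   the cancellations the routine below performs in a PLUS_EXPR chain.  */
static int
plus_minus_pair_example (int a, int b)
{
  int negate_case = a + (-b) + b;   /* -b and b cancel to 0               */
  int not_case    = a + (~b) + b;   /* ~b + b folds to -1 (two's
                                       complement)                        */
  return negate_case + not_case;
}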
734 eliminate_plus_minus_pair (enum tree_code opcode
,
735 vec
<operand_entry_t
> *ops
,
736 unsigned int currindex
,
737 operand_entry_t curr
)
744 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
747 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
748 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
749 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
752 /* Any non-negated version will have a rank that is one less than
753 the current rank.  So once we hit those ranks, if we don't find one, we can stop.  */
756 for (i
= currindex
+ 1;
757 ops
->iterate (i
, &oe
)
758 && oe
->rank
>= curr
->rank
- 1 ;
761 if (oe
->op
== negateop
)
764 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
766 fprintf (dump_file
, "Equivalence: ");
767 print_generic_expr (dump_file
, negateop
, 0);
768 fprintf (dump_file
, " + -");
769 print_generic_expr (dump_file
, oe
->op
, 0);
770 fprintf (dump_file
, " -> 0\n");
773 ops
->ordered_remove (i
);
774 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
775 ops
->ordered_remove (currindex
);
776 reassociate_stats
.ops_eliminated
++;
780 else if (oe
->op
== notop
)
782 tree op_type
= TREE_TYPE (oe
->op
);
784 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
786 fprintf (dump_file
, "Equivalence: ");
787 print_generic_expr (dump_file
, notop
, 0);
788 fprintf (dump_file
, " + ~");
789 print_generic_expr (dump_file
, oe
->op
, 0);
790 fprintf (dump_file
, " -> -1\n");
793 ops
->ordered_remove (i
);
794 add_to_ops_vec (ops
, build_int_cst_type (op_type
, -1));
795 ops
->ordered_remove (currindex
);
796 reassociate_stats
.ops_eliminated
++;
802 /* CURR->OP is a negate expr in a plus expr: save it for later
803 inspection in repropagate_negates(). */
804 if (negateop
!= NULL_TREE
)
805 plus_negates
.safe_push (curr
->op
);
810 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
811 bitwise not expression, look in OPS for a corresponding operand to
812 cancel it out. If we find one, remove the other from OPS, replace
813 OPS[CURRINDEX] with 0, and return true.  Otherwise, return false.  */
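/* A minimal illustrative sketch (hypothetical, not part of the pass):
   the identities the routine below applies when a chain contains both
   X and ~X.  */
static int
not_pair_example (int a, int b)
{
  int and_case = b & a & ~a;   /* a & ~a == 0, the whole chain becomes 0   */
  int ior_case = b | a | ~a;   /* a | ~a == -1, the whole chain becomes -1 */
  return and_case + ior_case;
}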
817 eliminate_not_pairs (enum tree_code opcode
,
818 vec
<operand_entry_t
> *ops
,
819 unsigned int currindex
,
820 operand_entry_t curr
)
826 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
827 || TREE_CODE (curr
->op
) != SSA_NAME
)
830 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
831 if (notop
== NULL_TREE
)
834 /* Any non-not version will have a rank that is one less than
835 the current rank.  So once we hit those ranks, if we don't find one, we can stop.  */
838 for (i
= currindex
+ 1;
839 ops
->iterate (i
, &oe
)
840 && oe
->rank
>= curr
->rank
- 1;
845 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
847 fprintf (dump_file
, "Equivalence: ");
848 print_generic_expr (dump_file
, notop
, 0);
849 if (opcode
== BIT_AND_EXPR
)
850 fprintf (dump_file
, " & ~");
851 else if (opcode
== BIT_IOR_EXPR
)
852 fprintf (dump_file
, " | ~");
853 print_generic_expr (dump_file
, oe
->op
, 0);
854 if (opcode
== BIT_AND_EXPR
)
855 fprintf (dump_file
, " -> 0\n");
856 else if (opcode
== BIT_IOR_EXPR
)
857 fprintf (dump_file
, " -> -1\n");
860 if (opcode
== BIT_AND_EXPR
)
861 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
862 else if (opcode
== BIT_IOR_EXPR
)
863 oe
->op
= build_all_ones_cst (TREE_TYPE (oe
->op
));
865 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
867 ops
->quick_push (oe
);
875 /* Use constant value that may be present in OPS to try to eliminate
876 operands. Note that this function is only really used when we've
877 eliminated ops for other reasons, or merged constants. Across
878 single statements, fold already does all of this, plus more. There
879 is little point in duplicating logic, so I've only included the
880 identities that I could ever construct testcases to trigger. */
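/* A minimal illustrative sketch (hypothetical, not part of the pass):
   the constant identities the routine below looks for.  */
static int
constant_identity_example (int x, int y)
{
  int and_zero = (x & y) & 0;    /* "& 0": all other ops are removed  */
  int ior_ones = (x | y) | -1;   /* "| -1": all other ops are removed */
  int mul_one  = x * y * 1;      /* "* 1": the constant is removed    */
  int add_zero = x + y + 0;      /* "+ 0": the constant is removed    */
  return and_zero + ior_ones + mul_one + add_zero;
}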
883 eliminate_using_constants (enum tree_code opcode
,
884 vec
<operand_entry_t
> *ops
)
886 operand_entry_t oelast
= ops
->last ();
887 tree type
= TREE_TYPE (oelast
->op
);
889 if (oelast
->rank
== 0
890 && (INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
895 if (integer_zerop (oelast
->op
))
897 if (ops
->length () != 1)
899 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
900 fprintf (dump_file
, "Found & 0, removing all other ops\n");
902 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
905 ops
->quick_push (oelast
);
909 else if (integer_all_onesp (oelast
->op
))
911 if (ops
->length () != 1)
913 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
914 fprintf (dump_file
, "Found & -1, removing\n");
916 reassociate_stats
.ops_eliminated
++;
921 if (integer_all_onesp (oelast
->op
))
923 if (ops
->length () != 1)
925 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
926 fprintf (dump_file
, "Found | -1, removing all other ops\n");
928 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
931 ops
->quick_push (oelast
);
935 else if (integer_zerop (oelast
->op
))
937 if (ops
->length () != 1)
939 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
940 fprintf (dump_file
, "Found | 0, removing\n");
942 reassociate_stats
.ops_eliminated
++;
947 if (integer_zerop (oelast
->op
)
948 || (FLOAT_TYPE_P (type
)
949 && !HONOR_NANS (TYPE_MODE (type
))
950 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type
))
951 && real_zerop (oelast
->op
)))
953 if (ops
->length () != 1)
955 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
956 fprintf (dump_file
, "Found * 0, removing all other ops\n");
958 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
960 ops
->quick_push (oelast
);
964 else if (integer_onep (oelast
->op
)
965 || (FLOAT_TYPE_P (type
)
966 && !HONOR_SNANS (TYPE_MODE (type
))
967 && real_onep (oelast
->op
)))
969 if (ops
->length () != 1)
971 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
972 fprintf (dump_file
, "Found * 1, removing\n");
974 reassociate_stats
.ops_eliminated
++;
982 if (integer_zerop (oelast
->op
)
983 || (FLOAT_TYPE_P (type
)
984 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
985 && fold_real_zero_addition_p (type
, oelast
->op
,
986 opcode
== MINUS_EXPR
)))
988 if (ops
->length () != 1)
990 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
991 fprintf (dump_file
, "Found [|^+] 0, removing\n");
993 reassociate_stats
.ops_eliminated
++;
1005 static void linearize_expr_tree (vec<operand_entry_t> *, gimple, bool, bool);
1008 /* Structure for tracking and counting operands. */
1009 typedef struct oecount_s
{
1012 enum tree_code oecode
;
1017 /* The heap for the oecount hashtable and the sorted list of operands. */
1018 static vec
<oecount
> cvec
;
1021 /* Oecount hashtable helpers. */
1023 struct oecount_hasher
1025 typedef int value_type
;
1026 typedef int compare_type
;
1027 typedef int store_values_directly
;
1028 static inline hashval_t
hash (const value_type
&);
1029 static inline bool equal (const value_type
&, const compare_type
&);
1030 static bool is_deleted (int &v
) { return v
== 1; }
1031 static void mark_deleted (int &e
) { e
= 1; }
1032 static bool is_empty (int &v
) { return v
== 0; }
1033 static void mark_empty (int &e
) { e
= 0; }
1034 static void remove (int &) {}
1037 /* Hash function for oecount. */
1040 oecount_hasher::hash (const value_type
&p
)
1042 const oecount
*c
= &cvec
[p
- 42];
1043 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
1046 /* Comparison function for oecount. */
1049 oecount_hasher::equal (const value_type
&p1
, const compare_type
&p2
)
1051 const oecount
*c1
= &cvec
[p1
- 42];
1052 const oecount
*c2
= &cvec
[p2
- 42];
1053 return (c1
->oecode
== c2
->oecode
1054 && c1
->op
== c2
->op
);
1057 /* Comparison function for qsort sorting oecount elements by count. */
1060 oecount_cmp (const void *p1
, const void *p2
)
1062 const oecount
*c1
= (const oecount
*)p1
;
1063 const oecount
*c2
= (const oecount
*)p2
;
1064 if (c1
->cnt
!= c2
->cnt
)
1065 return c1
->cnt
- c2
->cnt
;
1067 /* If counts are identical, use unique IDs to stabilize qsort. */
1068 return c1
->id
- c2
->id
;
1071 /* Return TRUE iff STMT represents a builtin call that raises OP
1072 to some exponent. */
1075 stmt_is_power_of_op (gimple stmt
, tree op
)
1079 if (!is_gimple_call (stmt
))
1082 fndecl
= gimple_call_fndecl (stmt
);
1085 || DECL_BUILT_IN_CLASS (fndecl
) != BUILT_IN_NORMAL
)
1088 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1090 CASE_FLT_FN (BUILT_IN_POW
):
1091 CASE_FLT_FN (BUILT_IN_POWI
):
1092 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1099 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1100 in place and return the result. Assumes that stmt_is_power_of_op
1101 was previously called for STMT and returned TRUE. */
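/* A minimal illustrative sketch (hypothetical, not part of the pass):
   decrementing the exponent of a __builtin_pow call factors one copy of
   the base out of the call; the caller re-applies that factor.  */
static double
decrement_power_example (double x)
{
  double before = __builtin_pow (x, 3.0);
  double after  = __builtin_pow (x, 2.0) * x;
  return before - after;   /* ideally 0.0 */
}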
1103 static HOST_WIDE_INT
1104 decrement_power (gimple stmt
)
1106 REAL_VALUE_TYPE c
, cint
;
1107 HOST_WIDE_INT power
;
1110 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt
)))
1112 CASE_FLT_FN (BUILT_IN_POW
):
1113 arg1
= gimple_call_arg (stmt
, 1);
1114 c
= TREE_REAL_CST (arg1
);
1115 power
= real_to_integer (&c
) - 1;
1116 real_from_integer (&cint
, VOIDmode
, power
, SIGNED
);
1117 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1120 CASE_FLT_FN (BUILT_IN_POWI
):
1121 arg1
= gimple_call_arg (stmt
, 1);
1122 power
= TREE_INT_CST_LOW (arg1
) - 1;
1123 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1131 /* Find the single immediate use of STMT's LHS, and replace it
1132 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1133 replace *DEF with OP as well. */
1136 propagate_op_to_single_use (tree op
, gimple stmt
, tree
*def
)
1141 gimple_stmt_iterator gsi
;
1143 if (is_gimple_call (stmt
))
1144 lhs
= gimple_call_lhs (stmt
);
1146 lhs
= gimple_assign_lhs (stmt
);
1148 gcc_assert (has_single_use (lhs
));
1149 single_imm_use (lhs
, &use
, &use_stmt
);
1153 if (TREE_CODE (op
) != SSA_NAME
)
1154 update_stmt (use_stmt
);
1155 gsi
= gsi_for_stmt (stmt
);
1156 unlink_stmt_vdef (stmt
);
1157 reassoc_remove_stmt (&gsi
);
1158 release_defs (stmt
);
1161 /* Walks the linear chain with result *DEF searching for an operation
1162 with operand OP and code OPCODE removing that from the chain. *DEF
1163 is updated if there is only one operand but no operation left. */
1166 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1168 gimple stmt
= SSA_NAME_DEF_STMT (*def
);
1174 if (opcode
== MULT_EXPR
1175 && stmt_is_power_of_op (stmt
, op
))
1177 if (decrement_power (stmt
) == 1)
1178 propagate_op_to_single_use (op
, stmt
, def
);
1182 name
= gimple_assign_rhs1 (stmt
);
1184 /* If this is the operation we look for and one of the operands
1185 is ours, simply propagate the other operand into the stmt's single use.  */
1187 if (gimple_assign_rhs_code (stmt
) == opcode
1189 || gimple_assign_rhs2 (stmt
) == op
))
1192 name
= gimple_assign_rhs2 (stmt
);
1193 propagate_op_to_single_use (name
, stmt
, def
);
1197 /* We might have a multiply of two __builtin_pow* calls, and
1198 the operand might be hiding in the rightmost one. */
1199 if (opcode
== MULT_EXPR
1200 && gimple_assign_rhs_code (stmt
) == opcode
1201 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1202 && has_single_use (gimple_assign_rhs2 (stmt
)))
1204 gimple stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1205 if (stmt_is_power_of_op (stmt2
, op
))
1207 if (decrement_power (stmt2
) == 1)
1208 propagate_op_to_single_use (op
, stmt2
, def
);
1213 /* Continue walking the chain. */
1214 gcc_assert (name
!= op
1215 && TREE_CODE (name
) == SSA_NAME
);
1216 stmt
= SSA_NAME_DEF_STMT (name
);
1221 /* Returns true if statement S1 dominates statement S2. Like
1222 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1225 reassoc_stmt_dominates_stmt_p (gimple s1
, gimple s2
)
1227 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1229 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1230 SSA_NAME. Assume it lives at the beginning of function and
1231 thus dominates everything. */
1232 if (!bb1
|| s1
== s2
)
1235 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1241 /* PHIs in the same basic block are assumed to be
1242 executed all in parallel, if only one stmt is a PHI,
1243 it dominates the other stmt in the same basic block. */
1244 if (gimple_code (s1
) == GIMPLE_PHI
)
1247 if (gimple_code (s2
) == GIMPLE_PHI
)
1250 gcc_assert (gimple_uid (s1
) && gimple_uid (s2
));
1252 if (gimple_uid (s1
) < gimple_uid (s2
))
1255 if (gimple_uid (s1
) > gimple_uid (s2
))
1258 gimple_stmt_iterator gsi
= gsi_for_stmt (s1
);
1259 unsigned int uid
= gimple_uid (s1
);
1260 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1262 gimple s
= gsi_stmt (gsi
);
1263 if (gimple_uid (s
) != uid
)
1272 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
1275 /* Insert STMT after INSERT_POINT. */
1278 insert_stmt_after (gimple stmt
, gimple insert_point
)
1280 gimple_stmt_iterator gsi
;
1283 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1284 bb
= gimple_bb (insert_point
);
1285 else if (!stmt_ends_bb_p (insert_point
))
1287 gsi
= gsi_for_stmt (insert_point
);
1288 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1289 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1293 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1294 thus if it must end a basic block, it should be a call that can
1295 throw, or some assignment that can throw. If it throws, the LHS
1296 of it will not be initialized though, so only valid places using
1297 the SSA_NAME should be dominated by the fallthru edge. */
1298 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
1299 gsi
= gsi_after_labels (bb
);
1300 if (gsi_end_p (gsi
))
1302 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1303 gimple_set_uid (stmt
,
1304 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1307 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1308 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1311 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1312 the result. Places the statement after the definition of either
1313 OP1 or OP2. Returns the new statement. */
1316 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
1318 gimple op1def
= NULL
, op2def
= NULL
;
1319 gimple_stmt_iterator gsi
;
1323 /* Create the addition statement. */
1324 op
= make_ssa_name (type
, NULL
);
1325 sum
= gimple_build_assign_with_ops (opcode
, op
, op1
, op2
);
1327 /* Find an insertion place and insert. */
1328 if (TREE_CODE (op1
) == SSA_NAME
)
1329 op1def
= SSA_NAME_DEF_STMT (op1
);
1330 if (TREE_CODE (op2
) == SSA_NAME
)
1331 op2def
= SSA_NAME_DEF_STMT (op2
);
1332 if ((!op1def
|| gimple_nop_p (op1def
))
1333 && (!op2def
|| gimple_nop_p (op2def
)))
1335 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1336 if (gsi_end_p (gsi
))
1338 gimple_stmt_iterator gsi2
1339 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1340 gimple_set_uid (sum
,
1341 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1344 gimple_set_uid (sum
, gimple_uid (gsi_stmt (gsi
)));
1345 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1349 gimple insert_point
;
1350 if ((!op1def
|| gimple_nop_p (op1def
))
1351 || (op2def
&& !gimple_nop_p (op2def
)
1352 && reassoc_stmt_dominates_stmt_p (op1def
, op2def
)))
1353 insert_point
= op2def
;
1355 insert_point
= op1def
;
1356 insert_stmt_after (sum
, insert_point
);
1363 /* Perform un-distribution of divisions and multiplications.
1364 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1365 to (A + B) / X for real X.
1367 The algorithm is organized as follows.
1369 - First we walk the addition chain *OPS looking for summands that
1370 are defined by a multiplication or a real division. This results
1371 in the candidates bitmap with relevant indices into *OPS.
1373 - Second we build the chains of multiplications or divisions for
1374 these candidates, counting the number of occurrences of (operand, code)
1375 pairs in all of the candidates chains.
1377 - Third we sort the (operand, code) pairs by number of occurrence and
1378 process them starting with the pair with the most uses.
1380 * For each such pair we walk the candidates again to build a
1381 second candidate bitmap noting all multiplication/division chains
1382 that have at least one occurrence of (operand, code).
1384 * We build an alternate addition chain only covering these
1385 candidates with one (operand, code) operation removed from their
1386 multiplication/division chain.
1388 * The first candidate gets replaced by the alternate addition chain
1389 multiplied/divided by the operand.
1391 * All candidate chains get disabled for further processing and
1392 processing of (operand, code) pairs continues.
1394 The alternate addition chains built are re-processed by the main
1395 reassociation algorithm which allows optimizing a * x * y + b * y * x
1396 to (a + b) * x * y in one invocation of the reassociation pass. */
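/* A minimal illustrative sketch (hypothetical, not part of the pass):
   the source-level rewrite undistribution aims for; for floating point
   it only applies when reassociation is allowed (e.g. -ffast-math).  */
static double
undistribute_example (double a, double b, double x, double y)
{
  double distributed   = a * x * y + b * y * x;
  double undistributed = (a + b) * x * y;
  return distributed - undistributed;   /* equal up to FP reassociation */
}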
1399 undistribute_ops_list (enum tree_code opcode
,
1400 vec
<operand_entry_t
> *ops
, struct loop
*loop
)
1402 unsigned int length
= ops
->length ();
1403 operand_entry_t oe1
;
1405 sbitmap candidates
, candidates2
;
1406 unsigned nr_candidates
, nr_candidates2
;
1407 sbitmap_iterator sbi0
;
1408 vec
<operand_entry_t
> *subops
;
1409 bool changed
= false;
1410 int next_oecount_id
= 0;
1413 || opcode
!= PLUS_EXPR
)
1416 /* Build a list of candidates to process. */
1417 candidates
= sbitmap_alloc (length
);
1418 bitmap_clear (candidates
);
1420 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1422 enum tree_code dcode
;
1425 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1427 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1428 if (!is_gimple_assign (oe1def
))
1430 dcode
= gimple_assign_rhs_code (oe1def
);
1431 if ((dcode
!= MULT_EXPR
1432 && dcode
!= RDIV_EXPR
)
1433 || !is_reassociable_op (oe1def
, dcode
, loop
))
1436 bitmap_set_bit (candidates
, i
);
1440 if (nr_candidates
< 2)
1442 sbitmap_free (candidates
);
1446 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1448 fprintf (dump_file
, "searching for un-distribute opportunities ");
1449 print_generic_expr (dump_file
,
1450 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, 0);
1451 fprintf (dump_file
, " %d\n", nr_candidates
);
1454 /* Build linearized sub-operand lists and the counting table. */
1457 hash_table
<oecount_hasher
> ctable (15);
1459 /* ??? Macro arguments cannot have multi-argument template types in
1460 them. This typedef is needed to workaround that limitation. */
1461 typedef vec
<operand_entry_t
> vec_operand_entry_t_heap
;
1462 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1463 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1466 enum tree_code oecode
;
1469 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1470 oecode
= gimple_assign_rhs_code (oedef
);
1471 linearize_expr_tree (&subops
[i
], oedef
,
1472 associative_tree_code (oecode
), false);
1474 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1481 c
.id
= next_oecount_id
++;
1484 idx
= cvec
.length () + 41;
1485 slot
= ctable
.find_slot (idx
, INSERT
);
1493 cvec
[*slot
- 42].cnt
++;
1498 /* Sort the counting table. */
1499 cvec
.qsort (oecount_cmp
);
1501 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1504 fprintf (dump_file
, "Candidates:\n");
1505 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1507 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1508 c
->oecode
== MULT_EXPR
1509 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1510 print_generic_expr (dump_file
, c
->op
, 0);
1511 fprintf (dump_file
, "\n");
1515 /* Process the (operand, code) pairs in order of most occurrence. */
1516 candidates2
= sbitmap_alloc (length
);
1517 while (!cvec
.is_empty ())
1519 oecount
*c
= &cvec
.last ();
1523 /* Now collect the operands in the outer chain that contain
1524 the common operand in their inner chain. */
1525 bitmap_clear (candidates2
);
1527 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1530 enum tree_code oecode
;
1532 tree op
= (*ops
)[i
]->op
;
1534 /* If we undistributed in this chain already this may be a constant.  */
1536 if (TREE_CODE (op
) != SSA_NAME
)
1539 oedef
= SSA_NAME_DEF_STMT (op
);
1540 oecode
= gimple_assign_rhs_code (oedef
);
1541 if (oecode
!= c
->oecode
)
1544 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1546 if (oe1
->op
== c
->op
)
1548 bitmap_set_bit (candidates2
, i
);
1555 if (nr_candidates2
>= 2)
1557 operand_entry_t oe1
, oe2
;
1559 int first
= bitmap_first_set_bit (candidates2
);
1561 /* Build the new addition chain. */
1562 oe1
= (*ops
)[first
];
1563 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1565 fprintf (dump_file
, "Building (");
1566 print_generic_expr (dump_file
, oe1
->op
, 0);
1568 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1569 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1573 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1575 fprintf (dump_file
, " + ");
1576 print_generic_expr (dump_file
, oe2
->op
, 0);
1578 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1579 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1580 oe1
->op
, oe2
->op
, opcode
);
1581 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1583 oe1
->op
= gimple_get_lhs (sum
);
1586 /* Apply the multiplication/division. */
1587 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1588 oe1
->op
, c
->op
, c
->oecode
);
1589 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1591 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1592 print_generic_expr (dump_file
, c
->op
, 0);
1593 fprintf (dump_file
, "\n");
1596 /* Record it in the addition chain and disable further
1597 undistribution with this op. */
1598 oe1
->op
= gimple_assign_lhs (prod
);
1599 oe1
->rank
= get_rank (oe1
->op
);
1600 subops
[first
].release ();
1608 for (i
= 0; i
< ops
->length (); ++i
)
1609 subops
[i
].release ();
1612 sbitmap_free (candidates
);
1613 sbitmap_free (candidates2
);
1618 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1619 expression, examine the other OPS to see if any of them are comparisons
1620 of the same values, which we may be able to combine or eliminate.
1621 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
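/* A minimal illustrative sketch (hypothetical, not part of the pass):
   two comparisons of the same operands combined into one, as the
   routine below does on GIMPLE.  */
static int
redundant_comparison_example (int a, int b)
{
  int combined = (a < b) | (a == b);   /* can be rewritten as (a <= b) */
  return combined == (a <= b);         /* always 1 */
}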
1624 eliminate_redundant_comparison (enum tree_code opcode
,
1625 vec
<operand_entry_t
> *ops
,
1626 unsigned int currindex
,
1627 operand_entry_t curr
)
1630 enum tree_code lcode
, rcode
;
1635 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
1638 /* Check that CURR is a comparison. */
1639 if (TREE_CODE (curr
->op
) != SSA_NAME
)
1641 def1
= SSA_NAME_DEF_STMT (curr
->op
);
1642 if (!is_gimple_assign (def1
))
1644 lcode
= gimple_assign_rhs_code (def1
);
1645 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
1647 op1
= gimple_assign_rhs1 (def1
);
1648 op2
= gimple_assign_rhs2 (def1
);
1650 /* Now look for a similar comparison in the remaining OPS. */
1651 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
1655 if (TREE_CODE (oe
->op
) != SSA_NAME
)
1657 def2
= SSA_NAME_DEF_STMT (oe
->op
);
1658 if (!is_gimple_assign (def2
))
1660 rcode
= gimple_assign_rhs_code (def2
);
1661 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
1664 /* If we got here, we have a match.  See if we can combine the two comparisons.  */
1666 if (opcode
== BIT_IOR_EXPR
)
1667 t
= maybe_fold_or_comparisons (lcode
, op1
, op2
,
1668 rcode
, gimple_assign_rhs1 (def2
),
1669 gimple_assign_rhs2 (def2
));
1671 t
= maybe_fold_and_comparisons (lcode
, op1
, op2
,
1672 rcode
, gimple_assign_rhs1 (def2
),
1673 gimple_assign_rhs2 (def2
));
1677 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1678 always give us a boolean_type_node value back. If the original
1679 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1680 we need to convert. */
1681 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
1682 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
1684 if (TREE_CODE (t
) != INTEGER_CST
1685 && !operand_equal_p (t
, curr
->op
, 0))
1687 enum tree_code subcode
;
1688 tree newop1
, newop2
;
1689 if (!COMPARISON_CLASS_P (t
))
1691 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1692 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1693 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1694 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
1698 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1700 fprintf (dump_file
, "Equivalence: ");
1701 print_generic_expr (dump_file
, curr
->op
, 0);
1702 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
1703 print_generic_expr (dump_file
, oe
->op
, 0);
1704 fprintf (dump_file
, " -> ");
1705 print_generic_expr (dump_file
, t
, 0);
1706 fprintf (dump_file
, "\n");
1709 /* Now we can delete oe, as it has been subsumed by the new combined expression t.  */
1711 ops
->ordered_remove (i
);
1712 reassociate_stats
.ops_eliminated
++;
1714 /* If t is the same as curr->op, we're done. Otherwise we must
1715 replace curr->op with t. Special case is if we got a constant
1716 back, in which case we add it to the end instead of in place of
1717 the current entry. */
1718 if (TREE_CODE (t
) == INTEGER_CST
)
1720 ops
->ordered_remove (currindex
);
1721 add_to_ops_vec (ops
, t
);
1723 else if (!operand_equal_p (t
, curr
->op
, 0))
1726 enum tree_code subcode
;
1729 gcc_assert (COMPARISON_CLASS_P (t
));
1730 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
1731 STRIP_USELESS_TYPE_CONVERSION (newop1
);
1732 STRIP_USELESS_TYPE_CONVERSION (newop2
);
1733 gcc_checking_assert (is_gimple_val (newop1
)
1734 && is_gimple_val (newop2
));
1735 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
1736 curr
->op
= gimple_get_lhs (sum
);
1744 /* Perform various identities and other optimizations on the list of
1745 operand entries, stored in OPS. The tree code for the binary
1746 operation between all the operands is OPCODE. */
1749 optimize_ops_list (enum tree_code opcode
,
1750 vec
<operand_entry_t
> *ops
)
1752 unsigned int length
= ops
->length ();
1755 operand_entry_t oelast
= NULL
;
1756 bool iterate
= false;
1761 oelast
= ops
->last ();
1763 /* If the last two are constants, pop the constants off, merge them
1764 and try the next two. */
1765 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
1767 operand_entry_t oelm1
= (*ops
)[length
- 2];
1769 if (oelm1
->rank
== 0
1770 && is_gimple_min_invariant (oelm1
->op
)
1771 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
1772 TREE_TYPE (oelast
->op
)))
1774 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
1775 oelm1
->op
, oelast
->op
);
1777 if (folded
&& is_gimple_min_invariant (folded
))
1779 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1780 fprintf (dump_file
, "Merging constants\n");
1785 add_to_ops_vec (ops
, folded
);
1786 reassociate_stats
.constants_eliminated
++;
1788 optimize_ops_list (opcode
, ops
);
1794 eliminate_using_constants (opcode
, ops
);
1797 for (i
= 0; ops
->iterate (i
, &oe
);)
1801 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
1803 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
1804 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
1805 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
1817 length
= ops
->length ();
1818 oelast
= ops
->last ();
1821 optimize_ops_list (opcode
, ops
);
1824 /* The following functions are subroutines to optimize_range_tests and allow
1825 it to try to change a logical combination of comparisons into a range
	test.  For example, a test like
1829      X == 2 || X == 5 || X == 3 || X == 4
	is converted into
1833      (unsigned) (X - 2) <= 3
1835 For more information see comments above fold_test_range in fold-const.c,
1836 this implementation is for GIMPLE. */
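/* A minimal illustrative sketch (hypothetical, not part of the pass):
   the range-test rewrite described above, written out in C.  */
static int
range_test_example (unsigned int x)
{
  int as_written = (x == 2 || x == 5 || x == 3 || x == 4);
  int as_range   = ((x - 2U) <= 3U);
  return as_written == as_range;   /* always 1 */
}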
1844 bool strict_overflow_p
;
1845 unsigned int idx
, next
;
1848 /* This is similar to make_range in fold-const.c, but on top of
1849 GIMPLE instead of trees. If EXP is non-NULL, it should be
1850 an SSA_NAME and STMT argument is ignored, otherwise STMT
1851 argument should be a GIMPLE_COND. */
1854 init_range_entry (struct range_entry
*r
, tree exp
, gimple stmt
)
1858 bool is_bool
, strict_overflow_p
;
1862 r
->strict_overflow_p
= false;
1864 r
->high
= NULL_TREE
;
1865 if (exp
!= NULL_TREE
1866 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
1869 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1870 and see if we can refine the range. Some of the cases below may not
1871 happen, but it doesn't seem worth worrying about this. We "continue"
1872 the outer loop when we've changed something; otherwise we "break"
1873 the switch, which will "break" the while. */
1874 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
1877 strict_overflow_p
= false;
1879 if (exp
== NULL_TREE
)
1881 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
1883 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
1888 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
1893 enum tree_code code
;
1894 tree arg0
, arg1
, exp_type
;
1898 if (exp
!= NULL_TREE
)
1900 if (TREE_CODE (exp
) != SSA_NAME
1901 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
))
1904 stmt
= SSA_NAME_DEF_STMT (exp
);
1905 if (!is_gimple_assign (stmt
))
1908 code
= gimple_assign_rhs_code (stmt
);
1909 arg0
= gimple_assign_rhs1 (stmt
);
1910 arg1
= gimple_assign_rhs2 (stmt
);
1911 exp_type
= TREE_TYPE (exp
);
1915 code
= gimple_cond_code (stmt
);
1916 arg0
= gimple_cond_lhs (stmt
);
1917 arg1
= gimple_cond_rhs (stmt
);
1918 exp_type
= boolean_type_node
;
1921 if (TREE_CODE (arg0
) != SSA_NAME
)
1923 loc
= gimple_location (stmt
);
1927 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
1928 /* Ensure the range is either +[-,0], +[0,0],
1929 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1930 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1931 or similar expression of unconditional true or
1932 false, it should not be negated. */
1933 && ((high
&& integer_zerop (high
))
1934 || (low
&& integer_onep (low
))))
1947 if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
1949 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
1954 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
1969 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
1971 &strict_overflow_p
);
1972 if (nexp
!= NULL_TREE
)
1975 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
1988 r
->strict_overflow_p
= strict_overflow_p
;
1992 /* Comparison function for qsort. Sort entries
1993 without SSA_NAME exp first, then with SSA_NAMEs sorted
1994 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1995 by increasing ->low and if ->low is the same, by increasing
1996 ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE means maximum.  */
2000 range_entry_cmp (const void *a
, const void *b
)
2002 const struct range_entry
*p
= (const struct range_entry
*) a
;
2003 const struct range_entry
*q
= (const struct range_entry
*) b
;
2005 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
2007 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2009 /* Group range_entries for the same SSA_NAME together. */
2010 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
2012 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
2014 /* If ->low is different, NULL low goes first, then by increasing ->low.  */
2016 if (p
->low
!= NULL_TREE
)
2018 if (q
->low
!= NULL_TREE
)
2020 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2022 if (tem
&& integer_onep (tem
))
2024 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2026 if (tem
&& integer_onep (tem
))
2032 else if (q
->low
!= NULL_TREE
)
2034 /* If ->high is different, NULL high goes last, before that by increasing ->high.  */
2036 if (p
->high
!= NULL_TREE
)
2038 if (q
->high
!= NULL_TREE
)
2040 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2042 if (tem
&& integer_onep (tem
))
2044 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2046 if (tem
&& integer_onep (tem
))
2052 else if (p
->high
!= NULL_TREE
)
2054 /* If both ranges are the same, sort below by ascending idx. */
2059 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2062 if (p
->idx
< q
->idx
)
2066 gcc_checking_assert (p
->idx
> q
->idx
);
2071 /* Helper routine of optimize_range_tests.
2072 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2073 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2074 OPCODE and OPS are arguments of optimize_range_tests. Return
2075 true if the range merge has been successful.
2076 If OPCODE is ERROR_MARK, this is called from within
2077 maybe_optimize_range_tests and is performing inter-bb range optimization.
2078 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank.  */
2082 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2083 unsigned int count
, enum tree_code opcode
,
2084 vec
<operand_entry_t
> *ops
, tree exp
, bool in_p
,
2085 tree low
, tree high
, bool strict_overflow_p
)
2087 operand_entry_t oe
= (*ops
)[range
->idx
];
2089 gimple stmt
= op
? SSA_NAME_DEF_STMT (op
) :
2090 last_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2091 location_t loc
= gimple_location (stmt
);
2092 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2093 tree tem
= build_range_check (loc
, optype
, exp
, in_p
, low
, high
);
2094 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2095 gimple_stmt_iterator gsi
;
2097 if (tem
== NULL_TREE
)
2100 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2101 warning_at (loc
, OPT_Wstrict_overflow
,
2102 "assuming signed overflow does not occur "
2103 "when simplifying range test");
2105 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2107 struct range_entry
*r
;
2108 fprintf (dump_file
, "Optimizing range tests ");
2109 print_generic_expr (dump_file
, range
->exp
, 0);
2110 fprintf (dump_file
, " %c[", range
->in_p
? '+' : '-');
2111 print_generic_expr (dump_file
, range
->low
, 0);
2112 fprintf (dump_file
, ", ");
2113 print_generic_expr (dump_file
, range
->high
, 0);
2114 fprintf (dump_file
, "]");
2115 for (r
= otherrange
; r
< otherrange
+ count
; r
++)
2117 fprintf (dump_file
, " and %c[", r
->in_p
? '+' : '-');
2118 print_generic_expr (dump_file
, r
->low
, 0);
2119 fprintf (dump_file
, ", ");
2120 print_generic_expr (dump_file
, r
->high
, 0);
2121 fprintf (dump_file
, "]");
2123 fprintf (dump_file
, "\n into ");
2124 print_generic_expr (dump_file
, tem
, 0);
2125 fprintf (dump_file
, "\n");
2128 if (opcode
== BIT_IOR_EXPR
2129 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2130 tem
= invert_truthvalue_loc (loc
, tem
);
2132 tem
= fold_convert_loc (loc
, optype
, tem
);
2133 gsi
= gsi_for_stmt (stmt
);
2134 /* In rare cases range->exp can be equal to lhs of stmt.
2135 In that case we have to insert after the stmt rather than before it.  */
2137 if (op
== range
->exp
)
2138 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, false,
2139 GSI_CONTINUE_LINKING
);
2142 tem
= force_gimple_operand_gsi (&gsi
, tem
, true, NULL_TREE
, true,
2146 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
2147 if (gimple_uid (gsi_stmt (gsi
)))
2150 gimple_set_uid (gsi_stmt (gsi
), gimple_uid (stmt
));
2157 range
->strict_overflow_p
= false;
2159 for (range
= otherrange
; range
< otherrange
+ count
; range
++)
2161 oe
= (*ops
)[range
->idx
];
2162 /* Now change all the other range test immediate uses, so that
2163 those tests will be optimized away. */
2164 if (opcode
== ERROR_MARK
)
2167 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
2168 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
2170 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
2171 ? boolean_false_node
: boolean_true_node
);
2174 oe
->op
= error_mark_node
;
2175 range
->exp
= NULL_TREE
;
2180 /* Optimize X == CST1 || X == CST2
2181 if popcount (CST1 ^ CST2) == 1 into
2182 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2183 Similarly for ranges. E.g.
2184 X != 2 && X != 3 && X != 10 && X != 11
2185 will be transformed by the previous optimization into
2186 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2187 and this loop can transform that into
2188 !(((X & ~8) - 2U) <= 1U). */
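/* A minimal illustrative sketch (hypothetical, not part of the pass):
   the mask trick from the comment above, written out in C.  */
static int
range_xor_example (unsigned int x)
{
  int as_two_ranges = !((x - 2U) <= 1U || (x - 10U) <= 1U);
  int as_one_range  = !(((x & ~8U) - 2U) <= 1U);
  return as_two_ranges == as_one_range;   /* always 1 */
}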
2191 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
2192 tree lowi
, tree lowj
, tree highi
, tree highj
,
2193 vec
<operand_entry_t
> *ops
,
2194 struct range_entry
*rangei
,
2195 struct range_entry
*rangej
)
2197 tree lowxor
, highxor
, tem
, exp
;
2198 /* Check highi ^ lowi == highj ^ lowj and
2199 popcount (highi ^ lowi) == 1. */
2200 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
2201 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
2203 if (tree_log2 (lowxor
) < 0)
2205 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
2206 if (!tree_int_cst_equal (lowxor
, highxor
))
2209 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
2210 exp
= fold_build2 (BIT_AND_EXPR
, type
, rangei
->exp
, tem
);
2211 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
2212 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
2213 if (update_range_test (rangei
, rangej
, 1, opcode
, ops
, exp
,
2214 rangei
->in_p
, lowj
, highj
,
2215 rangei
->strict_overflow_p
2216 || rangej
->strict_overflow_p
))
2221 /* Optimize X == CST1 || X == CST2
2222 if popcount (CST2 - CST1) == 1 into
2223 ((X - CST1) & ~(CST2 - CST1)) == 0.
2224 Similarly for ranges. E.g.
2225 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2226 || X == 75 || X == 45
2227 will be transformed by the previous optimization into
2228 (X - 43U) <= 3U || (X - 75U) <= 3U
2229 and this loop can transform that into
2230 ((X - 43U) & ~(75U - 43U)) <= 3U. */
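/* A minimal illustrative sketch (hypothetical, not part of the pass):
   the difference-based merge from the comment above, written out in C.  */
static int
range_diff_example (unsigned int x)
{
  int as_two_ranges = ((x - 43U) <= 3U || (x - 75U) <= 3U);
  int as_one_range  = (((x - 43U) & ~(75U - 43U)) <= 3U);
  return as_two_ranges == as_one_range;   /* always 1 */
}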
static bool
optimize_range_tests_diff (enum tree_code opcode, tree type,
                           tree lowi, tree lowj, tree highi, tree highj,
                           vec<operand_entry_t> *ops,
                           struct range_entry *rangei,
                           struct range_entry *rangej)
{
  tree tem1, tem2, mask;
  /* Check highi - lowi == highj - lowj.  */
  tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
    return false;
  tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
  if (!tree_int_cst_equal (tem1, tem2))
    return false;
  /* Check popcount (lowj - lowi) == 1.  */
  tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
  if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
    return false;
  if (tree_log2 (tem1) < 0)
    return false;

  mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
  tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
  tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
  lowj = build_int_cst (type, 0);
  if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
                         rangei->in_p, lowj, tem2,
                         rangei->strict_overflow_p
                         || rangej->strict_overflow_p))
    return true;
  return false;
}
/* It does some common checks for the functions optimize_range_tests_xor and
   optimize_range_tests_diff.
   If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
   Else it calls optimize_range_tests_diff.  */

static bool
optimize_range_tests_1 (enum tree_code opcode, int first, int length,
                        bool optimize_xor, vec<operand_entry_t> *ops,
                        struct range_entry *ranges)
{
  int i, j;
  bool any_changes = false;
  for (i = first; i < length; i++)
    {
      tree lowi, highi, lowj, highj, type, tem;

      if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
        continue;
      type = TREE_TYPE (ranges[i].exp);
      if (!INTEGRAL_TYPE_P (type))
        continue;
      lowi = ranges[i].low;
      if (lowi == NULL_TREE)
        lowi = TYPE_MIN_VALUE (type);
      highi = ranges[i].high;
      if (highi == NULL_TREE)
        continue;
      for (j = i + 1; j < length && j < i + 64; j++)
        {
          bool changes;
          if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
            continue;
          lowj = ranges[j].low;
          if (lowj == NULL_TREE)
            continue;
          highj = ranges[j].high;
          if (highj == NULL_TREE)
            highj = TYPE_MAX_VALUE (type);
          /* Check lowj > highi.  */
          tem = fold_binary (GT_EXPR, boolean_type_node,
                             lowj, highi);
          if (tem == NULL_TREE || !integer_onep (tem))
            continue;
          if (optimize_xor)
            changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
                                                highi, highj, ops,
                                                ranges + i, ranges + j);
          else
            changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
                                                 highi, highj, ops,
                                                 ranges + i, ranges + j);
          if (changes)
            {
              any_changes = true;
              break;
            }
        }
    }
  return any_changes;
}
/* Optimize range tests, similarly to how fold_range_test optimizes
   it on trees.  The tree code for the binary
   operation between all the operands is OPCODE.
   If OPCODE is ERROR_MARK, optimize_range_tests is called from within
   maybe_optimize_range_tests for inter-bb range optimization.
   In that case if oe->op is NULL, oe->id is bb->index whose
   GIMPLE_COND is && or ||ed into the test, and oe->rank says
   the actual opcode.  */
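/* Illustratively (not from the surrounding code): for two conditions
     if (a_1 < 10) ...   if (a_1 > 20) ...
   spread over two basic blocks and combined inter-bb, the operand entry
   for a block's GIMPLE_COND may have oe->op == NULL, oe->id set to that
   block's index, and oe->rank set to BIT_IOR_EXPR or BIT_AND_EXPR
   depending on whether that condition is ||ed or &&ed into the test.  */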
static bool
optimize_range_tests (enum tree_code opcode,
                      vec<operand_entry_t> *ops)
{
  unsigned int length = ops->length (), i, j, first;
  struct range_entry *ranges;
  bool any_changes = false;

  ranges = XNEWVEC (struct range_entry, length);
  for (i = 0; i < length; i++)
    {
      init_range_entry (ranges + i, oe->op,
                        last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
      /* For | invert it now, we will invert it again before emitting
         the optimized expression.  */
      if (opcode == BIT_IOR_EXPR
          || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
        ranges[i].in_p = !ranges[i].in_p;
    }

  qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
  for (i = 0; i < length; i++)
    if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
      break;

  /* Try to merge ranges.  */
  for (first = i; i < length; i++)
    {
      tree low = ranges[i].low;
      tree high = ranges[i].high;
      int in_p = ranges[i].in_p;
      bool strict_overflow_p = ranges[i].strict_overflow_p;
      int update_fail_count = 0;

      for (j = i + 1; j < length; j++)
        {
          if (ranges[i].exp != ranges[j].exp)
            break;
          if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
                             ranges[j].in_p, ranges[j].low, ranges[j].high))
            break;
          strict_overflow_p |= ranges[j].strict_overflow_p;
        }

      if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
                             ops, ranges[i].exp, in_p, low, high,
                             strict_overflow_p))
        {
          i = j - 1;
          any_changes = true;
          update_fail_count = 0;
        }
      /* Avoid quadratic complexity if all merge_ranges calls would succeed,
         while update_range_test would fail.  */
      else if (update_fail_count == 64)
        break;
      else
        ++update_fail_count;
    }

  any_changes |= optimize_range_tests_1 (opcode, first, length, true,
                                         ops, ranges);

  if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
    any_changes |= optimize_range_tests_1 (opcode, first, length, false,
                                           ops, ranges);

  if (any_changes && opcode != ERROR_MARK)
    {
      FOR_EACH_VEC_ELT (*ops, i, oe)
        if (oe->op == error_mark_node)
    }

  XDELETEVEC (ranges);
  return any_changes;
}
/* Return true if STMT is a cast like:
   # _345 = PHI <_123(N), 1(...), 1(...)>
   where _234 has bool type, _123 has single use and
   bb N has a single successor M.  This is commonly used in
   the last block of a range test.  */

static bool
final_range_test_p (gimple stmt)
{
  basic_block bb, rhs_bb;
  use_operand_p use_p;

  if (!gimple_assign_cast_p (stmt))
    return false;
  bb = gimple_bb (stmt);
  if (!single_succ_p (bb))
    return false;
  e = single_succ_edge (bb);
  if (e->flags & EDGE_COMPLEX)
    return false;

  lhs = gimple_assign_lhs (stmt);
  rhs = gimple_assign_rhs1 (stmt);
  if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
      || TREE_CODE (rhs) != SSA_NAME
      || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
    return false;

  /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
  if (!single_imm_use (lhs, &use_p, &use_stmt))
    return false;

  if (gimple_code (use_stmt) != GIMPLE_PHI
      || gimple_bb (use_stmt) != e->dest)
    return false;

  /* And that the rhs is defined in the same loop.  */
  rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
  if (rhs_bb == NULL
      || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
    return false;

  return true;
}
/* Return true if BB is a suitable basic block for inter-bb range test
   optimization.  If BACKWARD is true, BB should be the only predecessor
   of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
   or compared with to find a common basic block to which all conditions
   branch to if true resp. false.  If BACKWARD is false, TEST_BB should
   be the only predecessor of BB.  */

static bool
suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
                  bool backward)
{
  edge_iterator ei, ei2;
  gimple_stmt_iterator gsi;
  bool other_edge_seen = false;

  /* Check last stmt first.  */
  stmt = last_stmt (bb);
  if (stmt == NULL
      || (gimple_code (stmt) != GIMPLE_COND
          && (backward || !final_range_test_p (stmt)))
      || gimple_visited_p (stmt)
      || stmt_could_throw_p (stmt))
    return false;
  is_cond = gimple_code (stmt) == GIMPLE_COND;

  /* If last stmt is GIMPLE_COND, verify that one of the succ edges
     goes to the next bb (if BACKWARD, it is TEST_BB), and the other
     to *OTHER_BB (if not set yet, try to find it out).  */
  if (EDGE_COUNT (bb->succs) != 2)
    return false;
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
        return false;
      if (e->dest == test_bb)
      if (*other_bb == NULL)
        {
          FOR_EACH_EDGE (e2, ei2, test_bb->succs)
            if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
              return false;
            else if (e->dest == e2->dest)
              *other_bb = e->dest;
          if (*other_bb == NULL)
            return false;
        }
      if (e->dest == *other_bb)
        other_edge_seen = true;
    }
  if (*other_bb == NULL || !other_edge_seen)
    return false;
  else if (single_succ (bb) != *other_bb)
    return false;

  /* Now check all PHIs of *OTHER_BB.  */
  e = find_edge (bb, *other_bb);
  e2 = find_edge (test_bb, *other_bb);
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
         corresponding to BB and TEST_BB predecessor must be the same.  */
      if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
                            gimple_phi_arg_def (phi, e2->dest_idx), 0))
        /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
           one of the PHIs should have the lhs of the last stmt in
           that block as PHI arg and that PHI should have 0 or 1
           corresponding to it in all other range test basic blocks
           considered.  */
        if (gimple_phi_arg_def (phi, e->dest_idx)
              == gimple_assign_lhs (stmt)
            && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
                || integer_onep (gimple_phi_arg_def (phi,
                                                     e2->dest_idx))))
          continue;
        gimple test_last = last_stmt (test_bb);
        if (gimple_code (test_last) != GIMPLE_COND
            && gimple_phi_arg_def (phi, e2->dest_idx)
               == gimple_assign_lhs (test_last)
            && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
                || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
          continue;
    }
  return true;
}
/* Return true if BB doesn't have side-effects that would disallow
   range test optimization, all SSA_NAMEs set in the bb are consumed
   in the bb and there are no PHIs.  */

static bool
no_side_effect_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;

  if (!gimple_seq_empty_p (phi_nodes (bb)))
    return false;
  last = last_stmt (bb);
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      imm_use_iterator imm_iter;
      use_operand_p use_p;

      if (is_gimple_debug (stmt))
        continue;
      if (gimple_has_side_effects (stmt))
        return false;
      if (stmt == last)
        continue;
      if (!is_gimple_assign (stmt))
        return false;
      lhs = gimple_assign_lhs (stmt);
      if (TREE_CODE (lhs) != SSA_NAME)
        return false;
      if (gimple_assign_rhs_could_trap_p (stmt))
        return false;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          gimple use_stmt = USE_STMT (use_p);
          if (is_gimple_debug (use_stmt))
            continue;
          if (gimple_bb (use_stmt) != bb)
            return false;
        }
    }
  return true;
}
/* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
   return true and fill in *OPS recursively.  */

static bool
get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
         struct loop *loop)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);

  if (!is_reassociable_op (stmt, code, loop))
    return false;

  rhs[0] = gimple_assign_rhs1 (stmt);
  rhs[1] = gimple_assign_rhs2 (stmt);
  gimple_set_visited (stmt, true);
  for (i = 0; i < 2; i++)
    if (TREE_CODE (rhs[i]) == SSA_NAME
        && !get_ops (rhs[i], code, ops, loop)
        && has_single_use (rhs[i]))
      {
        operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
        ops->safe_push (oe);
      }
  return true;
}
/* Find the ops that were added by get_ops starting from VAR, see if
   they were changed during update_range_test and if yes, create new
   stmts.  */

static tree
update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
            unsigned int *pidx, struct loop *loop)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);

  if (!is_reassociable_op (stmt, code, loop))
    return NULL_TREE;

  rhs[0] = gimple_assign_rhs1 (stmt);
  rhs[1] = gimple_assign_rhs2 (stmt);
  for (i = 0; i < 2; i++)
    if (TREE_CODE (rhs[i]) == SSA_NAME)
      {
        rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
        if (rhs[2 + i] == NULL_TREE)
          {
            if (has_single_use (rhs[i]))
              rhs[2 + i] = ops[(*pidx)++]->op;
            else
              rhs[2 + i] = rhs[i];
          }
      }
  if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
      && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      var = make_ssa_name (TREE_TYPE (var), NULL);
      gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
                                               var, rhs[2], rhs[3]);
      gimple_set_uid (g, gimple_uid (stmt));
      gimple_set_visited (g, true);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
    }
  return var;
}
/* Structure to track the initial value passed to get_ops and
   the range in the ops vector for each basic block.  */

struct inter_bb_range_test_entry
{
  tree op;
  unsigned int first_idx, last_idx;
};
/* Inter-bb range test optimization.  */

static void
maybe_optimize_range_tests (gimple stmt)
{
  basic_block first_bb = gimple_bb (stmt);
  basic_block last_bb = first_bb;
  basic_block other_bb = NULL;
  auto_vec<operand_entry_t> ops;
  auto_vec<inter_bb_range_test_entry> bbinfo;
  bool any_changes = false;

  /* Consider only basic blocks that end with GIMPLE_COND or
     a cast statement satisfying final_range_test_p.  All
     but the last bb in the first_bb .. last_bb range
     should end with GIMPLE_COND.  */
  if (gimple_code (stmt) == GIMPLE_COND)
    {
      if (EDGE_COUNT (first_bb->succs) != 2)
        return;
    }
  else if (final_range_test_p (stmt))
    other_bb = single_succ (first_bb);
  if (stmt_could_throw_p (stmt))
    return;

  /* As relative ordering of post-dominator sons isn't fixed,
     maybe_optimize_range_tests can be called first on any
     bb in the range we want to optimize.  So, start searching
     backwards, if first_bb can be set to a predecessor.  */
  while (single_pred_p (first_bb))
    {
      basic_block pred_bb = single_pred (first_bb);
      if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
        break;
      if (!no_side_effect_bb (first_bb))
        break;
    }
  /* If first_bb is last_bb, other_bb hasn't been computed yet.
     Before starting forward search in last_bb successors, find
     out the other_bb.  */
  if (first_bb == last_bb)
    {
      /* As non-GIMPLE_COND last stmt always terminates the range,
         if forward search didn't discover anything, just give up.  */
      if (gimple_code (stmt) != GIMPLE_COND)
        return;
      /* Look at both successors.  Either it ends with a GIMPLE_COND
         and satisfies suitable_cond_bb, or ends with a cast and
         other_bb is that cast's successor.  */
      FOR_EACH_EDGE (e, ei, first_bb->succs)
        if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
            || e->dest == first_bb)
        else if (single_pred_p (e->dest))
          {
            stmt = last_stmt (e->dest);
            if (stmt
                && gimple_code (stmt) == GIMPLE_COND
                && EDGE_COUNT (e->dest->succs) == 2)
              {
                if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
              }
            else if (stmt
                     && final_range_test_p (stmt)
                     && find_edge (first_bb, single_succ (e->dest)))
              {
                other_bb = single_succ (e->dest);
                if (other_bb == first_bb)
              }
          }
      if (other_bb == NULL)
        return;
    }
  /* Now do the forward search, moving last_bb to successor bbs
     that aren't other_bb.  */
  while (EDGE_COUNT (last_bb->succs) == 2)
    {
      FOR_EACH_EDGE (e, ei, last_bb->succs)
        if (e->dest != other_bb)
          break;
      if (!single_pred_p (e->dest))
        break;
      if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
        break;
      if (!no_side_effect_bb (e->dest))
        break;
    }
  if (first_bb == last_bb)
    return;

  /* Here basic blocks first_bb through last_bb's predecessor
     end with GIMPLE_COND, all of them have one of the edges to
     other_bb and another to another block in the range,
     all blocks except first_bb don't have side-effects and
     last_bb ends with either GIMPLE_COND, or cast satisfying
     final_range_test_p.  */
  for (bb = last_bb; ; bb = single_pred (bb))
    {
      enum tree_code code;
      inter_bb_range_test_entry bb_ent;

      bb_ent.op = NULL_TREE;
      bb_ent.first_idx = ops.length ();
      bb_ent.last_idx = bb_ent.first_idx;
      e = find_edge (bb, other_bb);
      stmt = last_stmt (bb);
      gimple_set_visited (stmt, true);
      if (gimple_code (stmt) != GIMPLE_COND)
        {
          use_operand_p use_p;

          lhs = gimple_assign_lhs (stmt);
          rhs = gimple_assign_rhs1 (stmt);
          gcc_assert (bb == last_bb);

          /* # _345 = PHI <_123(N), 1(...), 1(...)>
             or 0 instead of 1.  If it is 0, the _234
             range test is anded together with all the
             other range tests, if it is 1, it is ored with
             them.  */
          single_imm_use (lhs, &use_p, &phi);
          gcc_assert (gimple_code (phi) == GIMPLE_PHI);
          e2 = find_edge (first_bb, other_bb);
          gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
          if (integer_zerop (gimple_phi_arg_def (phi, d)))
            code = BIT_AND_EXPR;
          else
            {
              gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
              code = BIT_IOR_EXPR;
            }

          /* If _234 SSA_NAME_DEF_STMT is
             (or &, corresponding to 1/0 in the phi arguments,
             push into ops the individual range test arguments
             of the bitwise or resp. and, recursively.  */
          if (!get_ops (rhs, code, &ops,
                        loop_containing_stmt (stmt))
              && has_single_use (rhs))
            {
              /* Otherwise, push the _234 range test itself.  */
              operand_entry_t oe
                = (operand_entry_t) pool_alloc (operand_entry_pool);
            }
          bb_ent.last_idx = ops.length ();
          bbinfo.safe_push (bb_ent);
          continue;
        }
      /* Otherwise stmt is GIMPLE_COND.  */
      code = gimple_cond_code (stmt);
      lhs = gimple_cond_lhs (stmt);
      rhs = gimple_cond_rhs (stmt);
      if (TREE_CODE (lhs) == SSA_NAME
          && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
          && ((code != EQ_EXPR && code != NE_EXPR)
              || rhs != boolean_false_node
              /* Either push into ops the individual bitwise
                 or resp. and operands, depending on which
                 edge is other_bb.  */
              || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
                                 ^ (code == EQ_EXPR))
                                ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
                           loop_containing_stmt (stmt))))
        {
          /* Or push the GIMPLE_COND stmt itself.  */
          operand_entry_t oe
            = (operand_entry_t) pool_alloc (operand_entry_pool);

          oe->rank = (e->flags & EDGE_TRUE_VALUE)
                     ? BIT_IOR_EXPR : BIT_AND_EXPR;
          /* oe->op = NULL signs that there is no SSA_NAME
             for the range test, and oe->id instead is the
             basic block number, at whose end the GIMPLE_COND
             is.  */
        }
      else if (ops.length () > bb_ent.first_idx)
        {
          bb_ent.last_idx = ops.length ();
        }
      bbinfo.safe_push (bb_ent);
    }
  if (ops.length () > 1)
    any_changes = optimize_range_tests (ERROR_MARK, &ops);

  /* update_ops relies on has_single_use predicates returning the
     same values as it did during get_ops earlier.  Additionally it
     never removes statements, only adds new ones and it should walk
     from the single imm use and check the predicate already before
     making those changes.
     On the other side, the handling of GIMPLE_COND directly can turn
     previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
     it needs to be done in a separate loop afterwards.  */
  for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
    {
      if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
          && bbinfo[idx].op != NULL_TREE)
        {
          stmt = last_stmt (bb);
          new_op = update_ops (bbinfo[idx].op,
                               ops[bbinfo[idx].first_idx]->rank,
                               ops, &bbinfo[idx].first_idx,
                               loop_containing_stmt (stmt));
          if (new_op == NULL_TREE)
            {
              gcc_assert (bb == last_bb);
              new_op = ops[bbinfo[idx].first_idx++]->op;
            }
          if (bbinfo[idx].op != new_op)
            {
              imm_use_iterator iter;
              use_operand_p use_p;
              gimple use_stmt, cast_stmt = NULL;

              FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
                if (is_gimple_debug (use_stmt))
                  continue;
                else if (gimple_code (use_stmt) == GIMPLE_COND
                         || gimple_code (use_stmt) == GIMPLE_PHI)
                  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
                    SET_USE (use_p, new_op);
                else if (gimple_assign_cast_p (use_stmt))
                  cast_stmt = use_stmt;
              gcc_assert (bb == last_bb);
              tree lhs = gimple_assign_lhs (cast_stmt);
              tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
              enum tree_code rhs_code
                = gimple_assign_rhs_code (cast_stmt);
              if (is_gimple_min_invariant (new_op))
                {
                  new_op = fold_convert (TREE_TYPE (lhs), new_op);
                  g = gimple_build_assign (new_lhs, new_op);
                }
              else
                g = gimple_build_assign_with_ops (rhs_code, new_lhs,
              gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
              gimple_set_uid (g, gimple_uid (cast_stmt));
              gimple_set_visited (g, true);
              gsi_insert_before (&gsi, g, GSI_SAME_STMT);
              FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
                if (is_gimple_debug (use_stmt))
                  continue;
                else if (gimple_code (use_stmt) == GIMPLE_COND
                         || gimple_code (use_stmt) == GIMPLE_PHI)
                  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
                    SET_USE (use_p, new_lhs);
            }
        }
    }
  for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
    {
      if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
          && bbinfo[idx].op == NULL_TREE
          && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
        {
          stmt = last_stmt (bb);
          if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
            gimple_cond_make_false (stmt);
          else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
            gimple_cond_make_true (stmt);
          else
            {
              gimple_cond_set_code (stmt, NE_EXPR);
              gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
              gimple_cond_set_rhs (stmt, boolean_false_node);
            }
        }
    }
}
/* Return true if OPERAND is defined by a PHI node which uses the LHS
   of STMT in its operands.  This is also known as a "destructive
   update" operation.  */

static bool
is_phi_for_stmt (gimple stmt, tree operand)
{
  use_operand_p arg_p;

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  lhs = gimple_assign_lhs (stmt);
  def_stmt = SSA_NAME_DEF_STMT (operand);
  if (gimple_code (def_stmt) != GIMPLE_PHI)
    return false;

  FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
    if (lhs == USE_FROM_PTR (arg_p))
      return true;
  return false;
}
/* Remove def stmt of VAR if VAR has zero uses and recurse
   on rhs1 operand if so.  */

static void
remove_visited_stmt_chain (tree var)
{
  gimple_stmt_iterator gsi;

  while (1)
    {
      if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
        return;
      stmt = SSA_NAME_DEF_STMT (var);
      if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
        {
          var = gimple_assign_rhs1 (stmt);
          gsi = gsi_for_stmt (stmt);
          reassoc_remove_stmt (&gsi);
          release_defs (stmt);
        }
      else
        return;
    }
}
/* This function checks three consecutive operands in
   passed operands vector OPS starting from OPINDEX and
   swaps two operands if it is profitable for binary operation
   consuming OPINDEX + 1 and OPINDEX + 2 operands.

   We pair ops with the same rank if possible.

   The alternative we try is to see if STMT is a destructive
   update style statement, which is like:
   In that case, we want to use the destructive update form to
   expose the possible vectorizer sum reduction opportunity.
   In that case, the third operand will be the phi node.  This
   check is not performed if STMT is null.

   We could, of course, try to be better as noted above, and do a
   lot of work to try to find these opportunities in >3 operand
   cases, but it is unlikely to be worth it.  */
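/* For instance (illustrative only): with ranks (2, 2, 7) at OPINDEX,
   OPINDEX + 1 and OPINDEX + 2, the first and third operands are swapped
   to give (7, 2, 2), so the binary op consuming the last two operands
   pairs the equal-rank operands; with ranks (7, 2, 2) nothing is swapped.
   Likewise, an operand fed back through the reduction PHI is kept out of
   that inner pair so the accumulator is combined last.  */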
static void
swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
                          unsigned int opindex, gimple stmt)
{
  operand_entry_t oe1, oe2, oe3;

  oe1 = ops[opindex];
  oe2 = ops[opindex + 1];
  oe3 = ops[opindex + 2];

  if ((oe1->rank == oe2->rank
       && oe2->rank != oe3->rank)
      || (stmt && is_phi_for_stmt (stmt, oe3->op)
          && !is_phi_for_stmt (stmt, oe1->op)
          && !is_phi_for_stmt (stmt, oe2->op)))
    {
      struct operand_entry temp = *oe3;
      oe3->op = oe1->op;
      oe3->rank = oe1->rank;
      oe1->op = temp.op;
      oe1->rank = temp.rank;
    }
  else if ((oe1->rank == oe3->rank
            && oe2->rank != oe3->rank)
           || (stmt && is_phi_for_stmt (stmt, oe2->op)
               && !is_phi_for_stmt (stmt, oe1->op)
               && !is_phi_for_stmt (stmt, oe3->op)))
    {
      struct operand_entry temp = *oe2;
      oe2->op = oe1->op;
      oe2->rank = oe1->rank;
      oe1->op = temp.op;
      oe1->rank = temp.rank;
    }
}
/* If definition of RHS1 or RHS2 dominates STMT, return the later of those
   two definitions, otherwise return STMT.  */

static inline gimple
find_insert_point (gimple stmt, tree rhs1, tree rhs2)
{
  if (TREE_CODE (rhs1) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
    stmt = SSA_NAME_DEF_STMT (rhs1);
  if (TREE_CODE (rhs2) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
    stmt = SSA_NAME_DEF_STMT (rhs2);
  return stmt;
}
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order.  Return new lhs.  */

static tree
rewrite_expr_tree (gimple stmt, unsigned int opindex,
                   vec<operand_entry_t> ops, bool changed)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree lhs = gimple_assign_lhs (stmt);

  /* The final recursion case for this function is that you have
     exactly two operations left.
     If we had exactly one op in the entire list to start with, we
     would have never called this function, and the tail recursion
     rewrites them one at a time.  */
  if (opindex + 2 == ops.length ())
    {
      operand_entry_t oe1, oe2;

      oe1 = ops[opindex];
      oe2 = ops[opindex + 1];

      if (rhs1 != oe1->op || rhs2 != oe2->op)
        {
          gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
          unsigned int uid = gimple_uid (stmt);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Transforming ");
              print_gimple_stmt (dump_file, stmt, 0, 0);
            }

          gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
          lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
          stmt
            = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
                                            lhs, oe1->op, oe2->op);
          gimple_set_uid (stmt, uid);
          gimple_set_visited (stmt, true);
          if (insert_point == gsi_stmt (gsi))
            gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
          else
            insert_stmt_after (stmt, insert_point);

          gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
                               == stmt);
          gimple_assign_set_rhs1 (stmt, oe1->op);
          gimple_assign_set_rhs2 (stmt, oe2->op);

          if (rhs1 != oe1->op && rhs1 != oe2->op)
            remove_visited_stmt_chain (rhs1);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, " into ");
              print_gimple_stmt (dump_file, stmt, 0, 0);
            }
        }
      return lhs;
    }

  /* If we hit here, we should have 3 or more ops left.  */
  gcc_assert (opindex + 2 < ops.length ());

  /* Rewrite the next operator.  */

  /* Recurse on the LHS of the binary operator, which is guaranteed to
     be the non-leaf side.  */
  tree new_rhs1
    = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
                         changed || oe->op != rhs2);

  if (oe->op != rhs2 || new_rhs1 != rhs1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Transforming ");
          print_gimple_stmt (dump_file, stmt, 0, 0);
        }

      /* If changed is false, this is either opindex == 0
         or all outer rhs2's were equal to corresponding oe->op,
         and powi_result is NULL.
         That means lhs is equivalent before and after reassociation.
         Otherwise ensure the old lhs SSA_NAME is not reused and
         create a new stmt as well, so that any debug stmts will be
         properly adjusted.  */
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      unsigned int uid = gimple_uid (stmt);
      gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);

      lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
      stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
                                           lhs, new_rhs1, oe->op);
      gimple_set_uid (stmt, uid);
      gimple_set_visited (stmt, true);
      if (insert_point == gsi_stmt (gsi))
        gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      else
        insert_stmt_after (stmt, insert_point);

      gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
                           == stmt);
      gimple_assign_set_rhs1 (stmt, new_rhs1);
      gimple_assign_set_rhs2 (stmt, oe->op);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " into ");
          print_gimple_stmt (dump_file, stmt, 0, 0);
        }
    }
  return lhs;
}
/* Find out how many cycles we need to compute statements chain.
   OPS_NUM holds number of statements in a chain.  CPU_WIDTH is a
   maximum number of independent statements we may execute per cycle.  */
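/* E.g. (illustrative): with OPS_NUM == 8 and CPU_WIDTH == 2 the model
   charges 8 / (2 * 2) == 2 width-limited cycles, leaving 8 - 2 * 2 == 4
   operands, plus exact_log2 (4) == 2 halving cycles, i.e. 4 cycles in
   total.  */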
static int
get_required_cycles (int ops_num, int cpu_width)
{
  /* While we have more than 2 * cpu_width operands
     we may reduce number of operands by cpu_width
     per cycle.  */
  res = ops_num / (2 * cpu_width);

  /* Remained operands count may be reduced twice per cycle
     until we have only one operand.  */
  rest = (unsigned)(ops_num - res * cpu_width);
  elog = exact_log2 (rest);
  if (elog >= 0)
    res += elog;
  else
    res += floor_log2 (rest) + 1;

  return res;
}
/* Returns an optimal number of registers to use for computation of
   given statements.  */

static int
get_reassociation_width (int ops_num, enum tree_code opc,
                         enum machine_mode mode)
{
  int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);

  if (param_width > 0)
    width = param_width;
  else
    width = targetm.sched.reassociation_width (opc, mode);

  /* Get the minimal time required for sequence computation.  */
  cycles_best = get_required_cycles (ops_num, width);

  /* Check if we may use less width and still compute sequence for
     the same time.  It will allow us to reduce registers usage.
     get_required_cycles is monotonically increasing with lower width
     so we can perform a binary search for the minimal width that still
     results in the optimal cycle count.  */
  while (width > width_min)
    {
      int width_mid = (width + width_min) / 2;

      if (get_required_cycles (ops_num, width_mid) == cycles_best)
        width = width_mid;
      else if (width_min < width_mid)
        width_min = width_mid;
      else
        break;
    }

  return width;
}
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order and trying to allow operations to be executed in
   parallel.  */
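/* Illustratively: with WIDTH == 2 a chain a + b + c + d is emitted as
   t1 = a + b; t2 = c + d; t3 = t1 + t2, so the first two additions can
   issue in the same cycle, instead of the serial ((a + b) + c) + d.  */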
rewrite_expr_tree_parallel (gimple stmt, int width,
                            vec<operand_entry_t> ops)
{
  enum tree_code opcode = gimple_assign_rhs_code (stmt);
  int op_num = ops.length ();
  int stmt_num = op_num - 1;
  gimple *stmts = XALLOCAVEC (gimple, stmt_num);
  int op_index = op_num - 1;
  int ready_stmts_end = 0;
  tree last_rhs1 = gimple_assign_rhs1 (stmt);

  /* We start expression rewriting from the top statements.
     So, in this loop we create a full list of statements
     we will work with.  */
  stmts[stmt_num - 1] = stmt;
  for (i = stmt_num - 2; i >= 0; i--)
    stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));

  for (i = 0; i < stmt_num; i++)
    {
      /* Determine whether we should use results of
         already handled statements or not.  */
      if (ready_stmts_end == 0
          && (i - stmt_index >= width || op_index < 1))
        ready_stmts_end = i;

      /* Now we choose operands for the next statement.  Non zero
         value in ready_stmts_end means here that we should use
         the result of already generated statements as new operand.  */
      if (ready_stmts_end > 0)
        {
          op1 = gimple_assign_lhs (stmts[stmt_index++]);
          if (ready_stmts_end > stmt_index)
            op2 = gimple_assign_lhs (stmts[stmt_index++]);
          else if (op_index >= 0)
            op2 = ops[op_index--]->op;
          gcc_assert (stmt_index < i);
          op2 = gimple_assign_lhs (stmts[stmt_index++]);
          if (stmt_index >= ready_stmts_end)
            ready_stmts_end = 0;
        }
      else
        {
          swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
          op2 = ops[op_index--]->op;
          op1 = ops[op_index--]->op;
        }

      /* If we emit the last statement then we should put
         operands into the last statement.  It will also
      if (op_index < 0 && stmt_index == i)

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Transforming ");
          print_gimple_stmt (dump_file, stmts[i], 0, 0);
        }

      /* We keep original statement only for the last one.  All
         others are recreated.  */
      if (i == stmt_num - 1)
        {
          gimple_assign_set_rhs1 (stmts[i], op1);
          gimple_assign_set_rhs2 (stmts[i], op2);
          update_stmt (stmts[i]);
        }
      else
        stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " into ");
          print_gimple_stmt (dump_file, stmts[i], 0, 0);
        }
    }

  remove_visited_stmt_chain (last_rhs1);
/* Transform STMT, which is really (A + B) + (C + D) into the left
   linear form, ((A+B)+C)+D.
   Recurse on D if necessary.  */

static void
linearize_expr (gimple stmt)
{
  gimple_stmt_iterator gsi;
  gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
  gimple oldbinrhs = binrhs;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  gimple newbinrhs = NULL;
  struct loop *loop = loop_containing_stmt (stmt);
  tree lhs = gimple_assign_lhs (stmt);

  gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
              && is_reassociable_op (binrhs, rhscode, loop));

  gsi = gsi_for_stmt (stmt);

  gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
  binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
                                         make_ssa_name (TREE_TYPE (lhs), NULL),
                                         gimple_assign_lhs (binlhs),
                                         gimple_assign_rhs2 (binrhs));
  gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
  gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
  gimple_set_uid (binrhs, gimple_uid (stmt));

  if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
    newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Linearized: ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  reassociate_stats.linearized++;

  gsi = gsi_for_stmt (oldbinrhs);
  reassoc_remove_stmt (&gsi);
  release_defs (oldbinrhs);

  gimple_set_visited (stmt, true);
  gimple_set_visited (binlhs, true);
  gimple_set_visited (binrhs, true);

  /* Tail recurse on the new rhs if it still needs reassociation.  */
  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
    /* ??? This should probably be linearize_expr (newbinrhs) but I don't
       want to change the algorithm while converting to tuples.  */
    linearize_expr (stmt);
}
/* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
   it.  Otherwise, return NULL.  */

static gimple
get_single_immediate_use (tree lhs)
{
  use_operand_p immuse;

  if (TREE_CODE (lhs) == SSA_NAME
      && single_imm_use (lhs, &immuse, &immusestmt)
      && is_gimple_assign (immusestmt))
    return immusestmt;
  return NULL;
}
/* Recursively negate the value of TONEGATE, and return the SSA_NAME
   representing the negated value.  Insertions of any necessary
   instructions go before GSI.
   This function is recursive in that, if you hand it "a_5" as the
   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */

static tree
negate_value (tree tonegate, gimple_stmt_iterator *gsip)
{
  gimple negatedefstmt = NULL;
  tree resultofnegate;
  gimple_stmt_iterator gsi;

  /* If we are trying to negate a name, defined by an add, negate the
     add operands instead.  */
  if (TREE_CODE (tonegate) == SSA_NAME)
    negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
  if (TREE_CODE (tonegate) == SSA_NAME
      && is_gimple_assign (negatedefstmt)
      && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
      && has_single_use (gimple_assign_lhs (negatedefstmt))
      && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
    {
      tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
      tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
      tree lhs = gimple_assign_lhs (negatedefstmt);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs1 = negate_value (rhs1, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs2 = negate_value (rhs2, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
      gimple_set_visited (negatedefstmt, true);
      g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
      gimple_set_uid (g, gimple_uid (negatedefstmt));
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      return lhs;
    }

  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
  resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
                                             NULL_TREE, true, GSI_SAME_STMT);
  uid = gimple_uid (gsi_stmt (gsi));
  for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (gimple_uid (stmt) != 0)
        break;
      gimple_set_uid (stmt, uid);
    }
  return resultofnegate;
}
/* Return true if we should break up the subtract in STMT into an add
   with negate.  This is true when the subtract operands are really
   adds, or the subtract itself is used in an add expression.  In
   either case, breaking up the subtract into an add with negate
   exposes the adds to reassociation.  */
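/* E.g. (illustrative): for x = a - b; y = x + c, rewriting the subtract
   as t = -b; x = a + t lets the pass treat a, -b and c as one addition
   chain and reassociate them freely.  */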
static bool
should_break_up_subtract (gimple stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  struct loop *loop = loop_containing_stmt (stmt);

  if (TREE_CODE (binlhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
    return true;

  if (TREE_CODE (binrhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
    return true;

  if (TREE_CODE (lhs) == SSA_NAME
      && (immusestmt = get_single_immediate_use (lhs))
      && is_gimple_assign (immusestmt)
      && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
          || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
    return true;
  return false;
}
/* Transform STMT from A - B into A + -B.  */

static void
break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Breaking up subtract ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  rhs2 = negate_value (rhs2, gsip);
  gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
}
/* Determine whether STMT is a builtin call that raises an SSA name
   to an integer power and has only one use.  If so, and this is early
   reassociation and unsafe math optimizations are permitted, place
   the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
   If any of these conditions does not hold, return FALSE.  */

static bool
acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
{
  REAL_VALUE_TYPE c, cint;

  if (!first_pass_instance
      || !flag_unsafe_math_optimizations
      || !is_gimple_call (stmt)
      || !has_single_use (gimple_call_lhs (stmt)))
    return false;

  fndecl = gimple_call_fndecl (stmt);
  if (!fndecl
      || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
    return false;

  switch (DECL_FUNCTION_CODE (fndecl))
    {
    CASE_FLT_FN (BUILT_IN_POW):
      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (TREE_CODE (arg1) != REAL_CST)
        return false;

      c = TREE_REAL_CST (arg1);

      if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
        return false;

      *exponent = real_to_integer (&c);
      real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
      if (!real_identical (&c, &cint))
        return false;

      break;

    CASE_FLT_FN (BUILT_IN_POWI):
      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (!tree_fits_shwi_p (arg1))
        return false;

      *exponent = tree_to_shwi (arg1);
      break;

    default:
      return false;
    }

  /* Expanding negative exponents is generally unproductive, so we don't
     complicate matters with those.  Exponents of zero and one should
     have been handled by expression folding.  */
  if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
    return false;

  return true;
}
/* Recursively linearize a binary expression that is the RHS of STMT.
   Place the operands of the expression tree in the vector named OPS.  */

static void
linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
                     bool is_associative, bool set_visited)
{
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple binlhsdef = NULL, binrhsdef = NULL;
  bool binlhsisreassoc = false;
  bool binrhsisreassoc = false;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  struct loop *loop = loop_containing_stmt (stmt);
  tree base = NULL_TREE;
  HOST_WIDE_INT exponent = 0;

  if (set_visited)
    gimple_set_visited (stmt, true);

  if (TREE_CODE (binlhs) == SSA_NAME)
    {
      binlhsdef = SSA_NAME_DEF_STMT (binlhs);
      binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
                         && !stmt_could_throw_p (binlhsdef));
    }

  if (TREE_CODE (binrhs) == SSA_NAME)
    {
      binrhsdef = SSA_NAME_DEF_STMT (binrhs);
      binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
                         && !stmt_could_throw_p (binrhsdef));
    }

  /* If the LHS is not reassociable, but the RHS is, we need to swap
     them.  If neither is reassociable, there is nothing we can do, so
     just put them in the ops vector.  If the LHS is reassociable,
     linearize it.  If both are reassociable, then linearize the RHS
     and the LHS.  */
  if (!binlhsisreassoc)
    {
      /* If this is not an associative operation like division, give up.  */
      if (!is_associative)
        {
          add_to_ops_vec (ops, binrhs);
          return;
        }

      if (!binrhsisreassoc)
        {
          if (rhscode == MULT_EXPR
              && TREE_CODE (binrhs) == SSA_NAME
              && acceptable_pow_call (binrhsdef, &base, &exponent))
            {
              add_repeat_to_ops_vec (ops, base, exponent);
              gimple_set_visited (binrhsdef, true);
            }
          else
            add_to_ops_vec (ops, binrhs);

          if (rhscode == MULT_EXPR
              && TREE_CODE (binlhs) == SSA_NAME
              && acceptable_pow_call (binlhsdef, &base, &exponent))
            {
              add_repeat_to_ops_vec (ops, base, exponent);
              gimple_set_visited (binlhsdef, true);
            }
          else
            add_to_ops_vec (ops, binlhs);

          return;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "swapping operands of ");
          print_gimple_stmt (dump_file, stmt, 0, 0);
        }

      swap_ssa_operands (stmt,
                         gimple_assign_rhs1_ptr (stmt),
                         gimple_assign_rhs2_ptr (stmt));

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " is now ");
          print_gimple_stmt (dump_file, stmt, 0, 0);
        }

      /* We want to make it so the lhs is always the reassociative op,
         so swap.  */
    }
  else if (binrhsisreassoc)
    {
      linearize_expr (stmt);
      binlhs = gimple_assign_rhs1 (stmt);
      binrhs = gimple_assign_rhs2 (stmt);
    }

  gcc_assert (TREE_CODE (binrhs) != SSA_NAME
              || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
  linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
                       is_associative, set_visited);

  if (rhscode == MULT_EXPR
      && TREE_CODE (binrhs) == SSA_NAME
      && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
    {
      add_repeat_to_ops_vec (ops, base, exponent);
      gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
    }
  else
    add_to_ops_vec (ops, binrhs);
}
/* Repropagate the negates back into subtracts, since no other pass
   currently does it.  */

static void
repropagate_negates (void)
{
  FOR_EACH_VEC_ELT (plus_negates, i, negate)
    {
      gimple user = get_single_immediate_use (negate);

      if (!user || !is_gimple_assign (user))
        continue;

      /* The negate operand can be either operand of a PLUS_EXPR
         (it can be the LHS if the RHS is a constant for example).

         Force the negate operand to the RHS of the PLUS_EXPR, then
         transform the PLUS_EXPR into a MINUS_EXPR.  */
      if (gimple_assign_rhs_code (user) == PLUS_EXPR)
        {
          /* If the negated operand appears on the LHS of the
             PLUS_EXPR, exchange the operands of the PLUS_EXPR
             to force the negated operand to the RHS of the PLUS_EXPR.  */
          if (gimple_assign_rhs1 (user) == negate)
            {
              swap_ssa_operands (user,
                                 gimple_assign_rhs1_ptr (user),
                                 gimple_assign_rhs2_ptr (user));
            }

          /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
             the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
          if (gimple_assign_rhs2 (user) == negate)
            {
              tree rhs1 = gimple_assign_rhs1 (user);
              tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
              gimple_stmt_iterator gsi = gsi_for_stmt (user);
              gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
            }
        }
      else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
        {
          if (gimple_assign_rhs1 (user) == negate)
            {
              /* which we transform into
                 This pushes down the negate which we possibly can merge
                 into some other operation, hence insert it into the
                 plus_negates vector.  */
              gimple feed = SSA_NAME_DEF_STMT (negate);
              tree a = gimple_assign_rhs1 (feed);
              tree b = gimple_assign_rhs2 (user);
              gimple_stmt_iterator gsi = gsi_for_stmt (feed);
              gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
              tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
              gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
              gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
              gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
              user = gsi_stmt (gsi2);

              reassoc_remove_stmt (&gsi);
              release_defs (feed);
              plus_negates.safe_push (gimple_assign_lhs (user));
            }
          else
            {
              /* Transform "x = -a; y = b - x" into "y = b + a", getting
                 rid of one operation.  */
              gimple feed = SSA_NAME_DEF_STMT (negate);
              tree a = gimple_assign_rhs1 (feed);
              tree rhs1 = gimple_assign_rhs1 (user);
              gimple_stmt_iterator gsi = gsi_for_stmt (user);
              gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
              update_stmt (gsi_stmt (gsi));
            }
        }
    }
}
/* Returns true if OP is of a type for which we can do reassociation.
   That is for integral or non-saturating fixed-point types, and for
   floating point type when associative-math is enabled.  */

static bool
can_reassociate_p (tree op)
{
  tree type = TREE_TYPE (op);
  if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
      || NON_SAT_FIXED_POINT_TYPE_P (type)
      || (flag_associative_math && FLOAT_TYPE_P (type)))
    return true;
  return false;
}
/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement
   and set UIDs within each basic block.  */
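/* I.e. (illustrative): in a chain such as
     b = c - d;  q = b - r;  k = t - q;
   k = t - q only becomes breakable once q = b - r has been rewritten as
   an add of a negate, which in turn waits on b = c - d, hence the
   top-down walk.  */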
static void
break_up_subtract_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  unsigned int uid = 1;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_visited (stmt, false);
      gimple_set_uid (stmt, uid++);

      if (!is_gimple_assign (stmt)
          || !can_reassociate_p (gimple_assign_lhs (stmt)))
        continue;

      /* Look for simple gimple subtract operations.  */
      if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
        {
          if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
              || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
            continue;

          /* Check for a subtract used only in an addition.  If this
             is the case, transform it into add of a negate for better
             reassociation.  IE transform C = A-B into C = A + -B if C
             is only used in an addition.  */
          if (should_break_up_subtract (stmt))
            break_up_subtract (stmt, &gsi);
        }
      else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
               && can_reassociate_p (gimple_assign_rhs1 (stmt)))
        plus_negates.safe_push (gimple_assign_lhs (stmt));
    }
  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    break_up_subtract_bb (son);
}
/* Used for repeated factor analysis.  */
struct repeat_factor_d
{
  /* An SSA name that occurs in a multiply chain.  */
  tree factor;

  /* Cached rank of the factor.  */
  unsigned rank;

  /* Number of occurrences of the factor in the chain.  */
  HOST_WIDE_INT count;

  /* An SSA name representing the product of this factor and
     all factors appearing later in the repeated factor vector.  */
  tree repr;
};

typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
typedef const struct repeat_factor_d *const_repeat_factor_t;

static vec<repeat_factor> repeat_factor_vec;

/* Used for sorting the repeat factor vector.  Sort primarily by
   ascending occurrence count, secondarily by descending rank.  */
static int
compare_repeat_factors (const void *x1, const void *x2)
{
  const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
  const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;

  if (rf1->count != rf2->count)
    return rf1->count - rf2->count;

  return rf2->rank - rf1->rank;
}
/* Look for repeated operands in OPS in the multiply tree rooted at
   STMT.  Replace them with an optimal sequence of multiplies and powi
   builtin calls, and remove the used operands from OPS.  Return an
   SSA name representing the value of the replacement sequence.  */

static tree
attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
{
  unsigned i, j, vec_len;
  repeat_factor_t rf1, rf2;
  repeat_factor rfnew;
  tree result = NULL_TREE;
  tree target_ssa, iter_result;
  tree type = TREE_TYPE (gimple_get_lhs (stmt));
  tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gimple mul_stmt, pow_stmt;

  /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and

  /* Allocate the repeated factor vector.  */
  repeat_factor_vec.create (10);

  /* Scan the OPS vector for all SSA names in the product and build
     up a vector of occurrence counts for each factor.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
        {
          FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
            {
              if (rf1->factor == oe->op)
                {
                  rf1->count += oe->count;
                  break;
                }
            }

          if (j >= repeat_factor_vec.length ())
            {
              rfnew.factor = oe->op;
              rfnew.rank = oe->rank;
              rfnew.count = oe->count;
              rfnew.repr = NULL_TREE;
              repeat_factor_vec.safe_push (rfnew);
            }
        }
    }

  /* Sort the repeated factor vector by (a) increasing occurrence count,
     and (b) decreasing rank.  */
  repeat_factor_vec.qsort (compare_repeat_factors);

  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_pow opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions:

     If we instead ignored the cached y * z and first multiplied by
     the __builtin_pow opportunity z * z, we would get the inferior:

  vec_len = repeat_factor_vec.length ();

  /* Repeatedly look for opportunities to create a builtin_powi call.  */
  while (true)
    {
      HOST_WIDE_INT power;

      /* First look for the largest cached product of factors from
         preceding iterations.  If found, create a builtin_powi for
         it if the minimum occurrence count for its factors is at
         least 2, or just use this cached product as our next
         multiplicand if the minimum occurrence count is 1.  */
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
        {
          if (rf1->repr && rf1->count > 0)
            break;
        }

      iter_result = rf1->repr;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fputs ("Multiplying by cached product ", dump_file);
          for (elt = j; elt < vec_len; elt++)
            {
              rf = &repeat_factor_vec[elt];
              print_generic_expr (dump_file, rf->factor, 0);
              if (elt < vec_len - 1)
                fputs (" * ", dump_file);
            }
          fputs ("\n", dump_file);
        }

      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                    build_int_cst (integer_type_node,
      gimple_call_set_lhs (pow_stmt, iter_result);
      gimple_set_location (pow_stmt, gimple_location (stmt));
      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fputs ("Building __builtin_pow call for cached product (",
                 dump_file);
          for (elt = j; elt < vec_len; elt++)
            {
              rf = &repeat_factor_vec[elt];
              print_generic_expr (dump_file, rf->factor, 0);
              if (elt < vec_len - 1)
                fputs (" * ", dump_file);
            }
          fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
        }

      /* Otherwise, find the first factor in the repeated factor
         vector whose occurrence count is at least 2.  If no such
         factor exists, there are no builtin_powi opportunities
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
        {
          if (rf1->count >= 2)
            break;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fputs ("Building __builtin_pow call for (", dump_file);
          for (elt = j; elt < vec_len; elt++)
            {
              rf = &repeat_factor_vec[elt];
              print_generic_expr (dump_file, rf->factor, 0);
              if (elt < vec_len - 1)
                fputs (" * ", dump_file);
            }
          fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
        }

      reassociate_stats.pows_created++;

      /* Visit each element of the vector in reverse order (so that
         high-occurrence elements are visited first, and within the
         same occurrence count, lower-ranked elements are visited
         first).  Form a linear product of all elements in this order
         whose occurrence count is at least that of element J.
         Record the SSA name representing the product of each element
         with all subsequent elements in the vector.  */
      if (j == vec_len - 1)
        rf1->repr = rf1->factor;
      else
        {
          for (ii = vec_len - 2; ii >= (int)j; ii--)
            {
              rf1 = &repeat_factor_vec[ii];
              rf2 = &repeat_factor_vec[ii + 1];

              /* Init the last factor's representative to be itself.  */
              if (!rf2->repr)
                rf2->repr = rf2->factor;

              target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
              mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
              gimple_set_location (mul_stmt, gimple_location (stmt));
              gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
              rf1->repr = target_ssa;

              /* Don't reprocess the multiply we just introduced.  */
              gimple_set_visited (mul_stmt, true);
            }
        }

      /* Form a call to __builtin_powi for the maximum product
         just formed, raised to the power obtained earlier.  */
      rf1 = &repeat_factor_vec[j];
      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                    build_int_cst (integer_type_node,
      gimple_call_set_lhs (pow_stmt, iter_result);
      gimple_set_location (pow_stmt, gimple_location (stmt));
      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);

      /* If we previously formed at least one other builtin_powi call,
         form the product of this one and those others.  */
      if (result)
        {
          tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
          mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
                                                   result, iter_result);
          gimple_set_location (mul_stmt, gimple_location (stmt));
          gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
          gimple_set_visited (mul_stmt, true);
          result = new_result;
        }
      else
        result = iter_result;

      /* Decrement the occurrence count of each element in the product
         by the count found above, and remove this many copies of each
      for (i = j; i < vec_len; i++)
        {
          rf1 = &repeat_factor_vec[i];
          rf1->count -= power;

          FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
            {
              if (oe->op == rf1->factor)
                {
                  ops->ordered_remove (n);
                }
            }
        }
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */

static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}
/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
                            tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }
}
/* Reassociate expressions in basic block BB and its post-dominator as
   children.  */

static void
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;
  gimple stmt = last_stmt (bb);

  if (stmt && !gimple_visited_p (stmt))
    maybe_optimize_range_tests (stmt);

  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
	  && !stmt_could_throw_p (stmt))
	{
	  tree lhs, rhs1, rhs2;
	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

	  /* If this is not a gimple binary expression, there is
	     nothing for us to do with it.  */
	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
	    continue;

	  /* If this was part of an already processed statement,
	     we don't need to touch it again.  */
	  if (gimple_visited_p (stmt))
	    {
	      /* This statement might have become dead because of previous
		 reassociations.  */
	      if (has_zero_uses (gimple_get_lhs (stmt)))
		{
		  reassoc_remove_stmt (&gsi);
		  release_defs (stmt);
		  /* We might end up removing the last stmt above which
		     places the iterator to the end of the sequence.
		     Reset it to the last stmt in this case which might
		     be the end of the sequence as well if we removed
		     the last statement of the sequence.  In which case
		     we need to bail out.  */
		  if (gsi_end_p (gsi))
		    {
		      gsi = gsi_last_bb (bb);
		      if (gsi_end_p (gsi))
			break;
		    }
		}
	      continue;
	    }

	  lhs = gimple_assign_lhs (stmt);
	  rhs1 = gimple_assign_rhs1 (stmt);
	  rhs2 = gimple_assign_rhs2 (stmt);

	  /* For non-bit or min/max operations we can't associate
	     all types.  Verify that here.  */
	  if (rhs_code != BIT_IOR_EXPR
	      && rhs_code != BIT_AND_EXPR
	      && rhs_code != BIT_XOR_EXPR
	      && rhs_code != MIN_EXPR
	      && rhs_code != MAX_EXPR
	      && (!can_reassociate_p (lhs)
		  || !can_reassociate_p (rhs1)
		  || !can_reassociate_p (rhs2)))
	    continue;

	  if (associative_tree_code (rhs_code))
	    {
	      auto_vec<operand_entry_t> ops;
	      tree powi_result = NULL_TREE;

	      /* There may be no immediate uses left by the time we
		 get here because we may have eliminated them all.  */
	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
		continue;

	      gimple_set_visited (stmt, true);
	      linearize_expr_tree (&ops, stmt, true, true);
	      ops.qsort (sort_by_operand_rank);
	      optimize_ops_list (rhs_code, &ops);
	      if (undistribute_ops_list (rhs_code, &ops,
					 loop_containing_stmt (stmt)))
		{
		  ops.qsort (sort_by_operand_rank);
		  optimize_ops_list (rhs_code, &ops);
		}

	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
		optimize_range_tests (rhs_code, &ops);

	      if (first_pass_instance
		  && rhs_code == MULT_EXPR
		  && flag_unsafe_math_optimizations)
		powi_result = attempt_builtin_powi (stmt, &ops);

	      /* If the operand vector is now empty, all operands were
		 consumed by the __builtin_powi optimization.  */
	      if (ops.length () == 0)
		transform_stmt_to_copy (&gsi, stmt, powi_result);
	      else if (ops.length () == 1)
		{
		  tree last_op = ops.last ()->op;

		  if (powi_result)
		    transform_stmt_to_multiply (&gsi, stmt, last_op,
						powi_result);
		  else
		    transform_stmt_to_copy (&gsi, stmt, last_op);
		}
	      else
		{
		  enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
		  int ops_num = ops.length ();
		  int width = get_reassociation_width (ops_num, rhs_code, mode);
		  tree new_lhs = lhs;

		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file,
			     "Width = %d was chosen for reassociation\n", width);

		  if (width > 1
		      && ops.length () > 3)
		    rewrite_expr_tree_parallel (stmt, width, ops);
		  else
		    {
		      /* When there are three operands left, we want
			 to make sure the ones that get the double
			 binary op are chosen wisely.  */
		      int len = ops.length ();
		      if (len >= 3)
			swap_ops_for_binary_stmt (ops, len - 3, stmt);

		      new_lhs = rewrite_expr_tree (stmt, 0, ops,
						   powi_result != NULL);
		    }

		  /* If we combined some repeated factors into a
		     __builtin_powi call, multiply that result by the
		     reassociated operands.  */
		  if (powi_result)
		    {
		      gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
		      tree type = TREE_TYPE (lhs);
		      tree target_ssa = make_temp_ssa_name (type, NULL,
							    "reassocpow");
		      gimple_set_lhs (lhs_stmt, target_ssa);
		      update_stmt (lhs_stmt);
		      if (lhs != new_lhs)
			target_ssa = new_lhs;
		      mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
							       powi_result,
							       target_ssa);
		      gimple_set_location (mul_stmt, gimple_location (stmt));
		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
		    }
		}
	    }
	}
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    reassociate_bb (son);
}
void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
void debug_ops_vector (vec<operand_entry_t> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
{
  operand_entry_t oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op, 0);
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry_t> ops)
{
  dump_ops_vector (stderr, ops);
}
static void
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
  reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
}
/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  operand_entry_pool = create_alloc_pool ("operand entry pool",
					  sizeof (struct operand_entry), 30);
  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, long>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
	insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
}
/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
			    reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
			    reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
			    reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
			    reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
			    reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
			    reassociate_stats.pows_created);

  delete operand_rank;
  free_alloc_pool (operand_entry_pool);
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}
/* Gate and execute functions for Reassociation.  */

static unsigned int
execute_reassoc (void)
{
  init_reassoc ();

  do_reassoc ();
  repropagate_negates ();

  fini_reassoc ();
  return 0;
}
namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt)
  {}

  /* opt_pass methods: */
  opt_pass * clone () { return new pass_reassoc (m_ctxt); }
  virtual bool gate (function *) { return flag_tree_reassoc != 0; }
  virtual unsigned int execute (function *) { return execute_reassoc (); }

}; // class pass_reassoc

} // anon namespace
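/* Factory function used by the pass manager to create an instance of
   the reassociation pass.  */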
gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}