/* Reassociation for trees.
   Copyright (C) 2005, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-inline.h"
#include "tree-flow.h"
#include "gimple.h"
#include "tree-dump.h"
#include "timevar.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "alloc-pool.h"
#include "vec.h"
#include "langhooks.h"
#include "pointer-set.h"
#include "cfgloop.h"
#include "flags.h"
/* This is a simple global reassociation pass.  It is, in part, based
   on the LLVM pass of the same name (They do some things more/less
   than we do, in different orders, etc).

   It consists of five steps:

   1. Breaking up subtract operations into addition + negate, where
   it would promote the reassociation of adds.

   2. Left linearization of the expression trees, so that (A+B)+(C+D)
   becomes (((A+B)+C)+D), which is easier for us to rewrite later.
   During linearization, we place the operands of the binary
   expressions into a vector of operand_entry_t

   3. Optimization of the operand lists, eliminating things like a +
   -a, a & a, etc.

   4. Rewrite the expression trees we linearized and optimized so
   they are in proper rank order.

   5. Repropagate negates, as nothing else will clean it up ATM.

   A bit of theory on #4, since nobody seems to write anything down
   about why it makes sense to do it the way they do it:

   We could do this much nicer theoretically, but don't (for reasons
   explained after how to do it theoretically nice :P).

   In order to promote the most redundancy elimination, you want
   binary expressions whose operands are the same rank (or
   preferably, the same value) exposed to the redundancy eliminator,
   for possible elimination.

   So the way to do this if we really cared, is to build the new op
   tree from the leaves to the roots, merging as you go, and putting the
   new op on the end of the worklist, until you are left with one
   thing on the worklist.

   IE if you have to rewrite the following set of operands (listed with
   rank in parentheses), with opcode PLUS_EXPR:

   a (1),  b (1),  c (1),  d (2), e (2)

   We start with our merge worklist empty, and the ops list with all of
   those on it.

   You want to first merge all leaves of the same rank, as much as
   possible.

   So first build a binary op of

   mergetmp = a + b, and put "mergetmp" on the merge worklist.

   Because there is no three operand form of PLUS_EXPR, c is not going to
   be exposed to redundancy elimination as a rank 1 operand.

   So you might as well throw it on the merge worklist (you could also
   consider it to now be a rank two operand, and merge it with d and e,
   but in this case, you then have evicted e from a binary op. So at
   least in this situation, you can't win.)

   Then build a binary op of d + e
   mergetmp2 = d + e

   and put mergetmp2 on the merge worklist.

   so merge worklist = {mergetmp, c, mergetmp2}

   Continue building binary ops of these operations until you have only
   one operation left on the worklist.

   AKA, we do

   mergetmp3 = mergetmp + c

   worklist = {mergetmp2, mergetmp3}

   mergetmp4 = mergetmp2 + mergetmp3

   worklist = {mergetmp4}

   because we have one operation left, we can now just set the original
   statement equal to the result of that operation.

   This will at least expose a + b and d + e to redundancy elimination
   as binary operations.

   For extra points, you can reuse the old statements to build the
   mergetmps, since you shouldn't run out.

   So why don't we do this?

   Because it's expensive, and rarely will help.  Most trees we are
   reassociating have 3 or less ops.  If they have 2 ops, they already
   will be written into a nice single binary op.  If you have 3 ops, a
   single simple check suffices to tell you whether the first two are of the
   same rank.  If so, you know to order it

   mergetmp = op1 + op2
   newstmt = mergetmp + op3

   instead of
   mergetmp = op2 + op3
   newstmt = mergetmp + op1

   If all three are of the same rank, you can't expose them all in a
   single binary operator anyway, so the above is *still* the best you
   can do.

   Thus, this is what we do.  When we have three ops left, we check to see
   what order to put them in, and call it a day.  As a nod to vector sum
   reduction, we check if any of the ops are really a phi node that is a
   destructive update for the associating op, and keep the destructive
   update together for vector sum reduction recognition.  */
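/* A short end-to-end illustration of the five steps (the SSA names
   here are invented for the example, not taken from real dumps):

     x_1 = a_2 - b_3;
     y_4 = x_1 + b_3;

   Step 1 breaks up the subtract:  t_5 = -b_3;  x_1 = a_2 + t_5;
   Step 2 linearizes y_4's expression tree into the operand list
   {a_2, t_5, b_3}.  Step 3 notices that t_5 is the negate of b_3 and
   folds the pair to 0, and the resulting +0 is then dropped as well,
   leaving {a_2}.  The rewrite turns y_4 into a copy of a_2, and the
   dead negate chain is cleaned up afterwards.  */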
/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
} reassociate_stats;
/* Operator, rank pair.  */
typedef struct operand_entry
{
  unsigned int rank;
  int id;
  tree op;
} *operand_entry_t;

static alloc_pool operand_entry_pool;
/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static int next_operand_entry_id;
/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;
/* Operand->rank hashtable.  */
static struct pointer_map_t *operand_rank;
/* Look up the operand rank structure for expression E.  */

static long
find_operand_rank (tree e)
{
  void **slot = pointer_map_contains (operand_rank, e);
  return slot ? (long) (intptr_t) *slot : -1;
}
/* Insert {E,RANK} into the operand rank hashtable.  */

static inline void
insert_operand_rank (tree e, long rank)
{
  void **slot;
  gcc_assert (rank > 0);
  slot = pointer_map_insert (operand_rank, e);
  gcc_assert (!*slot);
  *slot = (void *) (intptr_t) rank;
}
/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* Constants have rank 0.  */
  if (is_gimple_min_invariant (e))
    return 0;

  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      gimple stmt;
      long rank, maxrank;
      int i, n;

      if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL
	  && SSA_NAME_IS_DEFAULT_DEF (e))
	return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_bb (stmt) == NULL)
	return 0;

      if (!is_gimple_assign (stmt)
	  || gimple_vdef (stmt))
	return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
	return rank;

      /* Otherwise, find the maximum rank for the operands, or the bb
	 rank, whichever is less.  */
      rank = 0;
      maxrank = bb_rank[gimple_bb (stmt)->index];
      if (gimple_assign_single_p (stmt))
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  n = TREE_OPERAND_LENGTH (rhs);
	  if (n == 0)
	    rank = MAX (rank, get_rank (rhs));
	  else
	    {
	      for (i = 0;
		   i < n && TREE_OPERAND (rhs, i) && rank != maxrank; i++)
		rank = MAX (rank, get_rank (TREE_OPERAND (rhs, i)));
	    }
	}
      else
	{
	  n = gimple_num_ops (stmt);
	  for (i = 1; i < n && rank != maxrank; i++)
	    {
	      gcc_assert (gimple_op (stmt, i));
	      rank = MAX (rank, get_rank (gimple_op (stmt, i)));
	    }
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Rank for ");
	  print_generic_expr (dump_file, e, 0);
	  fprintf (dump_file, " is %ld\n", (rank + 1));
	}

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));

      /* And return it.  */
      return (rank + 1);
    }

  /* Globals, etc, are rank 0 */
  return 0;
}
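/* An illustrative example of the rank computation above (invented
   names; assume the containing block's bb_rank is large enough not to
   clamp anything): if find_operand_rank gives b_1 rank 1 and c_2
   rank 2, then for a_3 = b_1 + c_2 the loop computes MAX (1, 2) == 2
   and a_3 is assigned (and cached at) rank 3, the maximum operand
   rank plus one.  */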
DEF_VEC_P(operand_entry_t);
DEF_VEC_ALLOC_P(operand_entry_t, heap);
/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1
/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */

static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    return FLOAT_CONST_TYPE;
  else
    return OTHER_CONST_TYPE;
}
/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */

static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry_t oea = *(const operand_entry_t *)pa;
  const operand_entry_t oeb = *(const operand_entry_t *)pb;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oeb->rank == 0 && oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
	return constant_type (oeb->op) - constant_type (oea->op);
      else
	/* To make sorting result stable, we use unique IDs to determine
	   order.  */
	return oeb->id - oea->id;
    }

  /* Lastly, make sure the versions that are the same go next to each
     other.  We use SSA_NAME_VERSION because it's stable.  */
  if ((oeb->rank - oea->rank == 0)
      && TREE_CODE (oea->op) == SSA_NAME
      && TREE_CODE (oeb->op) == SSA_NAME)
    {
      if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
	return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
      else
	return oeb->id - oea->id;
    }

  if (oeb->rank != oea->rank)
    return oeb->rank - oea->rank;
  else
    return oeb->id - oea->id;
}
/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op)
{
  operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  VEC_safe_push (operand_entry_t, heap, *ops, oe);
}
/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    return true;

  return false;
}
/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of that operation.  Otherwise, return NULL.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple stmt = SSA_NAME_DEF_STMT (name);

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}
/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
			  VEC (operand_entry_t, heap) **ops,
			  bool *all_done,
			  unsigned int i,
			  operand_entry_t curr,
			  operand_entry_t last)
{
  /* If we have two of the same op, and the opcode is & |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
	{
	case MAX_EXPR:
	case MIN_EXPR:
	case BIT_IOR_EXPR:
	case BIT_AND_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op, 0);
	      fprintf (dump_file, " [&|minmax] ");
	      print_generic_expr (dump_file, last->op, 0);
	      fprintf (dump_file, " -> ");
	      print_generic_stmt (dump_file, last->op, 0);
	    }

	  VEC_ordered_remove (operand_entry_t, *ops, i);
	  reassociate_stats.ops_eliminated++;

	  return true;

	case BIT_XOR_EXPR:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op, 0);
	      fprintf (dump_file, " ^ ");
	      print_generic_expr (dump_file, last->op, 0);
	      fprintf (dump_file, " -> nothing\n");
	    }

	  reassociate_stats.ops_eliminated += 2;

	  if (VEC_length (operand_entry_t, *ops) == 2)
	    {
	      VEC_free (operand_entry_t, heap, *ops);
	      *ops = NULL;
	      add_to_ops_vec (ops, fold_convert (TREE_TYPE (last->op),
						 integer_zero_node));
	      *all_done = true;
	    }
	  else
	    {
	      VEC_ordered_remove (operand_entry_t, *ops, i-1);
	      VEC_ordered_remove (operand_entry_t, *ops, i-1);
	    }

	  return true;

	default:
	  break;
	}
    }
  return false;
}
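/* For instance, with OPCODE BIT_AND_EXPR the adjacent pair
   {..., a_1, a_1, ...} loses one copy (a & a == a), whereas with
   BIT_XOR_EXPR both copies are removed and a zero constant is pushed
   in their place (a ^ a == 0).  */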
static VEC(tree, heap) *plus_negates;
/* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise
   not expression, look in OPS for a corresponding positive operation to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
			   VEC (operand_entry_t, heap) **ops,
			   unsigned int currindex,
			   operand_entry_t curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       VEC_iterate (operand_entry_t, *ops, i, oe)
       && oe->rank >= curr->rank - 1 ;
       i++)
    {
      if (oe->op == negateop)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, negateop, 0);
	      fprintf (dump_file, " + -");
	      print_generic_expr (dump_file, oe->op, 0);
	      fprintf (dump_file, " -> 0\n");
	    }

	  VEC_ordered_remove (operand_entry_t, *ops, i);
	  add_to_ops_vec (ops, fold_convert(TREE_TYPE (oe->op),
					    integer_zero_node));
	  VEC_ordered_remove (operand_entry_t, *ops, currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
      else if (oe->op == notop)
	{
	  tree op_type = TREE_TYPE (oe->op);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop, 0);
	      fprintf (dump_file, " + ~");
	      print_generic_expr (dump_file, oe->op, 0);
	      fprintf (dump_file, " -> -1\n");
	    }

	  VEC_ordered_remove (operand_entry_t, *ops, i);
	  add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
	  VEC_ordered_remove (operand_entry_t, *ops, currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
    }

  /* CURR->OP is a negate expr in a plus expr: save it for later
     inspection in repropagate_negates().  */
  if (negateop != NULL_TREE)
    VEC_safe_push (tree, heap, plus_negates, curr->op);

  return false;
}
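/* For instance, in a PLUS_EXPR operand list containing both a_1 and
   t_2 = -a_1, the pair is replaced by the constant 0; if it contains
   a_1 and t_2 = ~a_1 instead, the pair is replaced by -1, since
   a + ~a == -1 in two's complement.  */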
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.
   Otherwise, return false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
		     VEC (operand_entry_t, heap) **ops,
		     unsigned int currindex,
		     operand_entry_t curr)
{
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       VEC_iterate (operand_entry_t, *ops, i, oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop, 0);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " & ~");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " | ~");
	      print_generic_expr (dump_file, oe->op, 0);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " -> 0\n");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " -> -1\n");
	    }

	  if (opcode == BIT_AND_EXPR)
	    oe->op = fold_convert (TREE_TYPE (oe->op), integer_zero_node);
	  else if (opcode == BIT_IOR_EXPR)
	    oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
					  TYPE_PRECISION (TREE_TYPE (oe->op)));

	  reassociate_stats.ops_eliminated
	    += VEC_length (operand_entry_t, *ops) - 1;
	  VEC_free (operand_entry_t, heap, *ops);
	  *ops = NULL;
	  VEC_safe_push (operand_entry_t, heap, *ops, oe);
	  return true;
	}
    }

  return false;
}
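/* For instance, if the operand list for a_1 & t_2 with t_2 = ~a_1
   reaches this function, the entire list collapses to the single
   constant 0 (a & ~a == 0); under BIT_IOR_EXPR it collapses to the
   all-ones mask instead (a | ~a == -1).  Unlike the pair eliminations
   above, the whole operand list is replaced here, not just the
   matching pair.  */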
/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */

static void
eliminate_using_constants (enum tree_code opcode,
			   VEC(operand_entry_t, heap) **ops)
{
  operand_entry_t oelast = VEC_last (operand_entry_t, *ops);
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
	{
	case BIT_AND_EXPR:
	  if (integer_zerop (oelast->op))
	    {
	      if (VEC_length (operand_entry_t, *ops) != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated
		    += VEC_length (operand_entry_t, *ops) - 1;

		  VEC_free (operand_entry_t, heap, *ops);
		  *ops = NULL;
		  VEC_safe_push (operand_entry_t, heap, *ops, oelast);
		  return;
		}
	    }
	  else if (integer_all_onesp (oelast->op))
	    {
	      if (VEC_length (operand_entry_t, *ops) != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & -1, removing\n");
		  VEC_pop (operand_entry_t, *ops);
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case BIT_IOR_EXPR:
	  if (integer_all_onesp (oelast->op))
	    {
	      if (VEC_length (operand_entry_t, *ops) != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | -1, removing all other ops\n");

		  reassociate_stats.ops_eliminated
		    += VEC_length (operand_entry_t, *ops) - 1;

		  VEC_free (operand_entry_t, heap, *ops);
		  *ops = NULL;
		  VEC_safe_push (operand_entry_t, heap, *ops, oelast);
		  return;
		}
	    }
	  else if (integer_zerop (oelast->op))
	    {
	      if (VEC_length (operand_entry_t, *ops) != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | 0, removing\n");
		  VEC_pop (operand_entry_t, *ops);
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case MULT_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && !HONOR_NANS (TYPE_MODE (type))
		  && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
		  && real_zerop (oelast->op)))
	    {
	      if (VEC_length (operand_entry_t, *ops) != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated
		    += VEC_length (operand_entry_t, *ops) - 1;
		  VEC_free (operand_entry_t, heap, *ops);
		  *ops = NULL;
		  VEC_safe_push (operand_entry_t, heap, *ops, oelast);
		  return;
		}
	    }
	  else if (integer_onep (oelast->op)
		   || (FLOAT_TYPE_P (type)
		       && !HONOR_SNANS (TYPE_MODE (type))
		       && real_onep (oelast->op)))
	    {
	      if (VEC_length (operand_entry_t, *ops) != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 1, removing\n");
		  VEC_pop (operand_entry_t, *ops);
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	case BIT_XOR_EXPR:
	case PLUS_EXPR:
	case MINUS_EXPR:
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
		  && fold_real_zero_addition_p (type, oelast->op,
						opcode == MINUS_EXPR)))
	    {
	      if (VEC_length (operand_entry_t, *ops) != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found [|^+] 0, removing\n");
		  VEC_pop (operand_entry_t, *ops);
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	default:
	  break;
	}
    }
}
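/* A few concrete cases (illustrative): a MULT_EXPR list {a_1, b_2, 0}
   collapses to {0}; a MULT_EXPR list {a_1, 1} drops the 1; a
   PLUS_EXPR or BIT_XOR_EXPR list ending in 0 simply pops the 0.  In
   each case the constant sits last because of the rank sort above.  */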
static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple,
				 bool, bool);
/* Structure for tracking and counting operands.  */
typedef struct oecount_s {
  int cnt;
  int id;
  enum tree_code oecode;
  tree op;
} oecount;

DEF_VEC_O(oecount);
DEF_VEC_ALLOC_O(oecount,heap);

/* The heap for the oecount hashtable and the sorted list of operands.  */
static VEC (oecount, heap) *cvec;
/* Hash function for oecount.  */

static hashval_t
oecount_hash (const void *p)
{
  const oecount *c = VEC_index (oecount, cvec, (size_t)p - 42);
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}
/* Comparison function for oecount.  */

static int
oecount_eq (const void *p1, const void *p2)
{
  const oecount *c1 = VEC_index (oecount, cvec, (size_t)p1 - 42);
  const oecount *c2 = VEC_index (oecount, cvec, (size_t)p2 - 42);
  return (c1->oecode == c2->oecode
	  && c1->op == c2->op);
}
/* Comparison function for qsort sorting oecount elements by count.  */

static int
oecount_cmp (const void *p1, const void *p2)
{
  const oecount *c1 = (const oecount *)p1;
  const oecount *c2 = (const oecount *)p2;
  if (c1->cnt != c2->cnt)
    return c1->cnt - c2->cnt;
  else
    /* If counts are identical, use unique IDs to stabilize qsort.  */
    return c1->id - c2->id;
}
/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  gimple stmt = SSA_NAME_DEF_STMT (*def);

  do
    {
      tree name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
	 is ours simply propagate the other operand into the stmts
	 single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
	  && (name == op
	      || gimple_assign_rhs2 (stmt) == op))
	{
	  gimple use_stmt;
	  use_operand_p use;
	  gimple_stmt_iterator gsi;
	  if (name == op)
	    name = gimple_assign_rhs2 (stmt);
	  gcc_assert (has_single_use (gimple_assign_lhs (stmt)));
	  single_imm_use (gimple_assign_lhs (stmt), &use, &use_stmt);
	  if (gimple_assign_lhs (stmt) == *def)
	    *def = name;
	  SET_USE (use, name);
	  if (TREE_CODE (name) != SSA_NAME)
	    update_stmt (use_stmt);
	  gsi = gsi_for_stmt (stmt);
	  gsi_remove (&gsi, true);
	  release_defs (stmt);
	  return;
	}

      /* Continue walking the chain.  */
      gcc_assert (name != op
		  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
    }
  while (1);
}
/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
   the result.  Places the statement after the definition of either
   OP1 or OP2.  Returns the new statement.  */

static gimple
build_and_add_sum (tree tmpvar, tree op1, tree op2, enum tree_code opcode)
{
  gimple op1def = NULL, op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gimple sum;

  /* Create the addition statement.  */
  sum = gimple_build_assign_with_ops (opcode, tmpvar, op1, op2);
  op = make_ssa_name (tmpvar, sum);
  gimple_assign_set_lhs (sum, op);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else if ((!op1def || gimple_nop_p (op1def))
	   || (op2def && !gimple_nop_p (op2def)
	       && stmt_dominates_stmt_p (op1def, op2def)))
    {
      if (gimple_code (op2def) == GIMPLE_PHI)
	{
	  gsi = gsi_after_labels (gimple_bb (op2def));
	  gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
	}
      else
	{
	  if (!stmt_ends_bb_p (op2def))
	    {
	      gsi = gsi_for_stmt (op2def);
	      gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
	    }
	  else
	    {
	      edge e;
	      edge_iterator ei;

	      FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
		if (e->flags & EDGE_FALLTHRU)
		  gsi_insert_on_edge_immediate (e, sum);
	    }
	}
    }
  else
    {
      if (gimple_code (op1def) == GIMPLE_PHI)
	{
	  gsi = gsi_after_labels (gimple_bb (op1def));
	  gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
	}
      else
	{
	  if (!stmt_ends_bb_p (op1def))
	    {
	      gsi = gsi_for_stmt (op1def);
	      gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
	    }
	  else
	    {
	      edge e;
	      edge_iterator ei;

	      FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
		if (e->flags & EDGE_FALLTHRU)
		  gsi_insert_on_edge_immediate (e, sum);
	    }
	}
    }

  update_stmt (sum);

  return sum;
}
/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
	second candidate bitmap noting all multiplication/division chains
	that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
	candidates with one (operand, code) operation removed from their
	multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
	multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
	processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b) * x * y in one invocation of the reassociation pass.  */
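/* As a concrete example (invented SSA names), given

     x_1 = a_2 * y_3;
     z_4 = b_5 * y_3;
     w_6 = x_1 + z_4;

   the pair (y_3, MULT_EXPR) occurs in two candidate chains, so the
   addition chain is rewritten into the equivalent of
   w_6 = (a_2 + b_5) * y_3, and the new inner addition is itself
   reassociated when the pass revisits it.  */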
static bool
undistribute_ops_list (enum tree_code opcode,
		       VEC (operand_entry_t, heap) **ops, struct loop *loop)
{
  unsigned int length = VEC_length (operand_entry_t, *ops);
  operand_entry_t oe1;
  unsigned i, j;
  sbitmap candidates, candidates2;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  VEC (operand_entry_t, heap) **subops;
  htab_t ctable;
  bool changed = false;
  int next_oecount_id = 0;

  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process.  */
  candidates = sbitmap_alloc (length);
  sbitmap_zero (candidates);
  nr_candidates = 0;
  for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe1); ++i)
    {
      enum tree_code dcode;
      gimple oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
	continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
	continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
	   && dcode != RDIV_EXPR)
	  || !is_reassociable_op (oe1def, dcode, loop))
	continue;

      SET_BIT (candidates, i);
      nr_candidates++;
    }

  if (nr_candidates < 2)
    {
      sbitmap_free (candidates);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
	VEC_index (operand_entry_t, *ops,
		   sbitmap_first_set_bit (candidates))->op, 0);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.  */
  cvec = NULL;
  ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
  subops = XCNEWVEC (VEC (operand_entry_t, heap) *,
		     VEC_length (operand_entry_t, *ops));
  EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
    {
      gimple oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op);
      oecode = gimple_assign_rhs_code (oedef);
      linearize_expr_tree (&subops[i], oedef,
			   associative_tree_code (oecode), false);

      for (j = 0; VEC_iterate (operand_entry_t, subops[i], j, oe1); ++j)
	{
	  oecount c;
	  void **slot;
	  size_t idx;
	  c.oecode = oecode;
	  c.cnt = 1;
	  c.id = next_oecount_id++;
	  c.op = oe1->op;
	  VEC_safe_push (oecount, heap, cvec, &c);
	  idx = VEC_length (oecount, cvec) + 41;
	  slot = htab_find_slot (ctable, (void *)idx, INSERT);
	  if (!*slot)
	    {
	      *slot = (void *)idx;
	    }
	  else
	    {
	      VEC_pop (oecount, cvec);
	      VEC_index (oecount, cvec, (size_t)*slot - 42)->cnt++;
	    }
	}
    }
  htab_delete (ctable);

  /* Sort the counting table.  */
  qsort (VEC_address (oecount, cvec), VEC_length (oecount, cvec),
	 sizeof (oecount), oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      for (j = 0; VEC_iterate (oecount, cvec, j, c); ++j)
	{
	  fprintf (dump_file, "  %u %s: ", c->cnt,
		   c->oecode == MULT_EXPR
		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
	  print_generic_expr (dump_file, c->op, 0);
	  fprintf (dump_file, "\n");
	}
    }

  /* Process the (operand, code) pairs in order of most occurrence.  */
  candidates2 = sbitmap_alloc (length);
  while (!VEC_empty (oecount, cvec))
    {
      oecount *c = VEC_last (oecount, cvec);
      if (c->cnt < 2)
	break;

      /* Now collect the operands in the outer chain that contain
	 the common operand in their inner chain.  */
      sbitmap_zero (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
	{
	  gimple oedef;
	  enum tree_code oecode;
	  unsigned j;
	  tree op = VEC_index (operand_entry_t, *ops, i)->op;

	  /* If we undistributed in this chain already this may be
	     a constant.  */
	  if (TREE_CODE (op) != SSA_NAME)
	    continue;

	  oedef = SSA_NAME_DEF_STMT (op);
	  oecode = gimple_assign_rhs_code (oedef);
	  if (oecode != c->oecode)
	    continue;

	  for (j = 0; VEC_iterate (operand_entry_t, subops[i], j, oe1); ++j)
	    {
	      if (oe1->op == c->op)
		{
		  SET_BIT (candidates2, i);
		  ++nr_candidates2;
		  break;
		}
	    }
	}

      if (nr_candidates2 >= 2)
	{
	  operand_entry_t oe1, oe2;
	  tree tmpvar;
	  gimple prod;
	  int first = sbitmap_first_set_bit (candidates2);

	  /* Build the new addition chain.  */
	  oe1 = VEC_index (operand_entry_t, *ops, first);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Building (");
	      print_generic_expr (dump_file, oe1->op, 0);
	    }
	  tmpvar = create_tmp_var (TREE_TYPE (oe1->op), NULL);
	  add_referenced_var (tmpvar);
	  zero_one_operation (&oe1->op, c->oecode, c->op);
	  EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
	    {
	      gimple sum;
	      oe2 = VEC_index (operand_entry_t, *ops, i);
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " + ");
		  print_generic_expr (dump_file, oe2->op, 0);
		}
	      zero_one_operation (&oe2->op, c->oecode, c->op);
	      sum = build_and_add_sum (tmpvar, oe1->op, oe2->op, opcode);
	      oe2->op = fold_convert (TREE_TYPE (oe2->op), integer_zero_node);
	      oe2->rank = 0;
	      oe1->op = gimple_get_lhs (sum);
	    }

	  /* Apply the multiplication/division.  */
	  prod = build_and_add_sum (tmpvar, oe1->op, c->op, c->oecode);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
	      print_generic_expr (dump_file, c->op, 0);
	      fprintf (dump_file, "\n");
	    }

	  /* Record it in the addition chain and disable further
	     undistribution with this op.  */
	  oe1->op = gimple_assign_lhs (prod);
	  oe1->rank = get_rank (oe1->op);
	  VEC_free (operand_entry_t, heap, subops[first]);

	  changed = true;
	}

      VEC_pop (oecount, cvec);
    }

  for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i)
    VEC_free (operand_entry_t, heap, subops[i]);
  free (subops);
  VEC_free (oecount, heap, cvec);
  sbitmap_free (candidates);
  sbitmap_free (candidates2);

  return changed;
}
/* Perform various identities and other optimizations on the list of
   operand entries, stored in OPS.  The tree code for the binary
   operation between all the operands is OPCODE.  */

static void
optimize_ops_list (enum tree_code opcode,
		   VEC (operand_entry_t, heap) **ops)
{
  unsigned int length = VEC_length (operand_entry_t, *ops);
  unsigned int i;
  operand_entry_t oe;
  operand_entry_t oelast = NULL;
  bool iterate = false;

  if (length == 1)
    return;

  oelast = VEC_last (operand_entry_t, *ops);

  /* If the last two are constants, pop the constants off, merge them
     and try the next two.  */
  if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
    {
      operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2);

      if (oelm1->rank == 0
	  && is_gimple_min_invariant (oelm1->op)
	  && useless_type_conversion_p (TREE_TYPE (oelm1->op),
					TREE_TYPE (oelast->op)))
	{
	  tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
				     oelm1->op, oelast->op);

	  if (folded && is_gimple_min_invariant (folded))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Merging constants\n");

	      VEC_pop (operand_entry_t, *ops);
	      VEC_pop (operand_entry_t, *ops);

	      add_to_ops_vec (ops, folded);
	      reassociate_stats.constants_eliminated++;

	      optimize_ops_list (opcode, ops);
	      return;
	    }
	}
    }

  eliminate_using_constants (opcode, ops);
  oelast = NULL;

  for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);)
    {
      bool done = false;

      if (eliminate_not_pairs (opcode, ops, i, oe))
	return;
      if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
	  || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe)))
	{
	  if (done)
	    return;
	  iterate = true;
	  oelast = NULL;
	  continue;
	}
      oelast = oe;
      i++;
    }

  length = VEC_length (operand_entry_t, *ops);
  oelast = VEC_last (operand_entry_t, *ops);

  if (iterate)
    optimize_ops_list (opcode, ops);
}
/* Return true if OPERAND is defined by a PHI node which uses the LHS
   of STMT in its operands.  This is also known as a "destructive
   update" operation.  */

static bool
is_phi_for_stmt (gimple stmt, tree operand)
{
  gimple def_stmt;
  tree lhs;
  use_operand_p arg_p;
  ssa_op_iter i;

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  lhs = gimple_assign_lhs (stmt);

  def_stmt = SSA_NAME_DEF_STMT (operand);
  if (gimple_code (def_stmt) != GIMPLE_PHI)
    return false;

  FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
    if (lhs == USE_FROM_PTR (arg_p))
      return true;
  return false;
}
/* Remove def stmt of VAR if VAR has zero uses and recurse
   on rhs1 operand if so.  */

static void
remove_visited_stmt_chain (tree var)
{
  gimple stmt;
  gimple_stmt_iterator gsi;

  while (1)
    {
      if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
	return;
      stmt = SSA_NAME_DEF_STMT (var);
      if (!is_gimple_assign (stmt)
	  || !gimple_visited_p (stmt))
	return;
      var = gimple_assign_rhs1 (stmt);
      gsi = gsi_for_stmt (stmt);
      gsi_remove (&gsi, true);
      release_defs (stmt);
    }
}
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order.  */

static void
rewrite_expr_tree (gimple stmt, unsigned int opindex,
		   VEC(operand_entry_t, heap) * ops, bool moved)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  operand_entry_t oe;

  /* If we have three operands left, then we want to make sure the ones
     that get the double binary op are the ones with the same rank.

     The alternative we try is to see if this is a destructive
     update style statement, which is like:
     b = phi (a, ...)
     a = c + b;
     In that case, we want to use the destructive update form to
     expose the possible vectorizer sum reduction opportunity.
     In that case, the third operand will be the phi node.

     We could, of course, try to be better as noted above, and do a
     lot of work to try to find these opportunities in >3 operand
     cases, but it is unlikely to be worth it.  */
  if (opindex + 3 == VEC_length (operand_entry_t, ops))
    {
      operand_entry_t oe1, oe2, oe3;

      oe1 = VEC_index (operand_entry_t, ops, opindex);
      oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
      oe3 = VEC_index (operand_entry_t, ops, opindex + 2);

      if ((oe1->rank == oe2->rank
	   && oe2->rank != oe3->rank)
	  || (is_phi_for_stmt (stmt, oe3->op)
	      && !is_phi_for_stmt (stmt, oe1->op)
	      && !is_phi_for_stmt (stmt, oe2->op)))
	{
	  struct operand_entry temp = *oe3;
	  oe3->op = oe1->op;
	  oe3->rank = oe1->rank;
	  oe1->op = temp.op;
	  oe1->rank = temp.rank;
	}
      else if ((oe1->rank == oe3->rank
		&& oe2->rank != oe3->rank)
	       || (is_phi_for_stmt (stmt, oe2->op)
		   && !is_phi_for_stmt (stmt, oe1->op)
		   && !is_phi_for_stmt (stmt, oe3->op)))
	{
	  struct operand_entry temp = *oe2;
	  oe2->op = oe1->op;
	  oe2->rank = oe1->rank;
	  oe1->op = temp.op;
	  oe1->rank = temp.rank;
	}
    }

  /* The final recursion case for this function is that you have
     exactly two operations left.
     If we had exactly one op in the entire list to start with, we
     would have never called this function, and the tail recursion
     rewrites them one at a time.  */
  if (opindex + 2 == VEC_length (operand_entry_t, ops))
    {
      operand_entry_t oe1, oe2;

      oe1 = VEC_index (operand_entry_t, ops, opindex);
      oe2 = VEC_index (operand_entry_t, ops, opindex + 1);

      if (rhs1 != oe1->op || rhs2 != oe2->op)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Transforming ");
	      print_gimple_stmt (dump_file, stmt, 0, 0);
	    }

	  gimple_assign_set_rhs1 (stmt, oe1->op);
	  gimple_assign_set_rhs2 (stmt, oe2->op);
	  update_stmt (stmt);
	  if (rhs1 != oe1->op && rhs1 != oe2->op)
	    remove_visited_stmt_chain (rhs1);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " into ");
	      print_gimple_stmt (dump_file, stmt, 0, 0);
	    }
	}
      return;
    }

  /* If we hit here, we should have 3 or more ops left.  */
  gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops));

  /* Rewrite the next operator.  */
  oe = VEC_index (operand_entry_t, ops, opindex);

  if (oe->op != rhs2)
    {
      if (!moved)
	{
	  gimple_stmt_iterator gsinow, gsirhs1;
	  gimple stmt1 = stmt, stmt2;
	  unsigned int count;

	  gsinow = gsi_for_stmt (stmt);
	  count = VEC_length (operand_entry_t, ops) - opindex - 2;
	  while (count-- != 0)
	    {
	      stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
	      gsirhs1 = gsi_for_stmt (stmt2);
	      gsi_move_before (&gsirhs1, &gsinow);
	      gsi_prev (&gsinow);
	      stmt1 = stmt2;
	    }
	  moved = true;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Transforming ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}

      gimple_assign_set_rhs2 (stmt, oe->op);
      update_stmt (stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " into ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}
    }
  /* Recurse on the LHS of the binary operator, which is guaranteed to
     be the non-leaf side.  */
  rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
}
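/* For example (illustrative), rewriting the linearized chain

     t_1 = a_2 + b_3;
     t_4 = t_1 + c_5;

   against a sorted operand list {c_5, b_3, a_2} (decreasing rank)
   yields

     t_1 = b_3 + a_2;
     t_4 = t_1 + c_5;

   i.e. the highest-ranked operand ends up in the outermost statement
   and the lower-ranked ones in the deeper one.  */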
/* Transform STMT, which is really (A + B) + (C + D) into the left
   linear form, ((A+B)+C)+D.
   Recurse on D if necessary.  */

static void
linearize_expr (gimple stmt)
{
  gimple_stmt_iterator gsinow, gsirhs;
  gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  gimple newbinrhs = NULL;
  struct loop *loop = loop_containing_stmt (stmt);

  gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
	      && is_reassociable_op (binrhs, rhscode, loop));

  gsinow = gsi_for_stmt (stmt);
  gsirhs = gsi_for_stmt (binrhs);
  gsi_move_before (&gsirhs, &gsinow);

  gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
  gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
  gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));

  if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
    newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Linearized: ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  reassociate_stats.linearized++;
  update_stmt (binrhs);
  update_stmt (binlhs);
  update_stmt (stmt);

  gimple_set_visited (stmt, true);
  gimple_set_visited (binlhs, true);
  gimple_set_visited (binrhs, true);

  /* Tail recurse on the new rhs if it still needs reassociation.  */
  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
    /* ??? This should probably be linearize_expr (newbinrhs) but I don't
	   want to change the algorithm while converting to tuples.  */
    linearize_expr (stmt);
}
/* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
   it.  Otherwise, return NULL.  */

static gimple
get_single_immediate_use (tree lhs)
{
  use_operand_p immuse;
  gimple immusestmt;

  if (TREE_CODE (lhs) == SSA_NAME
      && single_imm_use (lhs, &immuse, &immusestmt)
      && is_gimple_assign (immusestmt))
    return immusestmt;

  return NULL;
}
/* Recursively negate the value of TONEGATE, and return the SSA_NAME
   representing the negated value.  Insertions of any necessary
   instructions go before GSI.
   This function is recursive in that, if you hand it "a_5" as the
   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */

static tree
negate_value (tree tonegate, gimple_stmt_iterator *gsi)
{
  gimple negatedefstmt = NULL;
  tree resultofnegate;

  /* If we are trying to negate a name, defined by an add, negate the
     add operands instead.  */
  if (TREE_CODE (tonegate) == SSA_NAME)
    negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
  if (TREE_CODE (tonegate) == SSA_NAME
      && is_gimple_assign (negatedefstmt)
      && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
      && has_single_use (gimple_assign_lhs (negatedefstmt))
      && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
    {
      gimple_stmt_iterator gsi;
      tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
      tree rhs2 = gimple_assign_rhs2 (negatedefstmt);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs1 = negate_value (rhs1, &gsi);
      gimple_assign_set_rhs1 (negatedefstmt, rhs1);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs2 = negate_value (rhs2, &gsi);
      gimple_assign_set_rhs2 (negatedefstmt, rhs2);

      update_stmt (negatedefstmt);
      return gimple_assign_lhs (negatedefstmt);
    }

  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
  resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
					     NULL_TREE, true, GSI_SAME_STMT);
  return resultofnegate;
}
/* Return true if we should break up the subtract in STMT into an add
   with negate.  This is true when the subtract operands are really
   adds, or the subtract itself is used in an add expression.  In
   either case, breaking up the subtract into an add with negate
   exposes the adds to reassociation.  */

static bool
should_break_up_subtract (gimple stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple immusestmt;
  struct loop *loop = loop_containing_stmt (stmt);

  if (TREE_CODE (binlhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
    return true;

  if (TREE_CODE (binrhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
    return true;

  if (TREE_CODE (lhs) == SSA_NAME
      && (immusestmt = get_single_immediate_use (lhs))
      && is_gimple_assign (immusestmt)
      && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
	  || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
    return true;
  return false;
}
1639 break_up_subtract (gimple stmt
, gimple_stmt_iterator
*gsip
)
1641 tree rhs1
= gimple_assign_rhs1 (stmt
);
1642 tree rhs2
= gimple_assign_rhs2 (stmt
);
1644 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1646 fprintf (dump_file
, "Breaking up subtract ");
1647 print_gimple_stmt (dump_file
, stmt
, 0, 0);
1650 rhs2
= negate_value (rhs2
, gsip
);
1651 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
/* Recursively linearize a binary expression that is the RHS of STMT.
   Place the operands of the expression tree in the vector named OPS.  */

static void
linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt,
		     bool is_associative, bool set_visited)
{
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple binlhsdef, binrhsdef;
  bool binlhsisreassoc = false;
  bool binrhsisreassoc = false;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  struct loop *loop = loop_containing_stmt (stmt);

  if (set_visited)
    gimple_set_visited (stmt, true);

  if (TREE_CODE (binlhs) == SSA_NAME)
    {
      binlhsdef = SSA_NAME_DEF_STMT (binlhs);
      binlhsisreassoc = is_reassociable_op (binlhsdef, rhscode, loop);
    }

  if (TREE_CODE (binrhs) == SSA_NAME)
    {
      binrhsdef = SSA_NAME_DEF_STMT (binrhs);
      binrhsisreassoc = is_reassociable_op (binrhsdef, rhscode, loop);
    }

  /* If the LHS is not reassociable, but the RHS is, we need to swap
     them.  If neither is reassociable, there is nothing we can do, so
     just put them in the ops vector.  If the LHS is reassociable,
     linearize it.  If both are reassociable, then linearize the RHS
     and the LHS.  */

  if (!binlhsisreassoc)
    {
      tree temp;

      /* If this is not an associative operation like division, give up.  */
      if (!is_associative)
	{
	  add_to_ops_vec (ops, binrhs);
	  return;
	}

      if (!binrhsisreassoc)
	{
	  add_to_ops_vec (ops, binrhs);
	  add_to_ops_vec (ops, binlhs);
	  return;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "swapping operands of ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}

      swap_tree_operands (stmt,
			  gimple_assign_rhs1_ptr (stmt),
			  gimple_assign_rhs2_ptr (stmt));
      update_stmt (stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " is now ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}

      /* We want to make it so the lhs is always the reassociative op,
	 so swap.  */
      temp = binlhs;
      binlhs = binrhs;
      binrhs = temp;
    }
  else if (binrhsisreassoc)
    {
      linearize_expr (stmt);
      binlhs = gimple_assign_rhs1 (stmt);
      binrhs = gimple_assign_rhs2 (stmt);
    }

  gcc_assert (TREE_CODE (binrhs) != SSA_NAME
	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
				      rhscode, loop));
  linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
		       is_associative, set_visited);
  add_to_ops_vec (ops, binrhs);
}
/* Repropagate the negates back into subtracts, since no other pass
   currently does it.  */

static void
repropagate_negates (void)
{
  unsigned int i = 0;
  tree negate;

  for (i = 0; VEC_iterate (tree, plus_negates, i, negate); i++)
    {
      gimple user = get_single_immediate_use (negate);

      if (!user || !is_gimple_assign (user))
	continue;

      /* The negate operand can be either operand of a PLUS_EXPR
	 (it can be the LHS if the RHS is a constant for example).

	 Force the negate operand to the RHS of the PLUS_EXPR, then
	 transform the PLUS_EXPR into a MINUS_EXPR.  */
      if (gimple_assign_rhs_code (user) == PLUS_EXPR)
	{
	  /* If the negated operand appears on the LHS of the
	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
	     to force the negated operand to the RHS of the PLUS_EXPR.  */
	  if (gimple_assign_rhs1 (user) == negate)
	    {
	      swap_tree_operands (user,
				  gimple_assign_rhs1_ptr (user),
				  gimple_assign_rhs2_ptr (user));
	    }

	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
	  if (gimple_assign_rhs2 (user) == negate)
	    {
	      tree rhs1 = gimple_assign_rhs1 (user);
	      tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
	      update_stmt (user);
	    }
	}
      else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
	{
	  if (gimple_assign_rhs1 (user) == negate)
	    {
	      /* We have
		   x = -a
		   y = x - b
		 which we transform into
		   x = a + b
		   y = -x .
		 This pushes down the negate which we possibly can merge
		 into some other operation, hence insert it into the
		 plus_negates vector.  */
	      gimple feed = SSA_NAME_DEF_STMT (negate);
	      tree a = gimple_assign_rhs1 (feed);
	      tree rhs2 = gimple_assign_rhs2 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
	      gimple_replace_lhs (feed, negate);
	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
	      update_stmt (gsi_stmt (gsi));
	      gsi2 = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
	      update_stmt (gsi_stmt (gsi2));
	      gsi_move_before (&gsi, &gsi2);
	      VEC_safe_push (tree, heap, plus_negates,
			     gimple_assign_lhs (gsi_stmt (gsi2)));
	    }
	  else
	    {
	      /* Transform "x = -a; y = b - x" into "y = b + a", getting
		 rid of one operation.  */
	      gimple feed = SSA_NAME_DEF_STMT (negate);
	      tree a = gimple_assign_rhs1 (feed);
	      tree rhs1 = gimple_assign_rhs1 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
	      update_stmt (gsi_stmt (gsi));
	    }
	}
    }
}
/* Returns true if OP is of a type for which we can do reassociation.
   That is for integral or non-saturating fixed-point types, and for
   floating point type when associative-math is enabled.  */

static bool
can_reassociate_p (tree op)
{
  tree type = TREE_TYPE (op);
  if (INTEGRAL_TYPE_P (type)
      || NON_SAT_FIXED_POINT_TYPE_P (type)
      || (flag_associative_math && SCALAR_FLOAT_TYPE_P (type)))
    return true;
  return false;
}
/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   IE given
   d = f + g
   c = a + e
   b = c - d
   q = b - r
   k = t - q

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement.  */

static void
break_up_subtract_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_visited (stmt, false);

      if (!is_gimple_assign (stmt)
	  || !can_reassociate_p (gimple_assign_lhs (stmt)))
	continue;

      /* Look for simple gimple subtract operations.  */
      if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
	{
	  if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
	      || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
	    continue;

	  /* Check for a subtract used only in an addition.  If this
	     is the case, transform it into add of a negate for better
	     reassociation.  IE transform C = A-B into C = A + -B if C
	     is only used in an addition.  */
	  if (should_break_up_subtract (stmt))
	    break_up_subtract (stmt, &gsi);
	}
      else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
	       && can_reassociate_p (gimple_assign_rhs1 (stmt)))
	VEC_safe_push (tree, heap, plus_negates, gimple_assign_lhs (stmt));
    }
  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    break_up_subtract_bb (son);
}
/* Reassociate expressions in basic block BB and its post-dominator as
   children.  */

static void
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;

  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt))
	{
	  tree lhs, rhs1, rhs2;
	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

	  /* If this is not a gimple binary expression, there is
	     nothing for us to do with it.  */
	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
	    continue;

	  /* If this was part of an already processed statement,
	     we don't need to touch it again.  */
	  if (gimple_visited_p (stmt))
	    {
	      /* This statement might have become dead because of previous
		 reassociations.  */
	      if (has_zero_uses (gimple_get_lhs (stmt)))
		{
		  gsi_remove (&gsi, true);
		  release_defs (stmt);
		  /* We might end up removing the last stmt above which
		     places the iterator to the end of the sequence.
		     Reset it to the last stmt in this case which might
		     be the end of the sequence as well if we removed
		     the last statement of the sequence.  In which case
		     we need to bail out.  */
		  if (gsi_end_p (gsi))
		    {
		      gsi = gsi_last_bb (bb);
		      if (gsi_end_p (gsi))
			break;
		    }
		}
	      continue;
	    }

	  lhs = gimple_assign_lhs (stmt);
	  rhs1 = gimple_assign_rhs1 (stmt);
	  rhs2 = gimple_assign_rhs2 (stmt);

	  if (!can_reassociate_p (lhs)
	      || !can_reassociate_p (rhs1)
	      || !can_reassociate_p (rhs2))
	    continue;

	  if (associative_tree_code (rhs_code))
	    {
	      VEC(operand_entry_t, heap) *ops = NULL;

	      /* There may be no immediate uses left by the time we
		 get here because we may have eliminated them all.  */
	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
		continue;

	      gimple_set_visited (stmt, true);
	      linearize_expr_tree (&ops, stmt, true, true);
	      qsort (VEC_address (operand_entry_t, ops),
		     VEC_length (operand_entry_t, ops),
		     sizeof (operand_entry_t),
		     sort_by_operand_rank);
	      optimize_ops_list (rhs_code, &ops);
	      if (undistribute_ops_list (rhs_code, &ops,
					 loop_containing_stmt (stmt)))
		{
		  qsort (VEC_address (operand_entry_t, ops),
			 VEC_length (operand_entry_t, ops),
			 sizeof (operand_entry_t),
			 sort_by_operand_rank);
		  optimize_ops_list (rhs_code, &ops);
		}

	      if (VEC_length (operand_entry_t, ops) == 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file, "Transforming ");
		      print_gimple_stmt (dump_file, stmt, 0, 0);
		    }

		  rhs1 = gimple_assign_rhs1 (stmt);
		  gimple_assign_set_rhs_from_tree (&gsi,
						   VEC_last (operand_entry_t,
							     ops)->op);
		  update_stmt (stmt);
		  remove_visited_stmt_chain (rhs1);

		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file, " into ");
		      print_gimple_stmt (dump_file, stmt, 0, 0);
		    }
		}
	      else
		rewrite_expr_tree (stmt, 0, ops, false);

	      VEC_free (operand_entry_t, heap, ops);
	    }
	}
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    reassociate_bb (son);
}
void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops);
void debug_ops_vector (VEC (operand_entry_t, heap) *ops);
/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops)
{
  operand_entry_t oe;
  unsigned int i;

  for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op, 0);
    }
}
2043 debug_ops_vector (VEC (operand_entry_t
, heap
) *ops
)
2045 dump_ops_vector (stderr
, ops
);
static void
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR);
  reassociate_bb (EXIT_BLOCK_PTR);
}
/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  tree param;
  int *bbs = XNEWVEC (int, last_basic_block + 1);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  operand_entry_pool = create_alloc_pool ("operand entry pool",
					  sizeof (struct operand_entry), 30);
  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block + 1);
  operand_rank = pointer_map_create ();

  /* Give each argument a distinct rank.  */
  for (param = DECL_ARGUMENTS (current_function_decl);
       param;
       param = TREE_CHAIN (param))
    {
      if (gimple_default_def (cfun, param) != NULL)
	{
	  tree def = gimple_default_def (cfun, param);
	  insert_operand_rank (def, ++rank);
	}
    }

  /* Give the chain decl a distinct rank.  */
  if (cfun->static_chain_decl != NULL)
    {
      tree def = gimple_default_def (cfun, cfun->static_chain_decl);
      if (def != NULL)
	insert_operand_rank (def, ++rank);
    }

  /* Set up rank for each BB  */
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = NULL;
}
/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
			    reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
			    reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
			    reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
			    reassociate_stats.rewritten);

  pointer_map_destroy (operand_rank);
  free_alloc_pool (operand_entry_pool);
  free (bb_rank);
  VEC_free (tree, heap, plus_negates);
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}
/* Gate and execute functions for Reassociation.  */

static unsigned int
execute_reassoc (void)
{
  init_reassoc ();

  do_reassoc ();
  repropagate_negates ();

  fini_reassoc ();
  return 0;
}

static bool
gate_tree_ssa_reassoc (void)
{
  return flag_tree_reassoc != 0;
}
struct gimple_opt_pass pass_reassoc =
{
 {
  GIMPLE_PASS,
  "reassoc",				/* name */
  gate_tree_ssa_reassoc,		/* gate */
  execute_reassoc,			/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_TREE_REASSOC,			/* tv_id */
  PROP_cfg | PROP_ssa,			/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func | TODO_ggc_collect | TODO_verify_ssa /* todo_flags_finish */
 }
};