1 /* Reassociation for trees.
2 Copyright (C) 2005, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
27 #include "basic-block.h"
28 #include "diagnostic.h"
29 #include "tree-inline.h"
30 #include "tree-flow.h"
32 #include "tree-dump.h"
34 #include "tree-iterator.h"
36 #include "tree-pass.h"
37 #include "alloc-pool.h"
39 #include "langhooks.h"
40 #include "pointer-set.h"
44 /* This is a simple global reassociation pass. It is, in part, based
45 on the LLVM pass of the same name (They do some things more/less
46 than we do, in different orders, etc).
48 It consists of five steps:
50 1. Breaking up subtract operations into addition + negate, where
51 it would promote the reassociation of adds.
53 2. Left linearization of the expression trees, so that (A+B)+(C+D)
54 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
55 During linearization, we place the operands of the binary
56 expressions into a vector of operand_entry_t
58 3. Optimization of the operand lists, eliminating things like a +
61 4. Rewrite the expression trees we linearized and optimized so
62 they are in proper rank order.
64 5. Repropagate negates, as nothing else will clean it up ATM.
66 A bit of theory on #4, since nobody seems to write anything down
67 about why it makes sense to do it the way they do it:
69 We could do this much nicer theoretically, but don't (for reasons
70 explained after how to do it theoretically nice :P).
72 In order to promote the most redundancy elimination, you want
73 binary expressions whose operands are the same rank (or
74 preferably, the same value) exposed to the redundancy eliminator,
75 for possible elimination.
77 So the way to do this if we really cared, is to build the new op
78 tree from the leaves to the roots, merging as you go, and putting the
79 new op on the end of the worklist, until you are left with one
80 thing on the worklist.
82 IE if you have to rewrite the following set of operands (listed with
83 rank in parentheses), with opcode PLUS_EXPR:
85 a (1), b (1), c (1), d (2), e (2)
88 We start with our merge worklist empty, and the ops list with all of
91 You want to first merge all leaves of the same rank, as much as
94 So first build a binary op of
96 mergetmp = a + b, and put "mergetmp" on the merge worklist.
98 Because there is no three operand form of PLUS_EXPR, c is not going to
99 be exposed to redundancy elimination as a rank 1 operand.
101 So you might as well throw it on the merge worklist (you could also
102 consider it to now be a rank two operand, and merge it with d and e,
103 but in this case, you then have evicted e from a binary op. So at
104 least in this situation, you can't win.)
106 Then build a binary op of d + e
109 and put mergetmp2 on the merge worklist.
111 so merge worklist = {mergetmp, c, mergetmp2}
113 Continue building binary ops of these operations until you have only
114 one operation left on the worklist.
119 mergetmp3 = mergetmp + c
121 worklist = {mergetmp2, mergetmp3}
123 mergetmp4 = mergetmp2 + mergetmp3
125 worklist = {mergetmp4}
127 because we have one operation left, we can now just set the original
128 statement equal to the result of that operation.
130 This will at least expose a + b and d + e to redundancy elimination
131 as binary operations.
133 For extra points, you can reuse the old statements to build the
134 mergetmps, since you shouldn't run out.
136 So why don't we do this?
138 Because it's expensive, and rarely will help. Most trees we are
139 reassociating have 3 or less ops. If they have 2 ops, they already
140 will be written into a nice single binary op. If you have 3 ops, a
141 single simple check suffices to tell you whether the first two are of the
142 same rank. If so, you know to order it
145 newstmt = mergetmp + op3
149 newstmt = mergetmp + op1
151 If all three are of the same rank, you can't expose them all in a
152 single binary operator anyway, so the above is *still* the best you
155 Thus, this is what we do. When we have three ops left, we check to see
156 what order to put them in, and call it a day. As a nod to vector sum
157 reduction, we check if any of the ops are really a phi node that is a
158 destructive update for the associating op, and keep the destructive
159 update together for vector sum reduction recognition. */
166 int constants_eliminated
;
171 /* Operator, rank pair. */
172 typedef struct operand_entry
178 static alloc_pool operand_entry_pool
;
181 /* Starting rank number for a given basic block, so that we can rank
182 operations using unmovable instructions in that BB based on the bb
184 static long *bb_rank
;
186 /* Operand->rank hashtable. */
187 static struct pointer_map_t
*operand_rank
;
190 /* Look up the operand rank structure for expression E. */
193 find_operand_rank (tree e
)
195 void **slot
= pointer_map_contains (operand_rank
, e
);
196 return slot
? (long) (intptr_t) *slot
: -1;
199 /* Insert {E,RANK} into the operand rank hashtable. */
202 insert_operand_rank (tree e
, long rank
)
205 gcc_assert (rank
> 0);
206 slot
= pointer_map_insert (operand_rank
, e
);
208 *slot
= (void *) (intptr_t) rank
;
211 /* Given an expression E, return the rank of the expression. */
216 /* Constants have rank 0. */
217 if (is_gimple_min_invariant (e
))
220 /* SSA_NAME's have the rank of the expression they are the result
222 For globals and uninitialized values, the rank is 0.
223 For function arguments, use the pre-setup rank.
224 For PHI nodes, stores, asm statements, etc, we use the rank of
226 For simple operations, the rank is the maximum rank of any of
227 its operands, or the bb_rank, whichever is less.
228 I make no claims that this is optimal, however, it gives good
231 if (TREE_CODE (e
) == SSA_NAME
)
237 if (TREE_CODE (SSA_NAME_VAR (e
)) == PARM_DECL
238 && SSA_NAME_IS_DEFAULT_DEF (e
))
239 return find_operand_rank (e
);
241 stmt
= SSA_NAME_DEF_STMT (e
);
242 if (gimple_bb (stmt
) == NULL
)
245 if (!is_gimple_assign (stmt
)
246 || gimple_vdef (stmt
))
247 return bb_rank
[gimple_bb (stmt
)->index
];
249 /* If we already have a rank for this expression, use that. */
250 rank
= find_operand_rank (e
);
254 /* Otherwise, find the maximum rank for the operands, or the bb
255 rank, whichever is less. */
257 maxrank
= bb_rank
[gimple_bb(stmt
)->index
];
258 if (gimple_assign_single_p (stmt
))
260 tree rhs
= gimple_assign_rhs1 (stmt
);
261 n
= TREE_OPERAND_LENGTH (rhs
);
263 rank
= MAX (rank
, get_rank (rhs
));
267 i
< n
&& TREE_OPERAND (rhs
, i
) && rank
!= maxrank
; i
++)
268 rank
= MAX(rank
, get_rank (TREE_OPERAND (rhs
, i
)));
273 n
= gimple_num_ops (stmt
);
274 for (i
= 1; i
< n
&& rank
!= maxrank
; i
++)
276 gcc_assert (gimple_op (stmt
, i
));
277 rank
= MAX(rank
, get_rank (gimple_op (stmt
, i
)));
281 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
283 fprintf (dump_file
, "Rank for ");
284 print_generic_expr (dump_file
, e
, 0);
285 fprintf (dump_file
, " is %ld\n", (rank
+ 1));
288 /* Note the rank in the hashtable so we don't recompute it. */
289 insert_operand_rank (e
, (rank
+ 1));
293 /* Globals, etc, are rank 0 */
297 DEF_VEC_P(operand_entry_t
);
298 DEF_VEC_ALLOC_P(operand_entry_t
, heap
);
300 /* We want integer ones to end up last no matter what, since they are
301 the ones we can do the most with. */
302 #define INTEGER_CONST_TYPE 1 << 3
303 #define FLOAT_CONST_TYPE 1 << 2
304 #define OTHER_CONST_TYPE 1 << 1
306 /* Classify an invariant tree into integer, float, or other, so that
307 we can sort them to be near other constants of the same type. */
309 constant_type (tree t
)
311 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
312 return INTEGER_CONST_TYPE
;
313 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
314 return FLOAT_CONST_TYPE
;
316 return OTHER_CONST_TYPE
;
319 /* qsort comparison function to sort operand entries PA and PB by rank
320 so that the sorted array is ordered by rank in decreasing order. */
322 sort_by_operand_rank (const void *pa
, const void *pb
)
324 const operand_entry_t oea
= *(const operand_entry_t
*)pa
;
325 const operand_entry_t oeb
= *(const operand_entry_t
*)pb
;
327 /* It's nicer for optimize_expression if constants that are likely
328 to fold when added/multiplied//whatever are put next to each
329 other. Since all constants have rank 0, order them by type. */
330 if (oeb
->rank
== 0 && oea
->rank
== 0)
331 return constant_type (oeb
->op
) - constant_type (oea
->op
);
333 /* Lastly, make sure the versions that are the same go next to each
334 other. We use SSA_NAME_VERSION because it's stable. */
335 if ((oeb
->rank
- oea
->rank
== 0)
336 && TREE_CODE (oea
->op
) == SSA_NAME
337 && TREE_CODE (oeb
->op
) == SSA_NAME
)
338 return SSA_NAME_VERSION (oeb
->op
) - SSA_NAME_VERSION (oea
->op
);
340 return oeb
->rank
- oea
->rank
;
343 /* Add an operand entry to *OPS for the tree operand OP. */
346 add_to_ops_vec (VEC(operand_entry_t
, heap
) **ops
, tree op
)
348 operand_entry_t oe
= (operand_entry_t
) pool_alloc (operand_entry_pool
);
351 oe
->rank
= get_rank (op
);
352 VEC_safe_push (operand_entry_t
, heap
, *ops
, oe
);
355 /* Return true if STMT is reassociable operation containing a binary
356 operation with tree code CODE, and is inside LOOP. */
359 is_reassociable_op (gimple stmt
, enum tree_code code
, struct loop
*loop
)
361 basic_block bb
= gimple_bb (stmt
);
363 if (gimple_bb (stmt
) == NULL
)
366 if (!flow_bb_inside_loop_p (loop
, bb
))
369 if (is_gimple_assign (stmt
)
370 && gimple_assign_rhs_code (stmt
) == code
371 && has_single_use (gimple_assign_lhs (stmt
)))
378 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
379 operand of the negate operation. Otherwise, return NULL. */
382 get_unary_op (tree name
, enum tree_code opcode
)
384 gimple stmt
= SSA_NAME_DEF_STMT (name
);
386 if (!is_gimple_assign (stmt
))
389 if (gimple_assign_rhs_code (stmt
) == opcode
)
390 return gimple_assign_rhs1 (stmt
);
394 /* If CURR and LAST are a pair of ops that OPCODE allows us to
395 eliminate through equivalences, do so, remove them from OPS, and
396 return true. Otherwise, return false. */
399 eliminate_duplicate_pair (enum tree_code opcode
,
400 VEC (operand_entry_t
, heap
) **ops
,
403 operand_entry_t curr
,
404 operand_entry_t last
)
407 /* If we have two of the same op, and the opcode is & |, min, or max,
408 we can eliminate one of them.
409 If we have two of the same op, and the opcode is ^, we can
410 eliminate both of them. */
412 if (last
&& last
->op
== curr
->op
)
420 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
422 fprintf (dump_file
, "Equivalence: ");
423 print_generic_expr (dump_file
, curr
->op
, 0);
424 fprintf (dump_file
, " [&|minmax] ");
425 print_generic_expr (dump_file
, last
->op
, 0);
426 fprintf (dump_file
, " -> ");
427 print_generic_stmt (dump_file
, last
->op
, 0);
430 VEC_ordered_remove (operand_entry_t
, *ops
, i
);
431 reassociate_stats
.ops_eliminated
++;
436 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
438 fprintf (dump_file
, "Equivalence: ");
439 print_generic_expr (dump_file
, curr
->op
, 0);
440 fprintf (dump_file
, " ^ ");
441 print_generic_expr (dump_file
, last
->op
, 0);
442 fprintf (dump_file
, " -> nothing\n");
445 reassociate_stats
.ops_eliminated
+= 2;
447 if (VEC_length (operand_entry_t
, *ops
) == 2)
449 VEC_free (operand_entry_t
, heap
, *ops
);
451 add_to_ops_vec (ops
, fold_convert (TREE_TYPE (last
->op
),
457 VEC_ordered_remove (operand_entry_t
, *ops
, i
-1);
458 VEC_ordered_remove (operand_entry_t
, *ops
, i
-1);
470 /* If OPCODE is PLUS_EXPR, CURR->OP is really a negate expression,
471 look in OPS for a corresponding positive operation to cancel it
472 out. If we find one, remove the other from OPS, replace
473 OPS[CURRINDEX] with 0, and return true. Otherwise, return
477 eliminate_plus_minus_pair (enum tree_code opcode
,
478 VEC (operand_entry_t
, heap
) **ops
,
479 unsigned int currindex
,
480 operand_entry_t curr
)
486 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
489 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
490 if (negateop
== NULL_TREE
)
493 /* Any non-negated version will have a rank that is one less than
494 the current rank. So once we hit those ranks, if we don't find
497 for (i
= currindex
+ 1;
498 VEC_iterate (operand_entry_t
, *ops
, i
, oe
)
499 && oe
->rank
>= curr
->rank
- 1 ;
502 if (oe
->op
== negateop
)
505 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
507 fprintf (dump_file
, "Equivalence: ");
508 print_generic_expr (dump_file
, negateop
, 0);
509 fprintf (dump_file
, " + -");
510 print_generic_expr (dump_file
, oe
->op
, 0);
511 fprintf (dump_file
, " -> 0\n");
514 VEC_ordered_remove (operand_entry_t
, *ops
, i
);
515 add_to_ops_vec (ops
, fold_convert(TREE_TYPE (oe
->op
),
517 VEC_ordered_remove (operand_entry_t
, *ops
, currindex
);
518 reassociate_stats
.ops_eliminated
++;
527 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
528 bitwise not expression, look in OPS for a corresponding operand to
529 cancel it out. If we find one, remove the other from OPS, replace
530 OPS[CURRINDEX] with 0, and return true. Otherwise, return
534 eliminate_not_pairs (enum tree_code opcode
,
535 VEC (operand_entry_t
, heap
) **ops
,
536 unsigned int currindex
,
537 operand_entry_t curr
)
543 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
544 || TREE_CODE (curr
->op
) != SSA_NAME
)
547 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
548 if (notop
== NULL_TREE
)
551 /* Any non-not version will have a rank that is one less than
552 the current rank. So once we hit those ranks, if we don't find
555 for (i
= currindex
+ 1;
556 VEC_iterate (operand_entry_t
, *ops
, i
, oe
)
557 && oe
->rank
>= curr
->rank
- 1;
562 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
564 fprintf (dump_file
, "Equivalence: ");
565 print_generic_expr (dump_file
, notop
, 0);
566 if (opcode
== BIT_AND_EXPR
)
567 fprintf (dump_file
, " & ~");
568 else if (opcode
== BIT_IOR_EXPR
)
569 fprintf (dump_file
, " | ~");
570 print_generic_expr (dump_file
, oe
->op
, 0);
571 if (opcode
== BIT_AND_EXPR
)
572 fprintf (dump_file
, " -> 0\n");
573 else if (opcode
== BIT_IOR_EXPR
)
574 fprintf (dump_file
, " -> -1\n");
577 if (opcode
== BIT_AND_EXPR
)
578 oe
->op
= fold_convert (TREE_TYPE (oe
->op
), integer_zero_node
);
579 else if (opcode
== BIT_IOR_EXPR
)
580 oe
->op
= build_low_bits_mask (TREE_TYPE (oe
->op
),
581 TYPE_PRECISION (TREE_TYPE (oe
->op
)));
583 reassociate_stats
.ops_eliminated
584 += VEC_length (operand_entry_t
, *ops
) - 1;
585 VEC_free (operand_entry_t
, heap
, *ops
);
587 VEC_safe_push (operand_entry_t
, heap
, *ops
, oe
);
595 /* Use constant value that may be present in OPS to try to eliminate
596 operands. Note that this function is only really used when we've
597 eliminated ops for other reasons, or merged constants. Across
598 single statements, fold already does all of this, plus more. There
599 is little point in duplicating logic, so I've only included the
600 identities that I could ever construct testcases to trigger. */
603 eliminate_using_constants (enum tree_code opcode
,
604 VEC(operand_entry_t
, heap
) **ops
)
606 operand_entry_t oelast
= VEC_last (operand_entry_t
, *ops
);
607 tree type
= TREE_TYPE (oelast
->op
);
609 if (oelast
->rank
== 0
610 && (INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
615 if (integer_zerop (oelast
->op
))
617 if (VEC_length (operand_entry_t
, *ops
) != 1)
619 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
620 fprintf (dump_file
, "Found & 0, removing all other ops\n");
622 reassociate_stats
.ops_eliminated
623 += VEC_length (operand_entry_t
, *ops
) - 1;
625 VEC_free (operand_entry_t
, heap
, *ops
);
627 VEC_safe_push (operand_entry_t
, heap
, *ops
, oelast
);
631 else if (integer_all_onesp (oelast
->op
))
633 if (VEC_length (operand_entry_t
, *ops
) != 1)
635 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
636 fprintf (dump_file
, "Found & -1, removing\n");
637 VEC_pop (operand_entry_t
, *ops
);
638 reassociate_stats
.ops_eliminated
++;
643 if (integer_all_onesp (oelast
->op
))
645 if (VEC_length (operand_entry_t
, *ops
) != 1)
647 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
648 fprintf (dump_file
, "Found | -1, removing all other ops\n");
650 reassociate_stats
.ops_eliminated
651 += VEC_length (operand_entry_t
, *ops
) - 1;
653 VEC_free (operand_entry_t
, heap
, *ops
);
655 VEC_safe_push (operand_entry_t
, heap
, *ops
, oelast
);
659 else if (integer_zerop (oelast
->op
))
661 if (VEC_length (operand_entry_t
, *ops
) != 1)
663 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
664 fprintf (dump_file
, "Found | 0, removing\n");
665 VEC_pop (operand_entry_t
, *ops
);
666 reassociate_stats
.ops_eliminated
++;
671 if (integer_zerop (oelast
->op
)
672 || (FLOAT_TYPE_P (type
)
673 && !HONOR_NANS (TYPE_MODE (type
))
674 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type
))
675 && real_zerop (oelast
->op
)))
677 if (VEC_length (operand_entry_t
, *ops
) != 1)
679 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
680 fprintf (dump_file
, "Found * 0, removing all other ops\n");
682 reassociate_stats
.ops_eliminated
683 += VEC_length (operand_entry_t
, *ops
) - 1;
684 VEC_free (operand_entry_t
, heap
, *ops
);
686 VEC_safe_push (operand_entry_t
, heap
, *ops
, oelast
);
690 else if (integer_onep (oelast
->op
)
691 || (FLOAT_TYPE_P (type
)
692 && !HONOR_SNANS (TYPE_MODE (type
))
693 && real_onep (oelast
->op
)))
695 if (VEC_length (operand_entry_t
, *ops
) != 1)
697 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
698 fprintf (dump_file
, "Found * 1, removing\n");
699 VEC_pop (operand_entry_t
, *ops
);
700 reassociate_stats
.ops_eliminated
++;
708 if (integer_zerop (oelast
->op
)
709 || (FLOAT_TYPE_P (type
)
710 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
711 && fold_real_zero_addition_p (type
, oelast
->op
,
712 opcode
== MINUS_EXPR
)))
714 if (VEC_length (operand_entry_t
, *ops
) != 1)
716 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
717 fprintf (dump_file
, "Found [|^+] 0, removing\n");
718 VEC_pop (operand_entry_t
, *ops
);
719 reassociate_stats
.ops_eliminated
++;
731 static void linearize_expr_tree (VEC(operand_entry_t
, heap
) **, gimple
,
734 /* Structure for tracking and counting operands. */
735 typedef struct oecount_s
{
737 enum tree_code oecode
;
742 DEF_VEC_ALLOC_O(oecount
,heap
);
744 /* The heap for the oecount hashtable and the sorted list of operands. */
745 static VEC (oecount
, heap
) *cvec
;
747 /* Hash function for oecount. */
750 oecount_hash (const void *p
)
752 const oecount
*c
= VEC_index (oecount
, cvec
, (size_t)p
- 42);
753 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
756 /* Comparison function for oecount. */
759 oecount_eq (const void *p1
, const void *p2
)
761 const oecount
*c1
= VEC_index (oecount
, cvec
, (size_t)p1
- 42);
762 const oecount
*c2
= VEC_index (oecount
, cvec
, (size_t)p2
- 42);
763 return (c1
->oecode
== c2
->oecode
764 && c1
->op
== c2
->op
);
767 /* Comparison function for qsort sorting oecount elements by count. */
770 oecount_cmp (const void *p1
, const void *p2
)
772 const oecount
*c1
= (const oecount
*)p1
;
773 const oecount
*c2
= (const oecount
*)p2
;
774 return c1
->cnt
- c2
->cnt
;
777 /* Walks the linear chain with result *DEF searching for an operation
778 with operand OP and code OPCODE removing that from the chain. *DEF
779 is updated if there is only one operand but no operation left. */
782 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
784 gimple stmt
= SSA_NAME_DEF_STMT (*def
);
788 tree name
= gimple_assign_rhs1 (stmt
);
790 /* If this is the operation we look for and one of the operands
791 is ours simply propagate the other operand into the stmts
793 if (gimple_assign_rhs_code (stmt
) == opcode
795 || gimple_assign_rhs2 (stmt
) == op
))
799 gimple_stmt_iterator gsi
;
801 name
= gimple_assign_rhs2 (stmt
);
802 gcc_assert (has_single_use (gimple_assign_lhs (stmt
)));
803 single_imm_use (gimple_assign_lhs (stmt
), &use
, &use_stmt
);
804 if (gimple_assign_lhs (stmt
) == *def
)
807 if (TREE_CODE (name
) != SSA_NAME
)
808 update_stmt (use_stmt
);
809 gsi
= gsi_for_stmt (stmt
);
810 gsi_remove (&gsi
, true);
815 /* Continue walking the chain. */
816 gcc_assert (name
!= op
817 && TREE_CODE (name
) == SSA_NAME
);
818 stmt
= SSA_NAME_DEF_STMT (name
);
823 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
824 the result. Places the statement after the definition of either
825 OP1 or OP2. Returns the new statement. */
828 build_and_add_sum (tree tmpvar
, tree op1
, tree op2
, enum tree_code opcode
)
830 gimple op1def
= NULL
, op2def
= NULL
;
831 gimple_stmt_iterator gsi
;
835 /* Create the addition statement. */
836 sum
= gimple_build_assign_with_ops (opcode
, tmpvar
, op1
, op2
);
837 op
= make_ssa_name (tmpvar
, sum
);
838 gimple_assign_set_lhs (sum
, op
);
840 /* Find an insertion place and insert. */
841 if (TREE_CODE (op1
) == SSA_NAME
)
842 op1def
= SSA_NAME_DEF_STMT (op1
);
843 if (TREE_CODE (op2
) == SSA_NAME
)
844 op2def
= SSA_NAME_DEF_STMT (op2
);
845 if ((!op1def
|| gimple_nop_p (op1def
))
846 && (!op2def
|| gimple_nop_p (op2def
)))
848 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR
));
849 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
851 else if ((!op1def
|| gimple_nop_p (op1def
))
852 || (op2def
&& !gimple_nop_p (op2def
)
853 && stmt_dominates_stmt_p (op1def
, op2def
)))
855 if (gimple_code (op2def
) == GIMPLE_PHI
)
857 gsi
= gsi_after_labels (gimple_bb (op2def
));
858 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
862 if (!stmt_ends_bb_p (op2def
))
864 gsi
= gsi_for_stmt (op2def
);
865 gsi_insert_after (&gsi
, sum
, GSI_NEW_STMT
);
872 FOR_EACH_EDGE (e
, ei
, gimple_bb (op2def
)->succs
)
873 if (e
->flags
& EDGE_FALLTHRU
)
874 gsi_insert_on_edge_immediate (e
, sum
);
880 if (gimple_code (op1def
) == GIMPLE_PHI
)
882 gsi
= gsi_after_labels (gimple_bb (op1def
));
883 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
887 if (!stmt_ends_bb_p (op1def
))
889 gsi
= gsi_for_stmt (op1def
);
890 gsi_insert_after (&gsi
, sum
, GSI_NEW_STMT
);
897 FOR_EACH_EDGE (e
, ei
, gimple_bb (op1def
)->succs
)
898 if (e
->flags
& EDGE_FALLTHRU
)
899 gsi_insert_on_edge_immediate (e
, sum
);
908 /* Perform un-distribution of divisions and multiplications.
909 A * X + B * X is transformed into (A + B) * X and A / X + B / X
910 to (A + B) / X for real X.
912 The algorithm is organized as follows.
914 - First we walk the addition chain *OPS looking for summands that
915 are defined by a multiplication or a real division. This results
916 in the candidates bitmap with relevant indices into *OPS.
918 - Second we build the chains of multiplications or divisions for
919 these candidates, counting the number of occurences of (operand, code)
920 pairs in all of the candidates chains.
922 - Third we sort the (operand, code) pairs by number of occurence and
923 process them starting with the pair with the most uses.
925 * For each such pair we walk the candidates again to build a
926 second candidate bitmap noting all multiplication/division chains
927 that have at least one occurence of (operand, code).
929 * We build an alternate addition chain only covering these
930 candidates with one (operand, code) operation removed from their
931 multiplication/division chain.
933 * The first candidate gets replaced by the alternate addition chain
934 multiplied/divided by the operand.
936 * All candidate chains get disabled for further processing and
937 processing of (operand, code) pairs continues.
939 The alternate addition chains built are re-processed by the main
940 reassociation algorithm which allows optimizing a * x * y + b * y * x
941 to (a + b ) * x * y in one invocation of the reassociation pass. */
944 undistribute_ops_list (enum tree_code opcode
,
945 VEC (operand_entry_t
, heap
) **ops
, struct loop
*loop
)
947 unsigned int length
= VEC_length (operand_entry_t
, *ops
);
950 sbitmap candidates
, candidates2
;
951 unsigned nr_candidates
, nr_candidates2
;
952 sbitmap_iterator sbi0
;
953 VEC (operand_entry_t
, heap
) **subops
;
955 bool changed
= false;
958 || opcode
!= PLUS_EXPR
)
961 /* Build a list of candidates to process. */
962 candidates
= sbitmap_alloc (length
);
963 sbitmap_zero (candidates
);
965 for (i
= 0; VEC_iterate (operand_entry_t
, *ops
, i
, oe1
); ++i
)
967 enum tree_code dcode
;
970 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
972 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
973 if (!is_gimple_assign (oe1def
))
975 dcode
= gimple_assign_rhs_code (oe1def
);
976 if ((dcode
!= MULT_EXPR
977 && dcode
!= RDIV_EXPR
)
978 || !is_reassociable_op (oe1def
, dcode
, loop
))
981 SET_BIT (candidates
, i
);
985 if (nr_candidates
< 2)
987 sbitmap_free (candidates
);
991 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
993 fprintf (dump_file
, "searching for un-distribute opportunities ");
994 print_generic_expr (dump_file
,
995 VEC_index (operand_entry_t
, *ops
,
996 sbitmap_first_set_bit (candidates
))->op
, 0);
997 fprintf (dump_file
, " %d\n", nr_candidates
);
1000 /* Build linearized sub-operand lists and the counting table. */
1002 ctable
= htab_create (15, oecount_hash
, oecount_eq
, NULL
);
1003 subops
= XCNEWVEC (VEC (operand_entry_t
, heap
) *,
1004 VEC_length (operand_entry_t
, *ops
));
1005 EXECUTE_IF_SET_IN_SBITMAP (candidates
, 0, i
, sbi0
)
1008 enum tree_code oecode
;
1011 oedef
= SSA_NAME_DEF_STMT (VEC_index (operand_entry_t
, *ops
, i
)->op
);
1012 oecode
= gimple_assign_rhs_code (oedef
);
1013 linearize_expr_tree (&subops
[i
], oedef
,
1014 associative_tree_code (oecode
), false);
1016 for (j
= 0; VEC_iterate (operand_entry_t
, subops
[i
], j
, oe1
); ++j
)
1024 VEC_safe_push (oecount
, heap
, cvec
, &c
);
1025 idx
= VEC_length (oecount
, cvec
) + 41;
1026 slot
= htab_find_slot (ctable
, (void *)idx
, INSERT
);
1029 *slot
= (void *)idx
;
1033 VEC_pop (oecount
, cvec
);
1034 VEC_index (oecount
, cvec
, (size_t)*slot
- 42)->cnt
++;
1038 htab_delete (ctable
);
1040 /* Sort the counting table. */
1041 qsort (VEC_address (oecount
, cvec
), VEC_length (oecount
, cvec
),
1042 sizeof (oecount
), oecount_cmp
);
1044 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1047 fprintf (dump_file
, "Candidates:\n");
1048 for (j
= 0; VEC_iterate (oecount
, cvec
, j
, c
); ++j
)
1050 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1051 c
->oecode
== MULT_EXPR
1052 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1053 print_generic_expr (dump_file
, c
->op
, 0);
1054 fprintf (dump_file
, "\n");
1058 /* Process the (operand, code) pairs in order of most occurence. */
1059 candidates2
= sbitmap_alloc (length
);
1060 while (!VEC_empty (oecount
, cvec
))
1062 oecount
*c
= VEC_last (oecount
, cvec
);
1066 /* Now collect the operands in the outer chain that contain
1067 the common operand in their inner chain. */
1068 sbitmap_zero (candidates2
);
1070 EXECUTE_IF_SET_IN_SBITMAP (candidates
, 0, i
, sbi0
)
1073 enum tree_code oecode
;
1075 tree op
= VEC_index (operand_entry_t
, *ops
, i
)->op
;
1077 /* If we undistributed in this chain already this may be
1079 if (TREE_CODE (op
) != SSA_NAME
)
1082 oedef
= SSA_NAME_DEF_STMT (op
);
1083 oecode
= gimple_assign_rhs_code (oedef
);
1084 if (oecode
!= c
->oecode
)
1087 for (j
= 0; VEC_iterate (operand_entry_t
, subops
[i
], j
, oe1
); ++j
)
1089 if (oe1
->op
== c
->op
)
1091 SET_BIT (candidates2
, i
);
1098 if (nr_candidates2
>= 2)
1100 operand_entry_t oe1
, oe2
;
1103 int first
= sbitmap_first_set_bit (candidates2
);
1105 /* Build the new addition chain. */
1106 oe1
= VEC_index (operand_entry_t
, *ops
, first
);
1107 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1109 fprintf (dump_file
, "Building (");
1110 print_generic_expr (dump_file
, oe1
->op
, 0);
1112 tmpvar
= create_tmp_var (TREE_TYPE (oe1
->op
), NULL
);
1113 add_referenced_var (tmpvar
);
1114 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1115 EXECUTE_IF_SET_IN_SBITMAP (candidates2
, first
+1, i
, sbi0
)
1118 oe2
= VEC_index (operand_entry_t
, *ops
, i
);
1119 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1121 fprintf (dump_file
, " + ");
1122 print_generic_expr (dump_file
, oe2
->op
, 0);
1124 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1125 sum
= build_and_add_sum (tmpvar
, oe1
->op
, oe2
->op
, opcode
);
1126 oe2
->op
= fold_convert (TREE_TYPE (oe2
->op
), integer_zero_node
);
1128 oe1
->op
= gimple_get_lhs (sum
);
1131 /* Apply the multiplication/division. */
1132 prod
= build_and_add_sum (tmpvar
, oe1
->op
, c
->op
, c
->oecode
);
1133 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1135 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1136 print_generic_expr (dump_file
, c
->op
, 0);
1137 fprintf (dump_file
, "\n");
1140 /* Record it in the addition chain and disable further
1141 undistribution with this op. */
1142 oe1
->op
= gimple_assign_lhs (prod
);
1143 oe1
->rank
= get_rank (oe1
->op
);
1144 VEC_free (operand_entry_t
, heap
, subops
[first
]);
1149 VEC_pop (oecount
, cvec
);
1152 for (i
= 0; i
< VEC_length (operand_entry_t
, *ops
); ++i
)
1153 VEC_free (operand_entry_t
, heap
, subops
[i
]);
1155 VEC_free (oecount
, heap
, cvec
);
1156 sbitmap_free (candidates
);
1157 sbitmap_free (candidates2
);
1163 /* Perform various identities and other optimizations on the list of
1164 operand entries, stored in OPS. The tree code for the binary
1165 operation between all the operands is OPCODE. */
1168 optimize_ops_list (enum tree_code opcode
,
1169 VEC (operand_entry_t
, heap
) **ops
)
1171 unsigned int length
= VEC_length (operand_entry_t
, *ops
);
1174 operand_entry_t oelast
= NULL
;
1175 bool iterate
= false;
1180 oelast
= VEC_last (operand_entry_t
, *ops
);
1182 /* If the last two are constants, pop the constants off, merge them
1183 and try the next two. */
1184 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
1186 operand_entry_t oelm1
= VEC_index (operand_entry_t
, *ops
, length
- 2);
1188 if (oelm1
->rank
== 0
1189 && is_gimple_min_invariant (oelm1
->op
)
1190 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
1191 TREE_TYPE (oelast
->op
)))
1193 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
1194 oelm1
->op
, oelast
->op
);
1196 if (folded
&& is_gimple_min_invariant (folded
))
1198 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1199 fprintf (dump_file
, "Merging constants\n");
1201 VEC_pop (operand_entry_t
, *ops
);
1202 VEC_pop (operand_entry_t
, *ops
);
1204 add_to_ops_vec (ops
, folded
);
1205 reassociate_stats
.constants_eliminated
++;
1207 optimize_ops_list (opcode
, ops
);
1213 eliminate_using_constants (opcode
, ops
);
1216 for (i
= 0; VEC_iterate (operand_entry_t
, *ops
, i
, oe
);)
1220 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
1222 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
1223 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
)))
1235 length
= VEC_length (operand_entry_t
, *ops
);
1236 oelast
= VEC_last (operand_entry_t
, *ops
);
1239 optimize_ops_list (opcode
, ops
);
1242 /* Return true if OPERAND is defined by a PHI node which uses the LHS
1243 of STMT in it's operands. This is also known as a "destructive
1244 update" operation. */
1247 is_phi_for_stmt (gimple stmt
, tree operand
)
1251 use_operand_p arg_p
;
1254 if (TREE_CODE (operand
) != SSA_NAME
)
1257 lhs
= gimple_assign_lhs (stmt
);
1259 def_stmt
= SSA_NAME_DEF_STMT (operand
);
1260 if (gimple_code (def_stmt
) != GIMPLE_PHI
)
1263 FOR_EACH_PHI_ARG (arg_p
, def_stmt
, i
, SSA_OP_USE
)
1264 if (lhs
== USE_FROM_PTR (arg_p
))
1269 /* Remove def stmt of VAR if VAR has zero uses and recurse
1270 on rhs1 operand if so. */
1273 remove_visited_stmt_chain (tree var
)
1276 gimple_stmt_iterator gsi
;
1280 if (TREE_CODE (var
) != SSA_NAME
|| !has_zero_uses (var
))
1282 stmt
= SSA_NAME_DEF_STMT (var
);
1283 if (!is_gimple_assign (stmt
)
1284 || !gimple_visited_p (stmt
))
1286 var
= gimple_assign_rhs1 (stmt
);
1287 gsi
= gsi_for_stmt (stmt
);
1288 gsi_remove (&gsi
, true);
1289 release_defs (stmt
);
1293 /* Recursively rewrite our linearized statements so that the operators
1294 match those in OPS[OPINDEX], putting the computation in rank
1298 rewrite_expr_tree (gimple stmt
, unsigned int opindex
,
1299 VEC(operand_entry_t
, heap
) * ops
, bool moved
)
1301 tree rhs1
= gimple_assign_rhs1 (stmt
);
1302 tree rhs2
= gimple_assign_rhs2 (stmt
);
1305 /* If we have three operands left, then we want to make sure the one
1306 that gets the double binary op are the ones with the same rank.
1308 The alternative we try is to see if this is a destructive
1309 update style statement, which is like:
1312 In that case, we want to use the destructive update form to
1313 expose the possible vectorizer sum reduction opportunity.
1314 In that case, the third operand will be the phi node.
1316 We could, of course, try to be better as noted above, and do a
1317 lot of work to try to find these opportunities in >3 operand
1318 cases, but it is unlikely to be worth it. */
1319 if (opindex
+ 3 == VEC_length (operand_entry_t
, ops
))
1321 operand_entry_t oe1
, oe2
, oe3
;
1323 oe1
= VEC_index (operand_entry_t
, ops
, opindex
);
1324 oe2
= VEC_index (operand_entry_t
, ops
, opindex
+ 1);
1325 oe3
= VEC_index (operand_entry_t
, ops
, opindex
+ 2);
1327 if ((oe1
->rank
== oe2
->rank
1328 && oe2
->rank
!= oe3
->rank
)
1329 || (is_phi_for_stmt (stmt
, oe3
->op
)
1330 && !is_phi_for_stmt (stmt
, oe1
->op
)
1331 && !is_phi_for_stmt (stmt
, oe2
->op
)))
1333 struct operand_entry temp
= *oe3
;
1335 oe3
->rank
= oe1
->rank
;
1337 oe1
->rank
= temp
.rank
;
1339 else if ((oe1
->rank
== oe3
->rank
1340 && oe2
->rank
!= oe3
->rank
)
1341 || (is_phi_for_stmt (stmt
, oe2
->op
)
1342 && !is_phi_for_stmt (stmt
, oe1
->op
)
1343 && !is_phi_for_stmt (stmt
, oe3
->op
)))
1345 struct operand_entry temp
= *oe2
;
1347 oe2
->rank
= oe1
->rank
;
1349 oe1
->rank
= temp
.rank
;
1353 /* The final recursion case for this function is that you have
1354 exactly two operations left.
1355 If we had one exactly one op in the entire list to start with, we
1356 would have never called this function, and the tail recursion
1357 rewrites them one at a time. */
1358 if (opindex
+ 2 == VEC_length (operand_entry_t
, ops
))
1360 operand_entry_t oe1
, oe2
;
1362 oe1
= VEC_index (operand_entry_t
, ops
, opindex
);
1363 oe2
= VEC_index (operand_entry_t
, ops
, opindex
+ 1);
1365 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
1367 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1369 fprintf (dump_file
, "Transforming ");
1370 print_gimple_stmt (dump_file
, stmt
, 0, 0);
1373 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
1374 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
1376 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
1377 remove_visited_stmt_chain (rhs1
);
1379 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1381 fprintf (dump_file
, " into ");
1382 print_gimple_stmt (dump_file
, stmt
, 0, 0);
1389 /* If we hit here, we should have 3 or more ops left. */
1390 gcc_assert (opindex
+ 2 < VEC_length (operand_entry_t
, ops
));
1392 /* Rewrite the next operator. */
1393 oe
= VEC_index (operand_entry_t
, ops
, opindex
);
1399 gimple_stmt_iterator gsinow
, gsirhs1
;
1400 gimple stmt1
= stmt
, stmt2
;
1403 gsinow
= gsi_for_stmt (stmt
);
1404 count
= VEC_length (operand_entry_t
, ops
) - opindex
- 2;
1405 while (count
-- != 0)
1407 stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1
));
1408 gsirhs1
= gsi_for_stmt (stmt2
);
1409 gsi_move_before (&gsirhs1
, &gsinow
);
1416 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1418 fprintf (dump_file
, "Transforming ");
1419 print_gimple_stmt (dump_file
, stmt
, 0, 0);
1422 gimple_assign_set_rhs2 (stmt
, oe
->op
);
1425 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1427 fprintf (dump_file
, " into ");
1428 print_gimple_stmt (dump_file
, stmt
, 0, 0);
1431 /* Recurse on the LHS of the binary operator, which is guaranteed to
1432 be the non-leaf side. */
1433 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), opindex
+ 1, ops
, moved
);
1436 /* Transform STMT, which is really (A +B) + (C + D) into the left
1437 linear form, ((A+B)+C)+D.
1438 Recurse on D if necessary. */
1441 linearize_expr (gimple stmt
)
1443 gimple_stmt_iterator gsinow
, gsirhs
;
1444 gimple binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
1445 gimple binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1446 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
1447 gimple newbinrhs
= NULL
;
1448 struct loop
*loop
= loop_containing_stmt (stmt
);
1450 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
1451 && is_reassociable_op (binrhs
, rhscode
, loop
));
1453 gsinow
= gsi_for_stmt (stmt
);
1454 gsirhs
= gsi_for_stmt (binrhs
);
1455 gsi_move_before (&gsirhs
, &gsinow
);
1457 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
1458 gimple_assign_set_rhs1 (binrhs
, gimple_assign_lhs (binlhs
));
1459 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
1461 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
1462 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1464 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1466 fprintf (dump_file
, "Linearized: ");
1467 print_gimple_stmt (dump_file
, stmt
, 0, 0);
1470 reassociate_stats
.linearized
++;
1471 update_stmt (binrhs
);
1472 update_stmt (binlhs
);
1475 gimple_set_visited (stmt
, true);
1476 gimple_set_visited (binlhs
, true);
1477 gimple_set_visited (binrhs
, true);
1479 /* Tail recurse on the new rhs if it still needs reassociation. */
1480 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
1481 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
1482 want to change the algorithm while converting to tuples. */
1483 linearize_expr (stmt
);
1486 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
1487 it. Otherwise, return NULL. */
1490 get_single_immediate_use (tree lhs
)
1492 use_operand_p immuse
;
1495 if (TREE_CODE (lhs
) == SSA_NAME
1496 && single_imm_use (lhs
, &immuse
, &immusestmt
)
1497 && is_gimple_assign (immusestmt
))
1503 static VEC(tree
, heap
) *broken_up_subtracts
;
1505 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
1506 representing the negated value. Insertions of any necessary
1507 instructions go before GSI.
1508 This function is recursive in that, if you hand it "a_5" as the
1509 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
1510 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
1513 negate_value (tree tonegate
, gimple_stmt_iterator
*gsi
)
1515 gimple negatedefstmt
= NULL
;
1516 tree resultofnegate
;
1518 /* If we are trying to negate a name, defined by an add, negate the
1519 add operands instead. */
1520 if (TREE_CODE (tonegate
) == SSA_NAME
)
1521 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
1522 if (TREE_CODE (tonegate
) == SSA_NAME
1523 && is_gimple_assign (negatedefstmt
)
1524 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
1525 && has_single_use (gimple_assign_lhs (negatedefstmt
))
1526 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
1528 gimple_stmt_iterator gsi
;
1529 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
1530 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
1532 gsi
= gsi_for_stmt (negatedefstmt
);
1533 rhs1
= negate_value (rhs1
, &gsi
);
1534 gimple_assign_set_rhs1 (negatedefstmt
, rhs1
);
1536 gsi
= gsi_for_stmt (negatedefstmt
);
1537 rhs2
= negate_value (rhs2
, &gsi
);
1538 gimple_assign_set_rhs2 (negatedefstmt
, rhs2
);
1540 update_stmt (negatedefstmt
);
1541 return gimple_assign_lhs (negatedefstmt
);
1544 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
1545 resultofnegate
= force_gimple_operand_gsi (gsi
, tonegate
, true,
1546 NULL_TREE
, true, GSI_SAME_STMT
);
1547 VEC_safe_push (tree
, heap
, broken_up_subtracts
, resultofnegate
);
1548 return resultofnegate
;
1551 /* Return true if we should break up the subtract in STMT into an add
1552 with negate. This is true when we the subtract operands are really
1553 adds, or the subtract itself is used in an add expression. In
1554 either case, breaking up the subtract into an add with negate
1555 exposes the adds to reassociation. */
1558 should_break_up_subtract (gimple stmt
)
1560 tree lhs
= gimple_assign_lhs (stmt
);
1561 tree binlhs
= gimple_assign_rhs1 (stmt
);
1562 tree binrhs
= gimple_assign_rhs2 (stmt
);
1564 struct loop
*loop
= loop_containing_stmt (stmt
);
1566 if (TREE_CODE (binlhs
) == SSA_NAME
1567 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs
), PLUS_EXPR
, loop
))
1570 if (TREE_CODE (binrhs
) == SSA_NAME
1571 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
), PLUS_EXPR
, loop
))
1574 if (TREE_CODE (lhs
) == SSA_NAME
1575 && (immusestmt
= get_single_immediate_use (lhs
))
1576 && is_gimple_assign (immusestmt
)
1577 && (gimple_assign_rhs_code (immusestmt
) == PLUS_EXPR
1578 || gimple_assign_rhs_code (immusestmt
) == MULT_EXPR
))
1583 /* Transform STMT from A - B into A + -B. */
1586 break_up_subtract (gimple stmt
, gimple_stmt_iterator
*gsip
)
1588 tree rhs1
= gimple_assign_rhs1 (stmt
);
1589 tree rhs2
= gimple_assign_rhs2 (stmt
);
1591 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1593 fprintf (dump_file
, "Breaking up subtract ");
1594 print_gimple_stmt (dump_file
, stmt
, 0, 0);
1597 rhs2
= negate_value (rhs2
, gsip
);
1598 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
1602 /* Recursively linearize a binary expression that is the RHS of STMT.
1603 Place the operands of the expression tree in the vector named OPS. */
1606 linearize_expr_tree (VEC(operand_entry_t
, heap
) **ops
, gimple stmt
,
1607 bool is_associative
, bool set_visited
)
1609 tree binlhs
= gimple_assign_rhs1 (stmt
);
1610 tree binrhs
= gimple_assign_rhs2 (stmt
);
1611 gimple binlhsdef
, binrhsdef
;
1612 bool binlhsisreassoc
= false;
1613 bool binrhsisreassoc
= false;
1614 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
1615 struct loop
*loop
= loop_containing_stmt (stmt
);
1618 gimple_set_visited (stmt
, true);
1620 if (TREE_CODE (binlhs
) == SSA_NAME
)
1622 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
1623 binlhsisreassoc
= is_reassociable_op (binlhsdef
, rhscode
, loop
);
1626 if (TREE_CODE (binrhs
) == SSA_NAME
)
1628 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
1629 binrhsisreassoc
= is_reassociable_op (binrhsdef
, rhscode
, loop
);
1632 /* If the LHS is not reassociable, but the RHS is, we need to swap
1633 them. If neither is reassociable, there is nothing we can do, so
1634 just put them in the ops vector. If the LHS is reassociable,
1635 linearize it. If both are reassociable, then linearize the RHS
1638 if (!binlhsisreassoc
)
1642 /* If this is not a associative operation like division, give up. */
1643 if (!is_associative
)
1645 add_to_ops_vec (ops
, binrhs
);
1649 if (!binrhsisreassoc
)
1651 add_to_ops_vec (ops
, binrhs
);
1652 add_to_ops_vec (ops
, binlhs
);
1656 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1658 fprintf (dump_file
, "swapping operands of ");
1659 print_gimple_stmt (dump_file
, stmt
, 0, 0);
1662 swap_tree_operands (stmt
,
1663 gimple_assign_rhs1_ptr (stmt
),
1664 gimple_assign_rhs2_ptr (stmt
));
1667 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1669 fprintf (dump_file
, " is now ");
1670 print_gimple_stmt (dump_file
, stmt
, 0, 0);
1673 /* We want to make it so the lhs is always the reassociative op,
1679 else if (binrhsisreassoc
)
1681 linearize_expr (stmt
);
1682 binlhs
= gimple_assign_rhs1 (stmt
);
1683 binrhs
= gimple_assign_rhs2 (stmt
);
1686 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
1687 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
1689 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
1690 is_associative
, set_visited
);
1691 add_to_ops_vec (ops
, binrhs
);
1694 /* Repropagate the negates back into subtracts, since no other pass
1695 currently does it. */
1698 repropagate_negates (void)
1703 for (i
= 0; VEC_iterate (tree
, broken_up_subtracts
, i
, negate
); i
++)
1705 gimple user
= get_single_immediate_use (negate
);
1707 /* The negate operand can be either operand of a PLUS_EXPR
1708 (it can be the LHS if the RHS is a constant for example).
1710 Force the negate operand to the RHS of the PLUS_EXPR, then
1711 transform the PLUS_EXPR into a MINUS_EXPR. */
1713 && is_gimple_assign (user
)
1714 && gimple_assign_rhs_code (user
) == PLUS_EXPR
)
1716 /* If the negated operand appears on the LHS of the
1717 PLUS_EXPR, exchange the operands of the PLUS_EXPR
1718 to force the negated operand to the RHS of the PLUS_EXPR. */
1719 if (gimple_assign_rhs1 (user
) == negate
)
1721 swap_tree_operands (user
,
1722 gimple_assign_rhs1_ptr (user
),
1723 gimple_assign_rhs2_ptr (user
));
1726 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
1727 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
1728 if (gimple_assign_rhs2 (user
) == negate
)
1730 tree rhs1
= gimple_assign_rhs1 (user
);
1731 tree rhs2
= get_unary_op (negate
, NEGATE_EXPR
);
1732 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
1733 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
, rhs2
);
1740 /* Break up subtract operations in block BB.
1742 We do this top down because we don't know whether the subtract is
1743 part of a possible chain of reassociation except at the top.
1752 we want to break up k = t - q, but we won't until we've transformed q
1753 = b - r, which won't be broken up until we transform b = c - d.
1755 En passant, clear the GIMPLE visited flag on every statement. */
1758 break_up_subtract_bb (basic_block bb
)
1760 gimple_stmt_iterator gsi
;
1763 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1765 gimple stmt
= gsi_stmt (gsi
);
1766 gimple_set_visited (stmt
, false);
1768 /* Look for simple gimple subtract operations. */
1769 if (is_gimple_assign (stmt
)
1770 && gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
1772 tree lhs
= gimple_assign_lhs (stmt
);
1773 tree rhs1
= gimple_assign_rhs1 (stmt
);
1774 tree rhs2
= gimple_assign_rhs2 (stmt
);
1776 /* If associative-math we can do reassociation for
1777 non-integral types. Or, we can do reassociation for
1778 non-saturating fixed-point types. */
1779 if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
1780 || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
1781 || !INTEGRAL_TYPE_P (TREE_TYPE (rhs2
)))
1782 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs
))
1783 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(rhs1
))
1784 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(rhs2
))
1785 || !flag_associative_math
)
1786 && (!NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE (lhs
))
1787 || !NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE(rhs1
))
1788 || !NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE(rhs2
))))
1791 /* Check for a subtract used only in an addition. If this
1792 is the case, transform it into add of a negate for better
1793 reassociation. IE transform C = A-B into C = A + -B if C
1794 is only used in an addition. */
1795 if (should_break_up_subtract (stmt
))
1796 break_up_subtract (stmt
, &gsi
);
1799 for (son
= first_dom_son (CDI_DOMINATORS
, bb
);
1801 son
= next_dom_son (CDI_DOMINATORS
, son
))
1802 break_up_subtract_bb (son
);
1805 /* Reassociate expressions in basic block BB and its post-dominator as
1809 reassociate_bb (basic_block bb
)
1811 gimple_stmt_iterator gsi
;
1814 for (gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
1816 gimple stmt
= gsi_stmt (gsi
);
1818 if (is_gimple_assign (stmt
))
1820 tree lhs
, rhs1
, rhs2
;
1821 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
1823 /* If this is not a gimple binary expression, there is
1824 nothing for us to do with it. */
1825 if (get_gimple_rhs_class (rhs_code
) != GIMPLE_BINARY_RHS
)
1828 /* If this was part of an already processed statement,
1829 we don't need to touch it again. */
1830 if (gimple_visited_p (stmt
))
1832 /* This statement might have become dead because of previous
1834 if (has_zero_uses (gimple_get_lhs (stmt
)))
1836 gsi_remove (&gsi
, true);
1837 release_defs (stmt
);
1838 /* We might end up removing the last stmt above which
1839 places the iterator to the end of the sequence.
1840 Reset it to the last stmt in this case which might
1841 be the end of the sequence as well if we removed
1842 the last statement of the sequence. In which case
1843 we need to bail out. */
1844 if (gsi_end_p (gsi
))
1846 gsi
= gsi_last_bb (bb
);
1847 if (gsi_end_p (gsi
))
1854 lhs
= gimple_assign_lhs (stmt
);
1855 rhs1
= gimple_assign_rhs1 (stmt
);
1856 rhs2
= gimple_assign_rhs2 (stmt
);
1858 /* If associative-math we can do reassociation for
1859 non-integral types. Or, we can do reassociation for
1860 non-saturating fixed-point types. */
1861 if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
1862 || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
1863 || !INTEGRAL_TYPE_P (TREE_TYPE (rhs2
)))
1864 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs
))
1865 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(rhs1
))
1866 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(rhs2
))
1867 || !flag_associative_math
)
1868 && (!NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE (lhs
))
1869 || !NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE(rhs1
))
1870 || !NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE(rhs2
))))
1873 if (associative_tree_code (rhs_code
))
1875 VEC(operand_entry_t
, heap
) *ops
= NULL
;
1877 /* There may be no immediate uses left by the time we
1878 get here because we may have eliminated them all. */
1879 if (TREE_CODE (lhs
) == SSA_NAME
&& has_zero_uses (lhs
))
1882 gimple_set_visited (stmt
, true);
1883 linearize_expr_tree (&ops
, stmt
, true, true);
1884 qsort (VEC_address (operand_entry_t
, ops
),
1885 VEC_length (operand_entry_t
, ops
),
1886 sizeof (operand_entry_t
),
1887 sort_by_operand_rank
);
1888 optimize_ops_list (rhs_code
, &ops
);
1889 if (undistribute_ops_list (rhs_code
, &ops
,
1890 loop_containing_stmt (stmt
)))
1892 qsort (VEC_address (operand_entry_t
, ops
),
1893 VEC_length (operand_entry_t
, ops
),
1894 sizeof (operand_entry_t
),
1895 sort_by_operand_rank
);
1896 optimize_ops_list (rhs_code
, &ops
);
1899 if (VEC_length (operand_entry_t
, ops
) == 1)
1901 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1903 fprintf (dump_file
, "Transforming ");
1904 print_gimple_stmt (dump_file
, stmt
, 0, 0);
1907 rhs1
= gimple_assign_rhs1 (stmt
);
1908 gimple_assign_set_rhs_from_tree (&gsi
,
1909 VEC_last (operand_entry_t
,
1912 remove_visited_stmt_chain (rhs1
);
1914 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1916 fprintf (dump_file
, " into ");
1917 print_gimple_stmt (dump_file
, stmt
, 0, 0);
1921 rewrite_expr_tree (stmt
, 0, ops
, false);
1923 VEC_free (operand_entry_t
, heap
, ops
);
1927 for (son
= first_dom_son (CDI_POST_DOMINATORS
, bb
);
1929 son
= next_dom_son (CDI_POST_DOMINATORS
, son
))
1930 reassociate_bb (son
);
1933 void dump_ops_vector (FILE *file
, VEC (operand_entry_t
, heap
) *ops
);
1934 void debug_ops_vector (VEC (operand_entry_t
, heap
) *ops
);
1936 /* Dump the operand entry vector OPS to FILE. */
1939 dump_ops_vector (FILE *file
, VEC (operand_entry_t
, heap
) *ops
)
1944 for (i
= 0; VEC_iterate (operand_entry_t
, ops
, i
, oe
); i
++)
1946 fprintf (file
, "Op %d -> rank: %d, tree: ", i
, oe
->rank
);
1947 print_generic_expr (file
, oe
->op
, 0);
1951 /* Dump the operand entry vector OPS to STDERR. */
1954 debug_ops_vector (VEC (operand_entry_t
, heap
) *ops
)
1956 dump_ops_vector (stderr
, ops
);
1962 break_up_subtract_bb (ENTRY_BLOCK_PTR
);
1963 reassociate_bb (EXIT_BLOCK_PTR
);
1966 /* Initialize the reassociation pass. */
1974 int *bbs
= XNEWVEC (int, last_basic_block
+ 1);
1976 /* Find the loops, so that we can prevent moving calculations in
1978 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
1980 memset (&reassociate_stats
, 0, sizeof (reassociate_stats
));
1982 operand_entry_pool
= create_alloc_pool ("operand entry pool",
1983 sizeof (struct operand_entry
), 30);
1985 /* Reverse RPO (Reverse Post Order) will give us something where
1986 deeper loops come later. */
1987 pre_and_rev_post_order_compute (NULL
, bbs
, false);
1988 bb_rank
= XCNEWVEC (long, last_basic_block
+ 1);
1989 operand_rank
= pointer_map_create ();
1991 /* Give each argument a distinct rank. */
1992 for (param
= DECL_ARGUMENTS (current_function_decl
);
1994 param
= TREE_CHAIN (param
))
1996 if (gimple_default_def (cfun
, param
) != NULL
)
1998 tree def
= gimple_default_def (cfun
, param
);
1999 insert_operand_rank (def
, ++rank
);
2003 /* Give the chain decl a distinct rank. */
2004 if (cfun
->static_chain_decl
!= NULL
)
2006 tree def
= gimple_default_def (cfun
, cfun
->static_chain_decl
);
2008 insert_operand_rank (def
, ++rank
);
2011 /* Set up rank for each BB */
2012 for (i
= 0; i
< n_basic_blocks
- NUM_FIXED_BLOCKS
; i
++)
2013 bb_rank
[bbs
[i
]] = ++rank
<< 16;
2016 calculate_dominance_info (CDI_POST_DOMINATORS
);
2017 broken_up_subtracts
= NULL
;
2020 /* Cleanup after the reassociation pass, and print stats if
2026 statistics_counter_event (cfun
, "Linearized",
2027 reassociate_stats
.linearized
);
2028 statistics_counter_event (cfun
, "Constants eliminated",
2029 reassociate_stats
.constants_eliminated
);
2030 statistics_counter_event (cfun
, "Ops eliminated",
2031 reassociate_stats
.ops_eliminated
);
2032 statistics_counter_event (cfun
, "Statements rewritten",
2033 reassociate_stats
.rewritten
);
2035 pointer_map_destroy (operand_rank
);
2036 free_alloc_pool (operand_entry_pool
);
2038 VEC_free (tree
, heap
, broken_up_subtracts
);
2039 free_dominance_info (CDI_POST_DOMINATORS
);
2040 loop_optimizer_finalize ();
2043 /* Gate and execute functions for Reassociation. */
2046 execute_reassoc (void)
2051 repropagate_negates ();
2058 gate_tree_ssa_reassoc (void)
2060 return flag_tree_reassoc
!= 0;
2063 struct gimple_opt_pass pass_reassoc
=
2067 "reassoc", /* name */
2068 gate_tree_ssa_reassoc
, /* gate */
2069 execute_reassoc
, /* execute */
2072 0, /* static_pass_number */
2073 TV_TREE_REASSOC
, /* tv_id */
2074 PROP_cfg
| PROP_ssa
, /* properties_required */
2075 0, /* properties_provided */
2076 0, /* properties_destroyed */
2077 0, /* todo_flags_start */
2078 TODO_dump_func
| TODO_ggc_collect
| TODO_verify_ssa
/* todo_flags_finish */