/* Reassociation for trees.
   Copyright (C) 2005, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-inline.h"
#include "tree-flow.h"
#include "gimple.h"
#include "tree-dump.h"
#include "timevar.h"
#include "tree-iterator.h"
#include "real.h"
#include "tree-pass.h"
#include "alloc-pool.h"
#include "vec.h"
#include "langhooks.h"
#include "pointer-set.h"
#include "cfgloop.h"
#include "flags.h"
/* This is a simple global reassociation pass.  It is, in part, based
   on the LLVM pass of the same name (They do some things more/less
   than we do, in different orders, etc).

   It consists of five steps:

   1. Breaking up subtract operations into addition + negate, where
   it would promote the reassociation of adds.

   2. Left linearization of the expression trees, so that (A+B)+(C+D)
   becomes (((A+B)+C)+D), which is easier for us to rewrite later.
   During linearization, we place the operands of the binary
   expressions into a vector of operand_entry_t.

   3. Optimization of the operand lists, eliminating things like a +
   -a, a & a, etc.

   4. Rewrite the expression trees we linearized and optimized so
   they are in proper rank order.

   5. Repropagate negates, as nothing else will clean it up ATM.

   A bit of theory on #4, since nobody seems to write anything down
   about why it makes sense to do it the way they do it:

   We could do this much nicer theoretically, but don't (for reasons
   explained after how to do it theoretically nice :P).

   In order to promote the most redundancy elimination, you want
   binary expressions whose operands are the same rank (or
   preferably, the same value) exposed to the redundancy eliminator,
   for possible elimination.

   So the way to do this if we really cared, is to build the new op
   tree from the leaves to the roots, merging as you go, and putting the
   new op on the end of the worklist, until you are left with one
   thing on the worklist.

   IE if you have to rewrite the following set of operands (listed with
   rank in parentheses), with opcode PLUS_EXPR:

   a (1), b (1), c (1), d (2), e (2)

   We start with our merge worklist empty, and the ops list with all of
   those on it.

   You want to first merge all leaves of the same rank, as much as
   possible.

   So first build a binary op of

   mergetmp = a + b, and put "mergetmp" on the merge worklist.

   Because there is no three operand form of PLUS_EXPR, c is not going to
   be exposed to redundancy elimination as a rank 1 operand.

   So you might as well throw it on the merge worklist (you could also
   consider it to now be a rank two operand, and merge it with d and e,
   but in this case, you then have evicted e from a binary op.  So at
   least in this situation, you can't win.)

   Then build a binary op of d + e
   mergetmp2 = d + e

   and put mergetmp2 on the merge worklist.

   so merge worklist = {mergetmp, c, mergetmp2}

   Continue building binary ops of these operations until you have only
   one operation left on the worklist.

   So we have

   build binary op
   mergetmp3 = mergetmp + c

   worklist = {mergetmp2, mergetmp3}

   mergetmp4 = mergetmp2 + mergetmp3

   worklist = {mergetmp4}

   because we have one operation left, we can now just set the original
   statement equal to the result of that operation.

   This will at least expose a + b and d + e to redundancy elimination
   as binary operations.

   For extra points, you can reuse the old statements to build the
   mergetmps, since you shouldn't run out.

   So why don't we do this?

   Because it's expensive, and rarely will help.  Most trees we are
   reassociating have 3 or fewer ops.  If they have 2 ops, they already
   will be written into a nice single binary op.  If you have 3 ops, a
   single simple check suffices to tell you whether the first two are of the
   same rank.  If so, you know to order it

   mergetmp = op1 + op2
   newstmt = mergetmp + op3

   instead of
   mergetmp = op2 + op3
   newstmt = mergetmp + op1

   If all three are of the same rank, you can't expose them all in a
   single binary operator anyway, so the above is *still* the best you
   can do.

   Thus, this is what we do.  When we have three ops left, we check to see
   what order to put them in, and call it a day.  As a nod to vector sum
   reduction, we check if any of the ops are really a phi node that is a
   destructive update for the associating op, and keep the destructive
   update together for vector sum reduction recognition.  */
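/* Illustrative example (not from the original sources): with ranks as
   above, an input chain like

     t1 = a + b
     t2 = t1 + c
     t3 = t2 + d
     t4 = t3 + e

   is linearized into the operand list {a, b, c, d, e}, sorted by rank,
   and rewritten so that equal-rank leaves pair up first, e.g.

     mergetmp  = a + b
     mergetmp2 = d + e

   exposing a + b and d + e to redundancy elimination as described.  */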
/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
} reassociate_stats;

/* Operator, rank pair.  */
typedef struct operand_entry
{
  unsigned int rank;
  int id;
  tree op;
} *operand_entry_t;

static alloc_pool operand_entry_pool;

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;

/* Operand->rank hashtable.  */
static struct pointer_map_t *operand_rank;
/* Look up the operand rank structure for expression E.  */

static inline long
find_operand_rank (tree e)
{
  void **slot = pointer_map_contains (operand_rank, e);
  return slot ? (long) (intptr_t) *slot : -1;
}

/* Insert {E,RANK} into the operand rank hashtable.  */

static inline void
insert_operand_rank (tree e, long rank)
{
  void **slot;
  gcc_assert (rank > 0);
  slot = pointer_map_insert (operand_rank, e);
  gcc_assert (!*slot);
  *slot = (void *) (intptr_t) rank;
}
/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* Constants have rank 0.  */
  if (is_gimple_min_invariant (e))
    return 0;

  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal; however, it gives good
     results.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      gimple stmt;
      long rank, maxrank;
      int i, n;

      if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL
          && SSA_NAME_IS_DEFAULT_DEF (e))
        return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_bb (stmt) == NULL)
        return 0;

      if (!is_gimple_assign (stmt)
          || gimple_vdef (stmt))
        return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
        return rank;

      /* Otherwise, find the maximum rank for the operands, or the bb
         rank, whichever is less.  */
      rank = 0;
      maxrank = bb_rank[gimple_bb (stmt)->index];
      if (gimple_assign_single_p (stmt))
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          n = TREE_OPERAND_LENGTH (rhs);
          if (n == 0)
            rank = MAX (rank, get_rank (rhs));
          else
            {
              for (i = 0;
                   i < n && TREE_OPERAND (rhs, i) && rank != maxrank; i++)
                rank = MAX (rank, get_rank (TREE_OPERAND (rhs, i)));
            }
        }
      else
        {
          n = gimple_num_ops (stmt);
          for (i = 1; i < n && rank != maxrank; i++)
            {
              gcc_assert (gimple_op (stmt, i));
              rank = MAX (rank, get_rank (gimple_op (stmt, i)));
            }
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Rank for ");
          print_generic_expr (dump_file, e, 0);
          fprintf (dump_file, " is %ld\n", (rank + 1));
        }

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Globals, etc, are rank 0 */
  return 0;
}
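/* Illustrative sketch (not from the original sources): given

     x_1 = a_1 + b_1
     y_1 = x_1 + c_1

   the rank of x_1 is one more than the larger of the ranks of a_1 and
   b_1, and the rank of y_1 is one more than the larger of the ranks of
   x_1 and c_1 (the operand scan stops early once the containing bb's
   rank is reached), so deeper computations get larger ranks.  */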
DEF_VEC_P(operand_entry_t);
DEF_VEC_ALLOC_P(operand_entry_t, heap);

/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */
static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    return FLOAT_CONST_TYPE;
  else
    return OTHER_CONST_TYPE;
}
/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry_t oea = *(const operand_entry_t *)pa;
  const operand_entry_t oeb = *(const operand_entry_t *)pb;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oeb->rank == 0 && oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
        return constant_type (oeb->op) - constant_type (oea->op);
      else
        /* To make sorting result stable, we use unique IDs to determine
           order.  */
        return oeb->id - oea->id;
    }

  /* Lastly, make sure the versions that are the same go next to each
     other.  We use SSA_NAME_VERSION because it's stable.  */
  if ((oeb->rank - oea->rank == 0)
      && TREE_CODE (oea->op) == SSA_NAME
      && TREE_CODE (oeb->op) == SSA_NAME)
    {
      if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
        return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
      else
        return oeb->id - oea->id;
    }

  if (oeb->rank != oea->rank)
    return oeb->rank - oea->rank;
  else
    return oeb->id - oea->id;
}
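/* Illustrative sketch (not from the original sources): for a PLUS_EXPR
   chain with operands {7, x_3 (rank 2, say), 5, y_4 (rank 1)}, sorting
   puts x_3 first, then y_4, with the rank-0 constants 7 and 5 moved to
   the end and grouped by type so optimize_ops_list can fold them.  */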
/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op)
{
  operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  VEC_safe_push (operand_entry_t, heap, *ops, oe);
}
/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    return true;

  return false;
}
/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of that operation.  Otherwise, return NULL.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple stmt = SSA_NAME_DEF_STMT (name);

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}
/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
                          VEC (operand_entry_t, heap) **ops,
                          bool *all_done,
                          unsigned int i,
                          operand_entry_t curr,
                          operand_entry_t last)
{
  /* If we have two of the same op, and the opcode is &, |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
        {
        case MAX_EXPR:
        case MIN_EXPR:
        case BIT_IOR_EXPR:
        case BIT_AND_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op, 0);
              fprintf (dump_file, " [&|minmax] ");
              print_generic_expr (dump_file, last->op, 0);
              fprintf (dump_file, " -> ");
              print_generic_stmt (dump_file, last->op, 0);
            }

          VEC_ordered_remove (operand_entry_t, *ops, i);
          reassociate_stats.ops_eliminated++;

          return true;

        case BIT_XOR_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op, 0);
              fprintf (dump_file, " ^ ");
              print_generic_expr (dump_file, last->op, 0);
              fprintf (dump_file, " -> nothing\n");
            }

          reassociate_stats.ops_eliminated += 2;

          if (VEC_length (operand_entry_t, *ops) == 2)
            {
              VEC_free (operand_entry_t, heap, *ops);
              *ops = NULL;
              add_to_ops_vec (ops, fold_convert (TREE_TYPE (last->op),
                                                 integer_zero_node));
              *all_done = true;
            }
          else
            {
              VEC_ordered_remove (operand_entry_t, *ops, i-1);
              VEC_ordered_remove (operand_entry_t, *ops, i-1);
            }

          return true;

        default:
          break;
        }
    }
  return false;
}
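/* Illustrative examples (not from the original sources): with OPS for
   "x & x & y" the duplicate pair collapses to "x & y"; with OPS for
   "x ^ x" both copies are removed and the whole list becomes {0}.  */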
static VEC(tree, heap) *plus_negates;

/* If OPCODE is PLUS_EXPR and CURR->OP is really a negate expression,
   look in OPS for a corresponding positive operation to cancel it
   out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0, and return true.  Otherwise, return
   false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
                           VEC (operand_entry_t, heap) **ops,
                           unsigned int currindex,
                           operand_entry_t curr)
{
  tree negateop;
  unsigned int i;
  operand_entry_t oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  if (negateop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       VEC_iterate (operand_entry_t, *ops, i, oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == negateop)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, negateop, 0);
              fprintf (dump_file, " + -");
              print_generic_expr (dump_file, oe->op, 0);
              fprintf (dump_file, " -> 0\n");
            }

          VEC_ordered_remove (operand_entry_t, *ops, i);
          add_to_ops_vec (ops, fold_convert (TREE_TYPE (oe->op),
                                             integer_zero_node));
          VEC_ordered_remove (operand_entry_t, *ops, currindex);
          reassociate_stats.ops_eliminated++;

          return true;
        }
    }

  /* CURR->OP is a negate expr in a plus expr: save it for later
     inspection in repropagate_negates().  */
  VEC_safe_push (tree, heap, plus_negates, curr->op);

  return false;
}
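/* Illustrative example (not from the original sources): for
   "t = -a; r = b + t + a" the operand list {b, t, a} contains a and
   its negate t, so both entries are cancelled and replaced by a single
   0, leaving effectively "r = b + 0".  */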
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, collapse OPS to just 0 (for AND) or
   -1 (for IOR) and return true.  Otherwise, return false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
                     VEC (operand_entry_t, heap) **ops,
                     unsigned int currindex,
                     operand_entry_t curr)
{
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       VEC_iterate (operand_entry_t, *ops, i, oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, notop, 0);
              if (opcode == BIT_AND_EXPR)
                fprintf (dump_file, " & ~");
              else if (opcode == BIT_IOR_EXPR)
                fprintf (dump_file, " | ~");
              print_generic_expr (dump_file, oe->op, 0);
              if (opcode == BIT_AND_EXPR)
                fprintf (dump_file, " -> 0\n");
              else if (opcode == BIT_IOR_EXPR)
                fprintf (dump_file, " -> -1\n");
            }

          if (opcode == BIT_AND_EXPR)
            oe->op = fold_convert (TREE_TYPE (oe->op), integer_zero_node);
          else if (opcode == BIT_IOR_EXPR)
            oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
                                          TYPE_PRECISION (TREE_TYPE (oe->op)));

          reassociate_stats.ops_eliminated
            += VEC_length (operand_entry_t, *ops) - 1;
          VEC_free (operand_entry_t, heap, *ops);
          *ops = NULL;
          VEC_safe_push (operand_entry_t, heap, *ops, oe);
          return true;
        }
    }

  return false;
}
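/* Illustrative examples (not from the original sources):
   "x & ~x & y" collapses to just {0}, since the AND of the whole list
   is zero; "x | ~x | y" collapses to {-1} (an all-ones mask).  */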
/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */

static void
eliminate_using_constants (enum tree_code opcode,
                           VEC(operand_entry_t, heap) **ops)
{
  operand_entry_t oelast = VEC_last (operand_entry_t, *ops);
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
        {
        case BIT_AND_EXPR:
          if (integer_zerop (oelast->op))
            {
              if (VEC_length (operand_entry_t, *ops) != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found & 0, removing all other ops\n");

                  reassociate_stats.ops_eliminated
                    += VEC_length (operand_entry_t, *ops) - 1;

                  VEC_free (operand_entry_t, heap, *ops);
                  *ops = NULL;
                  VEC_safe_push (operand_entry_t, heap, *ops, oelast);
                  return;
                }
            }
          else if (integer_all_onesp (oelast->op))
            {
              if (VEC_length (operand_entry_t, *ops) != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found & -1, removing\n");
                  VEC_pop (operand_entry_t, *ops);
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        case BIT_IOR_EXPR:
          if (integer_all_onesp (oelast->op))
            {
              if (VEC_length (operand_entry_t, *ops) != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found | -1, removing all other ops\n");

                  reassociate_stats.ops_eliminated
                    += VEC_length (operand_entry_t, *ops) - 1;

                  VEC_free (operand_entry_t, heap, *ops);
                  *ops = NULL;
                  VEC_safe_push (operand_entry_t, heap, *ops, oelast);
                  return;
                }
            }
          else if (integer_zerop (oelast->op))
            {
              if (VEC_length (operand_entry_t, *ops) != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found | 0, removing\n");
                  VEC_pop (operand_entry_t, *ops);
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        case MULT_EXPR:
          if (integer_zerop (oelast->op)
              || (FLOAT_TYPE_P (type)
                  && !HONOR_NANS (TYPE_MODE (type))
                  && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
                  && real_zerop (oelast->op)))
            {
              if (VEC_length (operand_entry_t, *ops) != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found * 0, removing all other ops\n");

                  reassociate_stats.ops_eliminated
                    += VEC_length (operand_entry_t, *ops) - 1;
                  VEC_free (operand_entry_t, heap, *ops);
                  *ops = NULL;
                  VEC_safe_push (operand_entry_t, heap, *ops, oelast);
                  return;
                }
            }
          else if (integer_onep (oelast->op)
                   || (FLOAT_TYPE_P (type)
                       && !HONOR_SNANS (TYPE_MODE (type))
                       && real_onep (oelast->op)))
            {
              if (VEC_length (operand_entry_t, *ops) != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found * 1, removing\n");
                  VEC_pop (operand_entry_t, *ops);
                  reassociate_stats.ops_eliminated++;
                  return;
                }
            }
          break;
        case BIT_XOR_EXPR:
        case PLUS_EXPR:
        case MINUS_EXPR:
          if (integer_zerop (oelast->op)
              || (FLOAT_TYPE_P (type)
                  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
                  && fold_real_zero_addition_p (type, oelast->op,
                                                opcode == MINUS_EXPR)))
            {
              if (VEC_length (operand_entry_t, *ops) != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found [|^+] 0, removing\n");
                  VEC_pop (operand_entry_t, *ops);
                  reassociate_stats.ops_eliminated++;
                  return;
                }
            }
          break;
        default:
          break;
        }
    }
}
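/* Illustrative examples (not from the original sources): after
   constant merging, a list like {x, y, 0} under BIT_AND_EXPR becomes
   just {0}; {x, y, -1} under BIT_AND_EXPR drops the -1; {x, 1} under
   MULT_EXPR drops the 1.  */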
static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple,
                                 bool, bool);

/* Structure for tracking and counting operands.  */
typedef struct oecount_s {
  int cnt;
  int id;
  enum tree_code oecode;
  tree op;
} oecount;

DEF_VEC_O(oecount);
DEF_VEC_ALLOC_O(oecount,heap);

/* The heap for the oecount hashtable and the sorted list of operands.  */
static VEC (oecount, heap) *cvec;
/* Hash function for oecount.  */

static hashval_t
oecount_hash (const void *p)
{
  const oecount *c = VEC_index (oecount, cvec, (size_t)p - 42);
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}

/* Comparison function for oecount.  */

static int
oecount_eq (const void *p1, const void *p2)
{
  const oecount *c1 = VEC_index (oecount, cvec, (size_t)p1 - 42);
  const oecount *c2 = VEC_index (oecount, cvec, (size_t)p2 - 42);
  return (c1->oecode == c2->oecode
          && c1->op == c2->op);
}

/* Comparison function for qsort sorting oecount elements by count.  */

static int
oecount_cmp (const void *p1, const void *p2)
{
  const oecount *c1 = (const oecount *)p1;
  const oecount *c2 = (const oecount *)p2;
  if (c1->cnt != c2->cnt)
    return c1->cnt - c2->cnt;
  else
    /* If counts are identical, use unique IDs to stabilize qsort.  */
    return c1->id - c2->id;
}
/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE, removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  gimple stmt = SSA_NAME_DEF_STMT (*def);

  do
    {
      tree name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
         is ours, simply propagate the other operand into the stmt's
         single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
          && (name == op
              || gimple_assign_rhs2 (stmt) == op))
        {
          gimple use_stmt;
          use_operand_p use;
          gimple_stmt_iterator gsi;
          if (name == op)
            name = gimple_assign_rhs2 (stmt);
          gcc_assert (has_single_use (gimple_assign_lhs (stmt)));
          single_imm_use (gimple_assign_lhs (stmt), &use, &use_stmt);
          if (gimple_assign_lhs (stmt) == *def)
            *def = name;
          SET_USE (use, name);
          if (TREE_CODE (name) != SSA_NAME)
            update_stmt (use_stmt);
          gsi = gsi_for_stmt (stmt);
          gsi_remove (&gsi, true);
          release_defs (stmt);
          return;
        }

      /* Continue walking the chain.  */
      gcc_assert (name != op
                  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
    }
  while (1);
}
/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
   the result.  Places the statement after the definition of either
   OP1 or OP2.  Returns the new statement.  */

static gimple
build_and_add_sum (tree tmpvar, tree op1, tree op2, enum tree_code opcode)
{
  gimple op1def = NULL, op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gimple sum;

  /* Create the addition statement.  */
  sum = gimple_build_assign_with_ops (opcode, tmpvar, op1, op2);
  op = make_ssa_name (tmpvar, sum);
  gimple_assign_set_lhs (sum, op);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else if ((!op1def || gimple_nop_p (op1def))
           || (op2def && !gimple_nop_p (op2def)
               && stmt_dominates_stmt_p (op1def, op2def)))
    {
      if (gimple_code (op2def) == GIMPLE_PHI)
        {
          gsi = gsi_after_labels (gimple_bb (op2def));
          gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
        }
      else
        {
          if (!stmt_ends_bb_p (op2def))
            {
              gsi = gsi_for_stmt (op2def);
              gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
            }
          else
            {
              edge e;
              edge_iterator ei;

              FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
                if (e->flags & EDGE_FALLTHRU)
                  gsi_insert_on_edge_immediate (e, sum);
            }
        }
    }
  else
    {
      if (gimple_code (op1def) == GIMPLE_PHI)
        {
          gsi = gsi_after_labels (gimple_bb (op1def));
          gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
        }
      else
        {
          if (!stmt_ends_bb_p (op1def))
            {
              gsi = gsi_for_stmt (op1def);
              gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
            }
          else
            {
              edge e;
              edge_iterator ei;

              FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
                if (e->flags & EDGE_FALLTHRU)
                  gsi_insert_on_edge_immediate (e, sum);
            }
        }
    }
  update_stmt (sum);

  return sum;
}
/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates' chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
        second candidate bitmap noting all multiplication/division chains
        that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
        candidates with one (operand, code) operation removed from their
        multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
        multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
        processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b) * x * y in one invocation of the reassociation pass.  */
static bool
undistribute_ops_list (enum tree_code opcode,
                       VEC (operand_entry_t, heap) **ops, struct loop *loop)
{
  unsigned int length = VEC_length (operand_entry_t, *ops);
  operand_entry_t oe1;
  unsigned i, j;
  sbitmap candidates, candidates2;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  VEC (operand_entry_t, heap) **subops;
  htab_t ctable;
  bool changed = false;
  int next_oecount_id = 0;

  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process.  */
  candidates = sbitmap_alloc (length);
  sbitmap_zero (candidates);
  nr_candidates = 0;
  for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe1); ++i)
    {
      enum tree_code dcode;
      gimple oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
        continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
        continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
           && dcode != RDIV_EXPR)
          || !is_reassociable_op (oe1def, dcode, loop))
        continue;

      SET_BIT (candidates, i);
      nr_candidates++;
    }

  if (nr_candidates < 2)
    {
      sbitmap_free (candidates);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
                          VEC_index (operand_entry_t, *ops,
                                     sbitmap_first_set_bit (candidates))->op, 0);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.  */
  cvec = NULL;
  ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
  subops = XCNEWVEC (VEC (operand_entry_t, heap) *,
                     VEC_length (operand_entry_t, *ops));
  EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
    {
      gimple oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op);
      oecode = gimple_assign_rhs_code (oedef);
      linearize_expr_tree (&subops[i], oedef,
                           associative_tree_code (oecode), false);

      for (j = 0; VEC_iterate (operand_entry_t, subops[i], j, oe1); ++j)
        {
          oecount c;
          void **slot;
          size_t idx;
          c.oecode = oecode;
          c.cnt = 1;
          c.id = next_oecount_id++;
          c.op = oe1->op;
          VEC_safe_push (oecount, heap, cvec, &c);
          /* The table stores the vector index of the entry biased by 42
             as a fake pointer; oecount_hash and oecount_eq undo the
             bias.  */
          idx = VEC_length (oecount, cvec) + 41;
          slot = htab_find_slot (ctable, (void *)idx, INSERT);
          if (!*slot)
            {
              *slot = (void *)idx;
            }
          else
            {
              VEC_pop (oecount, cvec);
              VEC_index (oecount, cvec, (size_t)*slot - 42)->cnt++;
            }
        }
    }
  htab_delete (ctable);

  /* Sort the counting table.  */
  qsort (VEC_address (oecount, cvec), VEC_length (oecount, cvec),
         sizeof (oecount), oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      for (j = 0; VEC_iterate (oecount, cvec, j, c); ++j)
        {
          fprintf (dump_file, " %u %s: ", c->cnt,
                   c->oecode == MULT_EXPR
                   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
          print_generic_expr (dump_file, c->op, 0);
          fprintf (dump_file, "\n");
        }
    }

  /* Process the (operand, code) pairs in order of most occurrence.  */
  candidates2 = sbitmap_alloc (length);
  while (!VEC_empty (oecount, cvec))
    {
      oecount *c = VEC_last (oecount, cvec);
      if (c->cnt < 2)
        break;

      /* Now collect the operands in the outer chain that contain
         the common operand in their inner chain.  */
      sbitmap_zero (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
        {
          gimple oedef;
          enum tree_code oecode;
          unsigned j;
          tree op = VEC_index (operand_entry_t, *ops, i)->op;

          /* If we undistributed in this chain already this may be
             a constant.  */
          if (TREE_CODE (op) != SSA_NAME)
            continue;

          oedef = SSA_NAME_DEF_STMT (op);
          oecode = gimple_assign_rhs_code (oedef);
          if (oecode != c->oecode)
            continue;

          for (j = 0; VEC_iterate (operand_entry_t, subops[i], j, oe1); ++j)
            {
              if (oe1->op == c->op)
                {
                  SET_BIT (candidates2, i);
                  ++nr_candidates2;
                  break;
                }
            }
        }

      if (nr_candidates2 >= 2)
        {
          operand_entry_t oe1, oe2;
          tree tmpvar;
          gimple prod;
          int first = sbitmap_first_set_bit (candidates2);

          /* Build the new addition chain.  */
          oe1 = VEC_index (operand_entry_t, *ops, first);
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Building (");
              print_generic_expr (dump_file, oe1->op, 0);
            }
          tmpvar = create_tmp_var (TREE_TYPE (oe1->op), NULL);
          add_referenced_var (tmpvar);
          zero_one_operation (&oe1->op, c->oecode, c->op);
          EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
            {
              gimple sum;
              oe2 = VEC_index (operand_entry_t, *ops, i);
              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  fprintf (dump_file, " + ");
                  print_generic_expr (dump_file, oe2->op, 0);
                }
              zero_one_operation (&oe2->op, c->oecode, c->op);
              sum = build_and_add_sum (tmpvar, oe1->op, oe2->op, opcode);
              oe2->op = fold_convert (TREE_TYPE (oe2->op), integer_zero_node);
              oe2->rank = 0;
              oe1->op = gimple_get_lhs (sum);
            }

          /* Apply the multiplication/division.  */
          prod = build_and_add_sum (tmpvar, oe1->op, c->op, c->oecode);
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
              print_generic_expr (dump_file, c->op, 0);
              fprintf (dump_file, "\n");
            }

          /* Record it in the addition chain and disable further
             undistribution with this op.  */
          oe1->op = gimple_assign_lhs (prod);
          oe1->rank = get_rank (oe1->op);
          VEC_free (operand_entry_t, heap, subops[first]);

          changed = true;
        }

      VEC_pop (oecount, cvec);
    }

  for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i)
    VEC_free (operand_entry_t, heap, subops[i]);
  free (subops);
  VEC_free (oecount, heap, cvec);
  sbitmap_free (candidates);
  sbitmap_free (candidates2);

  return changed;
}
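/* Illustrative example (not from the original sources): for
   "r = a * x + b * x + c", the candidates are a * x and b * x, the
   pair (x, MULT_EXPR) occurs twice, so the chain is rewritten as

     tmp = a + b
     tmp2 = tmp * x
     r = tmp2 + 0 + c

   and the follow-up reassociation run cleans up the added zeros.  */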
/* Perform various identities and other optimizations on the list of
   operand entries, stored in OPS.  The tree code for the binary
   operation between all the operands is OPCODE.  */

static void
optimize_ops_list (enum tree_code opcode,
                   VEC (operand_entry_t, heap) **ops)
{
  unsigned int length = VEC_length (operand_entry_t, *ops);
  unsigned int i;
  operand_entry_t oe;
  operand_entry_t oelast = NULL;
  bool iterate = false;

  if (length == 1)
    return;

  oelast = VEC_last (operand_entry_t, *ops);

  /* If the last two are constants, pop the constants off, merge them
     and try the next two.  */
  if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
    {
      operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2);

      if (oelm1->rank == 0
          && is_gimple_min_invariant (oelm1->op)
          && useless_type_conversion_p (TREE_TYPE (oelm1->op),
                                        TREE_TYPE (oelast->op)))
        {
          tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
                                     oelm1->op, oelast->op);

          if (folded && is_gimple_min_invariant (folded))
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "Merging constants\n");

              VEC_pop (operand_entry_t, *ops);
              VEC_pop (operand_entry_t, *ops);

              add_to_ops_vec (ops, folded);
              reassociate_stats.constants_eliminated++;

              optimize_ops_list (opcode, ops);
              return;
            }
        }
    }

  eliminate_using_constants (opcode, ops);
  oelast = NULL;

  for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);)
    {
      bool done = false;

      if (eliminate_not_pairs (opcode, ops, i, oe))
        return;
      if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
          || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe)))
        {
          if (done)
            return;
          iterate = true;
          oelast = NULL;
          continue;
        }
      oelast = oe;
      i++;
    }

  length = VEC_length (operand_entry_t, *ops);
  oelast = VEC_last (operand_entry_t, *ops);

  if (iterate)
    optimize_ops_list (opcode, ops);
}
/* Return true if OPERAND is defined by a PHI node which uses the LHS
   of STMT in its operands.  This is also known as a "destructive
   update" operation.  */

static bool
is_phi_for_stmt (gimple stmt, tree operand)
{
  gimple def_stmt;
  tree lhs;
  use_operand_p arg_p;
  ssa_op_iter i;

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  lhs = gimple_assign_lhs (stmt);

  def_stmt = SSA_NAME_DEF_STMT (operand);
  if (gimple_code (def_stmt) != GIMPLE_PHI)
    return false;

  FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
    if (lhs == USE_FROM_PTR (arg_p))
      return true;
  return false;
}
/* Remove def stmt of VAR if VAR has zero uses and recurse
   on rhs1 operand if so.  */

static void
remove_visited_stmt_chain (tree var)
{
  gimple stmt;
  gimple_stmt_iterator gsi;

  while (1)
    {
      if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
        return;
      stmt = SSA_NAME_DEF_STMT (var);
      if (!is_gimple_assign (stmt)
          || !gimple_visited_p (stmt))
        return;
      var = gimple_assign_rhs1 (stmt);
      gsi = gsi_for_stmt (stmt);
      gsi_remove (&gsi, true);
      release_defs (stmt);
    }
}
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order.  */

static void
rewrite_expr_tree (gimple stmt, unsigned int opindex,
                   VEC(operand_entry_t, heap) * ops, bool moved)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  operand_entry_t oe;

  /* If we have three operands left, then we want to make sure the ones
     that get the double binary op are the ones with the same rank.

     The alternative we try is to see if this is a destructive
     update style statement, which is like:
     b = phi (a, ...)
     a = c + b;
     In that case, we want to use the destructive update form to
     expose the possible vectorizer sum reduction opportunity.
     In that case, the third operand will be the phi node.

     We could, of course, try to be better as noted above, and do a
     lot of work to try to find these opportunities in >3 operand
     cases, but it is unlikely to be worth it.  */
  if (opindex + 3 == VEC_length (operand_entry_t, ops))
    {
      operand_entry_t oe1, oe2, oe3;

      oe1 = VEC_index (operand_entry_t, ops, opindex);
      oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
      oe3 = VEC_index (operand_entry_t, ops, opindex + 2);

      if ((oe1->rank == oe2->rank
           && oe2->rank != oe3->rank)
          || (is_phi_for_stmt (stmt, oe3->op)
              && !is_phi_for_stmt (stmt, oe1->op)
              && !is_phi_for_stmt (stmt, oe2->op)))
        {
          struct operand_entry temp = *oe3;
          oe3->op = oe1->op;
          oe3->rank = oe1->rank;
          oe1->op = temp.op;
          oe1->rank = temp.rank;
        }
      else if ((oe1->rank == oe3->rank
                && oe2->rank != oe3->rank)
               || (is_phi_for_stmt (stmt, oe2->op)
                   && !is_phi_for_stmt (stmt, oe1->op)
                   && !is_phi_for_stmt (stmt, oe3->op)))
        {
          struct operand_entry temp = *oe2;
          oe2->op = oe1->op;
          oe2->rank = oe1->rank;
          oe1->op = temp.op;
          oe1->rank = temp.rank;
        }
    }

  /* The final recursion case for this function is that you have
     exactly two operations left.
     If we had exactly one op in the entire list to start with, we
     would have never called this function, and the tail recursion
     rewrites them one at a time.  */
  if (opindex + 2 == VEC_length (operand_entry_t, ops))
    {
      operand_entry_t oe1, oe2;

      oe1 = VEC_index (operand_entry_t, ops, opindex);
      oe2 = VEC_index (operand_entry_t, ops, opindex + 1);

      if (rhs1 != oe1->op || rhs2 != oe2->op)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Transforming ");
              print_gimple_stmt (dump_file, stmt, 0, 0);
            }

          gimple_assign_set_rhs1 (stmt, oe1->op);
          gimple_assign_set_rhs2 (stmt, oe2->op);
          update_stmt (stmt);
          if (rhs1 != oe1->op && rhs1 != oe2->op)
            remove_visited_stmt_chain (rhs1);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, " into ");
              print_gimple_stmt (dump_file, stmt, 0, 0);
            }
        }
      return;
    }

  /* If we hit here, we should have 3 or more ops left.  */
  gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops));

  /* Rewrite the next operator.  */
  oe = VEC_index (operand_entry_t, ops, opindex);

  if (oe->op != rhs2)
    {
      if (!moved)
        {
          gimple_stmt_iterator gsinow, gsirhs1;
          gimple stmt1 = stmt, stmt2;
          unsigned int count;

          gsinow = gsi_for_stmt (stmt);
          count = VEC_length (operand_entry_t, ops) - opindex - 2;
          while (count-- != 0)
            {
              stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
              gsirhs1 = gsi_for_stmt (stmt2);
              gsi_move_before (&gsirhs1, &gsinow);
              gsi_prev (&gsinow);
              stmt1 = stmt2;
            }
          moved = true;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Transforming ");
          print_gimple_stmt (dump_file, stmt, 0, 0);
        }

      gimple_assign_set_rhs2 (stmt, oe->op);
      update_stmt (stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " into ");
          print_gimple_stmt (dump_file, stmt, 0, 0);
        }
    }
  /* Recurse on the LHS of the binary operator, which is guaranteed to
     be the non-leaf side.  */
  rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
}
/* Transform STMT, which is really (A + B) + (C + D) into the left
   linear form, ((A+B)+C)+D.
   Recurse on D if necessary.  */

static void
linearize_expr (gimple stmt)
{
  gimple_stmt_iterator gsinow, gsirhs;
  gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  gimple newbinrhs = NULL;
  struct loop *loop = loop_containing_stmt (stmt);

  gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
              && is_reassociable_op (binrhs, rhscode, loop));

  gsinow = gsi_for_stmt (stmt);
  gsirhs = gsi_for_stmt (binrhs);
  gsi_move_before (&gsirhs, &gsinow);

  gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
  gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
  gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));

  if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
    newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Linearized: ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  reassociate_stats.linearized++;
  update_stmt (binrhs);
  update_stmt (binlhs);
  update_stmt (stmt);

  gimple_set_visited (stmt, true);
  gimple_set_visited (binlhs, true);
  gimple_set_visited (binrhs, true);

  /* Tail recurse on the new rhs if it still needs reassociation.  */
  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
    /* ??? This should probably be linearize_expr (newbinrhs) but I don't
       want to change the algorithm while converting to tuples.  */
    linearize_expr (stmt);
}
/* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
   it.  Otherwise, return NULL.  */

static gimple
get_single_immediate_use (tree lhs)
{
  use_operand_p immuse;
  gimple immusestmt;

  if (TREE_CODE (lhs) == SSA_NAME
      && single_imm_use (lhs, &immuse, &immusestmt)
      && is_gimple_assign (immusestmt))
    return immusestmt;

  return NULL;
}
/* Recursively negate the value of TONEGATE, and return the SSA_NAME
   representing the negated value.  Insertions of any necessary
   instructions go before GSI.
   This function is recursive in that, if you hand it "a_5" as the
   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */

static tree
negate_value (tree tonegate, gimple_stmt_iterator *gsi)
{
  gimple negatedefstmt = NULL;
  tree resultofnegate;

  /* If we are trying to negate a name, defined by an add, negate the
     add operands instead.  */
  if (TREE_CODE (tonegate) == SSA_NAME)
    negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
  if (TREE_CODE (tonegate) == SSA_NAME
      && is_gimple_assign (negatedefstmt)
      && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
      && has_single_use (gimple_assign_lhs (negatedefstmt))
      && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
    {
      gimple_stmt_iterator gsi;
      tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
      tree rhs2 = gimple_assign_rhs2 (negatedefstmt);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs1 = negate_value (rhs1, &gsi);
      gimple_assign_set_rhs1 (negatedefstmt, rhs1);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs2 = negate_value (rhs2, &gsi);
      gimple_assign_set_rhs2 (negatedefstmt, rhs2);

      update_stmt (negatedefstmt);
      return gimple_assign_lhs (negatedefstmt);
    }

  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
  resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
                                             NULL_TREE, true, GSI_SAME_STMT);
  return resultofnegate;
}
/* Return true if we should break up the subtract in STMT into an add
   with negate.  This is true when the subtract's operands are really
   adds, or the subtract itself is used in an add expression.  In
   either case, breaking up the subtract into an add with negate
   exposes the adds to reassociation.  */

static bool
should_break_up_subtract (gimple stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple immusestmt;
  struct loop *loop = loop_containing_stmt (stmt);

  if (TREE_CODE (binlhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
    return true;

  if (TREE_CODE (binrhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
    return true;

  if (TREE_CODE (lhs) == SSA_NAME
      && (immusestmt = get_single_immediate_use (lhs))
      && is_gimple_assign (immusestmt)
      && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
          || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
    return true;
  return false;
}
/* Transform STMT from A - B into A + -B.  */

static void
break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Breaking up subtract ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  rhs2 = negate_value (rhs2, gsip);
  gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
  update_stmt (stmt);
}
/* Recursively linearize a binary expression that is the RHS of STMT.
   Place the operands of the expression tree in the vector named OPS.  */

static void
linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt,
                     bool is_associative, bool set_visited)
{
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple binlhsdef, binrhsdef;
  bool binlhsisreassoc = false;
  bool binrhsisreassoc = false;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  struct loop *loop = loop_containing_stmt (stmt);

  if (set_visited)
    gimple_set_visited (stmt, true);

  if (TREE_CODE (binlhs) == SSA_NAME)
    {
      binlhsdef = SSA_NAME_DEF_STMT (binlhs);
      binlhsisreassoc = is_reassociable_op (binlhsdef, rhscode, loop);
    }

  if (TREE_CODE (binrhs) == SSA_NAME)
    {
      binrhsdef = SSA_NAME_DEF_STMT (binrhs);
      binrhsisreassoc = is_reassociable_op (binrhsdef, rhscode, loop);
    }

  /* If the LHS is not reassociable, but the RHS is, we need to swap
     them.  If neither is reassociable, there is nothing we can do, so
     just put them in the ops vector.  If the LHS is reassociable,
     linearize it.  If both are reassociable, then linearize the RHS
     and the LHS.  */

  if (!binlhsisreassoc)
    {
      tree temp;

      /* If this is not an associative operation like division, give up.  */
      if (!is_associative)
        {
          add_to_ops_vec (ops, binrhs);
          return;
        }

      if (!binrhsisreassoc)
        {
          add_to_ops_vec (ops, binrhs);
          add_to_ops_vec (ops, binlhs);
          return;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "swapping operands of ");
          print_gimple_stmt (dump_file, stmt, 0, 0);
        }

      swap_tree_operands (stmt,
                          gimple_assign_rhs1_ptr (stmt),
                          gimple_assign_rhs2_ptr (stmt));
      update_stmt (stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, " is now ");
          print_gimple_stmt (dump_file, stmt, 0, 0);
        }

      /* We want to make it so the lhs is always the reassociative op,
         so swap.  */
      temp = binlhs;
      binlhs = binrhs;
      binrhs = temp;
    }
  else if (binrhsisreassoc)
    {
      linearize_expr (stmt);
      binlhs = gimple_assign_rhs1 (stmt);
      binrhs = gimple_assign_rhs2 (stmt);
    }

  gcc_assert (TREE_CODE (binrhs) != SSA_NAME
              || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
                                      rhscode, loop));
  linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
                       is_associative, set_visited);
  add_to_ops_vec (ops, binrhs);
}
/* Repropagate the negates back into subtracts, since no other pass
   currently does it.  */

static void
repropagate_negates (void)
{
  unsigned int i = 0;
  tree negate;

  for (i = 0; VEC_iterate (tree, plus_negates, i, negate); i++)
    {
      gimple user = get_single_immediate_use (negate);

      if (!user || !is_gimple_assign (user))
        continue;

      /* The negate operand can be either operand of a PLUS_EXPR
         (it can be the LHS if the RHS is a constant for example).

         Force the negate operand to the RHS of the PLUS_EXPR, then
         transform the PLUS_EXPR into a MINUS_EXPR.  */
      if (gimple_assign_rhs_code (user) == PLUS_EXPR)
        {
          /* If the negated operand appears on the LHS of the
             PLUS_EXPR, exchange the operands of the PLUS_EXPR
             to force the negated operand to the RHS of the PLUS_EXPR.  */
          if (gimple_assign_rhs1 (user) == negate)
            {
              swap_tree_operands (user,
                                  gimple_assign_rhs1_ptr (user),
                                  gimple_assign_rhs2_ptr (user));
            }

          /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
             the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
          if (gimple_assign_rhs2 (user) == negate)
            {
              tree rhs1 = gimple_assign_rhs1 (user);
              tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
              gimple_stmt_iterator gsi = gsi_for_stmt (user);
              gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
              update_stmt (user);
            }
        }
      else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
        {
          if (gimple_assign_rhs1 (user) == negate)
            {
              /* We have
                   x = -a
                   y = x - b
                 which we transform into
                   x = a + b
                   y = -x .
                 This pushes down the negate which we possibly can merge
                 into some other operation, hence insert it into the
                 plus_negates vector.  */
              gimple feed = SSA_NAME_DEF_STMT (negate);
              tree a = gimple_assign_rhs1 (feed);
              tree rhs2 = gimple_assign_rhs2 (user);
              gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
              gimple_replace_lhs (feed, negate);
              gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
              update_stmt (gsi_stmt (gsi));
              gsi2 = gsi_for_stmt (user);
              gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
              update_stmt (gsi_stmt (gsi2));
              gsi_move_before (&gsi, &gsi2);
              VEC_safe_push (tree, heap, plus_negates,
                             gimple_assign_lhs (gsi_stmt (gsi2)));
            }
          else
            {
              /* Transform "x = -a; y = b - x" into "y = b + a", getting
                 rid of one operation.  */
              gimple feed = SSA_NAME_DEF_STMT (negate);
              tree a = gimple_assign_rhs1 (feed);
              tree rhs1 = gimple_assign_rhs1 (user);
              gimple_stmt_iterator gsi = gsi_for_stmt (user);
              gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
              update_stmt (gsi_stmt (gsi));
            }
        }
    }
}
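/* Illustrative example (not from the original sources): given
     x = -a;
     y = b - x;
   the MINUS_EXPR case above rewrites this to
     y = b + a;
   eliminating both the negate and the subtract.  */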
/* Returns true if OP is of a type for which we can do reassociation.
   That is for integral or non-saturating fixed-point types, and for
   floating-point types when associative-math is enabled.  */

static bool
can_reassociate_p (tree op)
{
  tree type = TREE_TYPE (op);
  if (INTEGRAL_TYPE_P (type)
      || NON_SAT_FIXED_POINT_TYPE_P (type)
      || (flag_associative_math && SCALAR_FLOAT_TYPE_P (type)))
    return true;
  return false;
}
/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   IE given
   d = f + g
   c = a + e
   b = c - d
   q = b - r
   k = t - q

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement.  */

static void
break_up_subtract_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_visited (stmt, false);

      if (!is_gimple_assign (stmt)
          || !can_reassociate_p (gimple_assign_lhs (stmt)))
        continue;

      /* Look for simple gimple subtract operations.  */
      if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
        {
          if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
              || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
            continue;

          /* Check for a subtract used only in an addition.  If this
             is the case, transform it into add of a negate for better
             reassociation.  IE transform C = A-B into C = A + -B if C
             is only used in an addition.  */
          if (should_break_up_subtract (stmt))
            break_up_subtract (stmt, &gsi);
        }
      else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
               && can_reassociate_p (gimple_assign_rhs1 (stmt)))
        VEC_safe_push (tree, heap, plus_negates, gimple_assign_lhs (stmt));
    }
  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    break_up_subtract_bb (son);
}
/* Reassociate expressions in basic block BB and its post-dominator
   children.  */

static void
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;

  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt))
        {
          tree lhs, rhs1, rhs2;
          enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

          /* If this is not a gimple binary expression, there is
             nothing for us to do with it.  */
          if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
            continue;

          /* If this was part of an already processed statement,
             we don't need to touch it again.  */
          if (gimple_visited_p (stmt))
            {
              /* This statement might have become dead because of previous
                 reassociations.  */
              if (has_zero_uses (gimple_get_lhs (stmt)))
                {
                  gsi_remove (&gsi, true);
                  release_defs (stmt);
                  /* We might end up removing the last stmt above which
                     places the iterator to the end of the sequence.
                     Reset it to the last stmt in this case which might
                     be the end of the sequence as well if we removed
                     the last statement of the sequence.  In which case
                     we need to bail out.  */
                  if (gsi_end_p (gsi))
                    {
                      gsi = gsi_last_bb (bb);
                      if (gsi_end_p (gsi))
                        break;
                    }
                }
              continue;
            }

          lhs = gimple_assign_lhs (stmt);
          rhs1 = gimple_assign_rhs1 (stmt);
          rhs2 = gimple_assign_rhs2 (stmt);

          if (!can_reassociate_p (lhs)
              || !can_reassociate_p (rhs1)
              || !can_reassociate_p (rhs2))
            continue;

          if (associative_tree_code (rhs_code))
            {
              VEC(operand_entry_t, heap) *ops = NULL;

              /* There may be no immediate uses left by the time we
                 get here because we may have eliminated them all.  */
              if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
                continue;

              gimple_set_visited (stmt, true);
              linearize_expr_tree (&ops, stmt, true, true);
              qsort (VEC_address (operand_entry_t, ops),
                     VEC_length (operand_entry_t, ops),
                     sizeof (operand_entry_t),
                     sort_by_operand_rank);
              optimize_ops_list (rhs_code, &ops);
              if (undistribute_ops_list (rhs_code, &ops,
                                         loop_containing_stmt (stmt)))
                {
                  qsort (VEC_address (operand_entry_t, ops),
                         VEC_length (operand_entry_t, ops),
                         sizeof (operand_entry_t),
                         sort_by_operand_rank);
                  optimize_ops_list (rhs_code, &ops);
                }

              if (VEC_length (operand_entry_t, ops) == 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    {
                      fprintf (dump_file, "Transforming ");
                      print_gimple_stmt (dump_file, stmt, 0, 0);
                    }

                  rhs1 = gimple_assign_rhs1 (stmt);
                  gimple_assign_set_rhs_from_tree (&gsi,
                                                   VEC_last (operand_entry_t,
                                                             ops)->op);
                  update_stmt (stmt);
                  remove_visited_stmt_chain (rhs1);

                  if (dump_file && (dump_flags & TDF_DETAILS))
                    {
                      fprintf (dump_file, " into ");
                      print_gimple_stmt (dump_file, stmt, 0, 0);
                    }
                }
              else
                rewrite_expr_tree (stmt, 0, ops, false);

              VEC_free (operand_entry_t, heap, ops);
            }
        }
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    reassociate_bb (son);
}
void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops);
void debug_ops_vector (VEC (operand_entry_t, heap) *ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops)
{
  operand_entry_t oe;
  unsigned int i;

  for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op, 0);
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

void
debug_ops_vector (VEC (operand_entry_t, heap) *ops)
{
  dump_ops_vector (stderr, ops);
}
static void
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR);
  reassociate_bb (EXIT_BLOCK_PTR);
}
/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  long rank = 2;
  tree param;
  int *bbs = XNEWVEC (int, last_basic_block + 1);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  operand_entry_pool = create_alloc_pool ("operand entry pool",
                                          sizeof (struct operand_entry), 30);
  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block + 1);
  operand_rank = pointer_map_create ();

  /* Give each argument a distinct rank.  */
  for (param = DECL_ARGUMENTS (current_function_decl);
       param;
       param = TREE_CHAIN (param))
    {
      if (gimple_default_def (cfun, param) != NULL)
        {
          tree def = gimple_default_def (cfun, param);
          insert_operand_rank (def, ++rank);
        }
    }

  /* Give the chain decl a distinct rank.  */
  if (cfun->static_chain_decl != NULL)
    {
      tree def = gimple_default_def (cfun, cfun->static_chain_decl);
      if (def != NULL)
        insert_operand_rank (def, ++rank);
    }

  /* Set up rank for each BB  */
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = NULL;
}
/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
                            reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
                            reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
                            reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
                            reassociate_stats.rewritten);

  pointer_map_destroy (operand_rank);
  free_alloc_pool (operand_entry_pool);
  free (bb_rank);
  VEC_free (tree, heap, plus_negates);
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}
/* Gate and execute functions for Reassociation.  */

static unsigned int
execute_reassoc (void)
{
  init_reassoc ();

  do_reassoc ();
  repropagate_negates ();

  fini_reassoc ();
  return 0;
}

static bool
gate_tree_ssa_reassoc (void)
{
  return flag_tree_reassoc != 0;
}
struct gimple_opt_pass pass_reassoc =
{
 {
  GIMPLE_PASS,
  "reassoc",				/* name */
  gate_tree_ssa_reassoc,		/* gate */
  execute_reassoc,			/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_TREE_REASSOC,			/* tv_id */
  PROP_cfg | PROP_ssa,			/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func | TODO_ggc_collect | TODO_verify_ssa /* todo_flags_finish */
 }
};