re PR middle-end/40815 (redundant neg instruction caused by loop-invariant)
[official-gcc.git] / gcc / tree-ssa-reassoc.c
blobcf05de5be711f05a0dc02432664023bb2d4939c1
1 /* Reassociation for trees.
2 Copyright (C) 2005, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "ggc.h"
26 #include "tree.h"
27 #include "basic-block.h"
28 #include "diagnostic.h"
29 #include "tree-inline.h"
30 #include "tree-flow.h"
31 #include "gimple.h"
32 #include "tree-dump.h"
33 #include "timevar.h"
34 #include "tree-iterator.h"
35 #include "real.h"
36 #include "tree-pass.h"
37 #include "alloc-pool.h"
38 #include "vec.h"
39 #include "langhooks.h"
40 #include "pointer-set.h"
41 #include "cfgloop.h"
42 #include "flags.h"
44 /* This is a simple global reassociation pass. It is, in part, based
45 on the LLVM pass of the same name (They do some things more/less
46 than we do, in different orders, etc).
48 It consists of five steps:
50 1. Breaking up subtract operations into addition + negate, where
51 it would promote the reassociation of adds.
53 2. Left linearization of the expression trees, so that (A+B)+(C+D)
54 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
55 During linearization, we place the operands of the binary
56 expressions into a vector of operand_entry_t
58 3. Optimization of the operand lists, eliminating things like a +
59 -a, a & a, etc.
61 4. Rewrite the expression trees we linearized and optimized so
62 they are in proper rank order.
64 5. Repropagate negates, as nothing else will clean it up ATM.
66 A bit of theory on #4, since nobody seems to write anything down
67 about why it makes sense to do it the way they do it:
69 We could do this much nicer theoretically, but don't (for reasons
70 explained after how to do it theoretically nice :P).
72 In order to promote the most redundancy elimination, you want
73 binary expressions whose operands are the same rank (or
74 preferably, the same value) exposed to the redundancy eliminator,
75 for possible elimination.
77 So the way to do this if we really cared, is to build the new op
78 tree from the leaves to the roots, merging as you go, and putting the
79 new op on the end of the worklist, until you are left with one
80 thing on the worklist.
82 IE if you have to rewrite the following set of operands (listed with
83 rank in parentheses), with opcode PLUS_EXPR:
85 a (1), b (1), c (1), d (2), e (2)
88 We start with our merge worklist empty, and the ops list with all of
89 those on it.
91 You want to first merge all leaves of the same rank, as much as
92 possible.
94 So first build a binary op of
96 mergetmp = a + b, and put "mergetmp" on the merge worklist.
98 Because there is no three operand form of PLUS_EXPR, c is not going to
99 be exposed to redundancy elimination as a rank 1 operand.
101 So you might as well throw it on the merge worklist (you could also
102 consider it to now be a rank two operand, and merge it with d and e,
103 but in this case, you then have evicted e from a binary op. So at
104 least in this situation, you can't win.)
106 Then build a binary op of d + e
107 mergetmp2 = d + e
109 and put mergetmp2 on the merge worklist.
111 so merge worklist = {mergetmp, c, mergetmp2}
113 Continue building binary ops of these operations until you have only
114 one operation left on the worklist.
116 So we have
118 build binary op
119 mergetmp3 = mergetmp + c
121 worklist = {mergetmp2, mergetmp3}
123 mergetmp4 = mergetmp2 + mergetmp3
125 worklist = {mergetmp4}
127 because we have one operation left, we can now just set the original
128 statement equal to the result of that operation.
130 This will at least expose a + b and d + e to redundancy elimination
131 as binary operations.
133 For extra points, you can reuse the old statements to build the
134 mergetmps, since you shouldn't run out.
136 So why don't we do this?
138 Because it's expensive, and rarely will help. Most trees we are
139 reassociating have 3 or less ops. If they have 2 ops, they already
140 will be written into a nice single binary op. If you have 3 ops, a
141 single simple check suffices to tell you whether the first two are of the
142 same rank. If so, you know to order it
144 mergetmp = op1 + op2
145 newstmt = mergetmp + op3
147 instead of
148 mergetmp = op2 + op3
149 newstmt = mergetmp + op1
151 If all three are of the same rank, you can't expose them all in a
152 single binary operator anyway, so the above is *still* the best you
153 can do.
155 Thus, this is what we do. When we have three ops left, we check to see
156 what order to put them in, and call it a day. As a nod to vector sum
157 reduction, we check if any of the ops are really a phi node that is a
158 destructive update for the associating op, and keep the destructive
159 update together for vector sum reduction recognition. */
/* Statistics */
static struct
{
  int linearized;		/* Expression trees linearized.  */
  int constants_eliminated;	/* Constant operands merged/folded away.  */
  int ops_eliminated;		/* Operands removed through identities.  */
  int rewritten;		/* Expression trees rewritten in rank order.  */
} reassociate_stats;
171 /* Operator, rank pair. */
172 typedef struct operand_entry
174 unsigned int rank;
175 tree op;
176 } *operand_entry_t;
178 static alloc_pool operand_entry_pool;
181 /* Starting rank number for a given basic block, so that we can rank
182 operations using unmovable instructions in that BB based on the bb
183 depth. */
184 static long *bb_rank;
186 /* Operand->rank hashtable. */
187 static struct pointer_map_t *operand_rank;
190 /* Look up the operand rank structure for expression E. */
192 static inline long
193 find_operand_rank (tree e)
195 void **slot = pointer_map_contains (operand_rank, e);
196 return slot ? (long) (intptr_t) *slot : -1;
199 /* Insert {E,RANK} into the operand rank hashtable. */
201 static inline void
202 insert_operand_rank (tree e, long rank)
204 void **slot;
205 gcc_assert (rank > 0);
206 slot = pointer_map_insert (operand_rank, e);
207 gcc_assert (!*slot);
208 *slot = (void *) (intptr_t) rank;
211 /* Given an expression E, return the rank of the expression. */
213 static long
214 get_rank (tree e)
216 /* Constants have rank 0. */
217 if (is_gimple_min_invariant (e))
218 return 0;
220 /* SSA_NAME's have the rank of the expression they are the result
222 For globals and uninitialized values, the rank is 0.
223 For function arguments, use the pre-setup rank.
224 For PHI nodes, stores, asm statements, etc, we use the rank of
225 the BB.
226 For simple operations, the rank is the maximum rank of any of
227 its operands, or the bb_rank, whichever is less.
228 I make no claims that this is optimal, however, it gives good
229 results. */
231 if (TREE_CODE (e) == SSA_NAME)
233 gimple stmt;
234 long rank, maxrank;
235 int i, n;
237 if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL
238 && SSA_NAME_IS_DEFAULT_DEF (e))
239 return find_operand_rank (e);
241 stmt = SSA_NAME_DEF_STMT (e);
242 if (gimple_bb (stmt) == NULL)
243 return 0;
245 if (!is_gimple_assign (stmt)
246 || gimple_vdef (stmt))
247 return bb_rank[gimple_bb (stmt)->index];
249 /* If we already have a rank for this expression, use that. */
250 rank = find_operand_rank (e);
251 if (rank != -1)
252 return rank;
254 /* Otherwise, find the maximum rank for the operands, or the bb
255 rank, whichever is less. */
256 rank = 0;
257 maxrank = bb_rank[gimple_bb(stmt)->index];
258 if (gimple_assign_single_p (stmt))
260 tree rhs = gimple_assign_rhs1 (stmt);
261 n = TREE_OPERAND_LENGTH (rhs);
262 if (n == 0)
263 rank = MAX (rank, get_rank (rhs));
264 else
266 for (i = 0;
267 i < n && TREE_OPERAND (rhs, i) && rank != maxrank; i++)
268 rank = MAX(rank, get_rank (TREE_OPERAND (rhs, i)));
271 else
273 n = gimple_num_ops (stmt);
274 for (i = 1; i < n && rank != maxrank; i++)
276 gcc_assert (gimple_op (stmt, i));
277 rank = MAX(rank, get_rank (gimple_op (stmt, i)));
281 if (dump_file && (dump_flags & TDF_DETAILS))
283 fprintf (dump_file, "Rank for ");
284 print_generic_expr (dump_file, e, 0);
285 fprintf (dump_file, " is %ld\n", (rank + 1));
288 /* Note the rank in the hashtable so we don't recompute it. */
289 insert_operand_rank (e, (rank + 1));
290 return (rank + 1);
293 /* Globals, etc, are rank 0 */
294 return 0;
297 DEF_VEC_P(operand_entry_t);
298 DEF_VEC_ALLOC_P(operand_entry_t, heap);
/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  The macro bodies are fully
   parenthesized so that uses inside larger expressions (for example
   a subtraction of two of them) parse as intended; without the
   parentheses, "A - B" would expand to "1 << 3 - 1 << 2" which binds
   as "1 << (3 - 1) << 2" because shift has lower precedence than
   addition.  */
#define INTEGER_CONST_TYPE (1 << 3)
#define FLOAT_CONST_TYPE (1 << 2)
#define OTHER_CONST_TYPE (1 << 1)
306 /* Classify an invariant tree into integer, float, or other, so that
307 we can sort them to be near other constants of the same type. */
308 static inline int
309 constant_type (tree t)
311 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
312 return INTEGER_CONST_TYPE;
313 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
314 return FLOAT_CONST_TYPE;
315 else
316 return OTHER_CONST_TYPE;
319 /* qsort comparison function to sort operand entries PA and PB by rank
320 so that the sorted array is ordered by rank in decreasing order. */
321 static int
322 sort_by_operand_rank (const void *pa, const void *pb)
324 const operand_entry_t oea = *(const operand_entry_t *)pa;
325 const operand_entry_t oeb = *(const operand_entry_t *)pb;
327 /* It's nicer for optimize_expression if constants that are likely
328 to fold when added/multiplied//whatever are put next to each
329 other. Since all constants have rank 0, order them by type. */
330 if (oeb->rank == 0 && oea->rank == 0)
331 return constant_type (oeb->op) - constant_type (oea->op);
333 /* Lastly, make sure the versions that are the same go next to each
334 other. We use SSA_NAME_VERSION because it's stable. */
335 if ((oeb->rank - oea->rank == 0)
336 && TREE_CODE (oea->op) == SSA_NAME
337 && TREE_CODE (oeb->op) == SSA_NAME)
338 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
340 return oeb->rank - oea->rank;
343 /* Add an operand entry to *OPS for the tree operand OP. */
345 static void
346 add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op)
348 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
350 oe->op = op;
351 oe->rank = get_rank (op);
352 VEC_safe_push (operand_entry_t, heap, *ops, oe);
355 /* Return true if STMT is reassociable operation containing a binary
356 operation with tree code CODE, and is inside LOOP. */
358 static bool
359 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
361 basic_block bb = gimple_bb (stmt);
363 if (gimple_bb (stmt) == NULL)
364 return false;
366 if (!flow_bb_inside_loop_p (loop, bb))
367 return false;
369 if (is_gimple_assign (stmt)
370 && gimple_assign_rhs_code (stmt) == code
371 && has_single_use (gimple_assign_lhs (stmt)))
372 return true;
374 return false;
378 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
379 operand of the negate operation. Otherwise, return NULL. */
381 static tree
382 get_unary_op (tree name, enum tree_code opcode)
384 gimple stmt = SSA_NAME_DEF_STMT (name);
386 if (!is_gimple_assign (stmt))
387 return NULL_TREE;
389 if (gimple_assign_rhs_code (stmt) == opcode)
390 return gimple_assign_rhs1 (stmt);
391 return NULL_TREE;
394 /* If CURR and LAST are a pair of ops that OPCODE allows us to
395 eliminate through equivalences, do so, remove them from OPS, and
396 return true. Otherwise, return false. */
398 static bool
399 eliminate_duplicate_pair (enum tree_code opcode,
400 VEC (operand_entry_t, heap) **ops,
401 bool *all_done,
402 unsigned int i,
403 operand_entry_t curr,
404 operand_entry_t last)
407 /* If we have two of the same op, and the opcode is & |, min, or max,
408 we can eliminate one of them.
409 If we have two of the same op, and the opcode is ^, we can
410 eliminate both of them. */
412 if (last && last->op == curr->op)
414 switch (opcode)
416 case MAX_EXPR:
417 case MIN_EXPR:
418 case BIT_IOR_EXPR:
419 case BIT_AND_EXPR:
420 if (dump_file && (dump_flags & TDF_DETAILS))
422 fprintf (dump_file, "Equivalence: ");
423 print_generic_expr (dump_file, curr->op, 0);
424 fprintf (dump_file, " [&|minmax] ");
425 print_generic_expr (dump_file, last->op, 0);
426 fprintf (dump_file, " -> ");
427 print_generic_stmt (dump_file, last->op, 0);
430 VEC_ordered_remove (operand_entry_t, *ops, i);
431 reassociate_stats.ops_eliminated ++;
433 return true;
435 case BIT_XOR_EXPR:
436 if (dump_file && (dump_flags & TDF_DETAILS))
438 fprintf (dump_file, "Equivalence: ");
439 print_generic_expr (dump_file, curr->op, 0);
440 fprintf (dump_file, " ^ ");
441 print_generic_expr (dump_file, last->op, 0);
442 fprintf (dump_file, " -> nothing\n");
445 reassociate_stats.ops_eliminated += 2;
447 if (VEC_length (operand_entry_t, *ops) == 2)
449 VEC_free (operand_entry_t, heap, *ops);
450 *ops = NULL;
451 add_to_ops_vec (ops, fold_convert (TREE_TYPE (last->op),
452 integer_zero_node));
453 *all_done = true;
455 else
457 VEC_ordered_remove (operand_entry_t, *ops, i-1);
458 VEC_ordered_remove (operand_entry_t, *ops, i-1);
461 return true;
463 default:
464 break;
467 return false;
470 static VEC(tree, heap) *plus_negates;
472 /* If OPCODE is PLUS_EXPR, CURR->OP is really a negate expression,
473 look in OPS for a corresponding positive operation to cancel it
474 out. If we find one, remove the other from OPS, replace
475 OPS[CURRINDEX] with 0, and return true. Otherwise, return
476 false. */
478 static bool
479 eliminate_plus_minus_pair (enum tree_code opcode,
480 VEC (operand_entry_t, heap) **ops,
481 unsigned int currindex,
482 operand_entry_t curr)
484 tree negateop;
485 unsigned int i;
486 operand_entry_t oe;
488 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
489 return false;
491 negateop = get_unary_op (curr->op, NEGATE_EXPR);
492 if (negateop == NULL_TREE)
493 return false;
495 /* Any non-negated version will have a rank that is one less than
496 the current rank. So once we hit those ranks, if we don't find
497 one, we can stop. */
499 for (i = currindex + 1;
500 VEC_iterate (operand_entry_t, *ops, i, oe)
501 && oe->rank >= curr->rank - 1 ;
502 i++)
504 if (oe->op == negateop)
507 if (dump_file && (dump_flags & TDF_DETAILS))
509 fprintf (dump_file, "Equivalence: ");
510 print_generic_expr (dump_file, negateop, 0);
511 fprintf (dump_file, " + -");
512 print_generic_expr (dump_file, oe->op, 0);
513 fprintf (dump_file, " -> 0\n");
516 VEC_ordered_remove (operand_entry_t, *ops, i);
517 add_to_ops_vec (ops, fold_convert(TREE_TYPE (oe->op),
518 integer_zero_node));
519 VEC_ordered_remove (operand_entry_t, *ops, currindex);
520 reassociate_stats.ops_eliminated ++;
522 return true;
526 /* CURR->OP is a negate expr in a plus expr: save it for later
527 inspection in repropagate_negates(). */
528 VEC_safe_push (tree, heap, plus_negates, curr->op);
530 return false;
533 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
534 bitwise not expression, look in OPS for a corresponding operand to
535 cancel it out. If we find one, remove the other from OPS, replace
536 OPS[CURRINDEX] with 0, and return true. Otherwise, return
537 false. */
539 static bool
540 eliminate_not_pairs (enum tree_code opcode,
541 VEC (operand_entry_t, heap) **ops,
542 unsigned int currindex,
543 operand_entry_t curr)
545 tree notop;
546 unsigned int i;
547 operand_entry_t oe;
549 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
550 || TREE_CODE (curr->op) != SSA_NAME)
551 return false;
553 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
554 if (notop == NULL_TREE)
555 return false;
557 /* Any non-not version will have a rank that is one less than
558 the current rank. So once we hit those ranks, if we don't find
559 one, we can stop. */
561 for (i = currindex + 1;
562 VEC_iterate (operand_entry_t, *ops, i, oe)
563 && oe->rank >= curr->rank - 1;
564 i++)
566 if (oe->op == notop)
568 if (dump_file && (dump_flags & TDF_DETAILS))
570 fprintf (dump_file, "Equivalence: ");
571 print_generic_expr (dump_file, notop, 0);
572 if (opcode == BIT_AND_EXPR)
573 fprintf (dump_file, " & ~");
574 else if (opcode == BIT_IOR_EXPR)
575 fprintf (dump_file, " | ~");
576 print_generic_expr (dump_file, oe->op, 0);
577 if (opcode == BIT_AND_EXPR)
578 fprintf (dump_file, " -> 0\n");
579 else if (opcode == BIT_IOR_EXPR)
580 fprintf (dump_file, " -> -1\n");
583 if (opcode == BIT_AND_EXPR)
584 oe->op = fold_convert (TREE_TYPE (oe->op), integer_zero_node);
585 else if (opcode == BIT_IOR_EXPR)
586 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
587 TYPE_PRECISION (TREE_TYPE (oe->op)));
589 reassociate_stats.ops_eliminated
590 += VEC_length (operand_entry_t, *ops) - 1;
591 VEC_free (operand_entry_t, heap, *ops);
592 *ops = NULL;
593 VEC_safe_push (operand_entry_t, heap, *ops, oe);
594 return true;
598 return false;
601 /* Use constant value that may be present in OPS to try to eliminate
602 operands. Note that this function is only really used when we've
603 eliminated ops for other reasons, or merged constants. Across
604 single statements, fold already does all of this, plus more. There
605 is little point in duplicating logic, so I've only included the
606 identities that I could ever construct testcases to trigger. */
608 static void
609 eliminate_using_constants (enum tree_code opcode,
610 VEC(operand_entry_t, heap) **ops)
612 operand_entry_t oelast = VEC_last (operand_entry_t, *ops);
613 tree type = TREE_TYPE (oelast->op);
615 if (oelast->rank == 0
616 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
618 switch (opcode)
620 case BIT_AND_EXPR:
621 if (integer_zerop (oelast->op))
623 if (VEC_length (operand_entry_t, *ops) != 1)
625 if (dump_file && (dump_flags & TDF_DETAILS))
626 fprintf (dump_file, "Found & 0, removing all other ops\n");
628 reassociate_stats.ops_eliminated
629 += VEC_length (operand_entry_t, *ops) - 1;
631 VEC_free (operand_entry_t, heap, *ops);
632 *ops = NULL;
633 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
634 return;
637 else if (integer_all_onesp (oelast->op))
639 if (VEC_length (operand_entry_t, *ops) != 1)
641 if (dump_file && (dump_flags & TDF_DETAILS))
642 fprintf (dump_file, "Found & -1, removing\n");
643 VEC_pop (operand_entry_t, *ops);
644 reassociate_stats.ops_eliminated++;
647 break;
648 case BIT_IOR_EXPR:
649 if (integer_all_onesp (oelast->op))
651 if (VEC_length (operand_entry_t, *ops) != 1)
653 if (dump_file && (dump_flags & TDF_DETAILS))
654 fprintf (dump_file, "Found | -1, removing all other ops\n");
656 reassociate_stats.ops_eliminated
657 += VEC_length (operand_entry_t, *ops) - 1;
659 VEC_free (operand_entry_t, heap, *ops);
660 *ops = NULL;
661 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
662 return;
665 else if (integer_zerop (oelast->op))
667 if (VEC_length (operand_entry_t, *ops) != 1)
669 if (dump_file && (dump_flags & TDF_DETAILS))
670 fprintf (dump_file, "Found | 0, removing\n");
671 VEC_pop (operand_entry_t, *ops);
672 reassociate_stats.ops_eliminated++;
675 break;
676 case MULT_EXPR:
677 if (integer_zerop (oelast->op)
678 || (FLOAT_TYPE_P (type)
679 && !HONOR_NANS (TYPE_MODE (type))
680 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
681 && real_zerop (oelast->op)))
683 if (VEC_length (operand_entry_t, *ops) != 1)
685 if (dump_file && (dump_flags & TDF_DETAILS))
686 fprintf (dump_file, "Found * 0, removing all other ops\n");
688 reassociate_stats.ops_eliminated
689 += VEC_length (operand_entry_t, *ops) - 1;
690 VEC_free (operand_entry_t, heap, *ops);
691 *ops = NULL;
692 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
693 return;
696 else if (integer_onep (oelast->op)
697 || (FLOAT_TYPE_P (type)
698 && !HONOR_SNANS (TYPE_MODE (type))
699 && real_onep (oelast->op)))
701 if (VEC_length (operand_entry_t, *ops) != 1)
703 if (dump_file && (dump_flags & TDF_DETAILS))
704 fprintf (dump_file, "Found * 1, removing\n");
705 VEC_pop (operand_entry_t, *ops);
706 reassociate_stats.ops_eliminated++;
707 return;
710 break;
711 case BIT_XOR_EXPR:
712 case PLUS_EXPR:
713 case MINUS_EXPR:
714 if (integer_zerop (oelast->op)
715 || (FLOAT_TYPE_P (type)
716 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
717 && fold_real_zero_addition_p (type, oelast->op,
718 opcode == MINUS_EXPR)))
720 if (VEC_length (operand_entry_t, *ops) != 1)
722 if (dump_file && (dump_flags & TDF_DETAILS))
723 fprintf (dump_file, "Found [|^+] 0, removing\n");
724 VEC_pop (operand_entry_t, *ops);
725 reassociate_stats.ops_eliminated++;
726 return;
729 break;
730 default:
731 break;
737 static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple,
738 bool, bool);
740 /* Structure for tracking and counting operands. */
741 typedef struct oecount_s {
742 int cnt;
743 enum tree_code oecode;
744 tree op;
745 } oecount;
747 DEF_VEC_O(oecount);
748 DEF_VEC_ALLOC_O(oecount,heap);
750 /* The heap for the oecount hashtable and the sorted list of operands. */
751 static VEC (oecount, heap) *cvec;
753 /* Hash function for oecount. */
755 static hashval_t
756 oecount_hash (const void *p)
758 const oecount *c = VEC_index (oecount, cvec, (size_t)p - 42);
759 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
762 /* Comparison function for oecount. */
764 static int
765 oecount_eq (const void *p1, const void *p2)
767 const oecount *c1 = VEC_index (oecount, cvec, (size_t)p1 - 42);
768 const oecount *c2 = VEC_index (oecount, cvec, (size_t)p2 - 42);
769 return (c1->oecode == c2->oecode
770 && c1->op == c2->op);
773 /* Comparison function for qsort sorting oecount elements by count. */
775 static int
776 oecount_cmp (const void *p1, const void *p2)
778 const oecount *c1 = (const oecount *)p1;
779 const oecount *c2 = (const oecount *)p2;
780 return c1->cnt - c2->cnt;
783 /* Walks the linear chain with result *DEF searching for an operation
784 with operand OP and code OPCODE removing that from the chain. *DEF
785 is updated if there is only one operand but no operation left. */
787 static void
788 zero_one_operation (tree *def, enum tree_code opcode, tree op)
790 gimple stmt = SSA_NAME_DEF_STMT (*def);
794 tree name = gimple_assign_rhs1 (stmt);
796 /* If this is the operation we look for and one of the operands
797 is ours simply propagate the other operand into the stmts
798 single use. */
799 if (gimple_assign_rhs_code (stmt) == opcode
800 && (name == op
801 || gimple_assign_rhs2 (stmt) == op))
803 gimple use_stmt;
804 use_operand_p use;
805 gimple_stmt_iterator gsi;
806 if (name == op)
807 name = gimple_assign_rhs2 (stmt);
808 gcc_assert (has_single_use (gimple_assign_lhs (stmt)));
809 single_imm_use (gimple_assign_lhs (stmt), &use, &use_stmt);
810 if (gimple_assign_lhs (stmt) == *def)
811 *def = name;
812 SET_USE (use, name);
813 if (TREE_CODE (name) != SSA_NAME)
814 update_stmt (use_stmt);
815 gsi = gsi_for_stmt (stmt);
816 gsi_remove (&gsi, true);
817 release_defs (stmt);
818 return;
821 /* Continue walking the chain. */
822 gcc_assert (name != op
823 && TREE_CODE (name) == SSA_NAME);
824 stmt = SSA_NAME_DEF_STMT (name);
826 while (1);
829 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
830 the result. Places the statement after the definition of either
831 OP1 or OP2. Returns the new statement. */
833 static gimple
834 build_and_add_sum (tree tmpvar, tree op1, tree op2, enum tree_code opcode)
836 gimple op1def = NULL, op2def = NULL;
837 gimple_stmt_iterator gsi;
838 tree op;
839 gimple sum;
841 /* Create the addition statement. */
842 sum = gimple_build_assign_with_ops (opcode, tmpvar, op1, op2);
843 op = make_ssa_name (tmpvar, sum);
844 gimple_assign_set_lhs (sum, op);
846 /* Find an insertion place and insert. */
847 if (TREE_CODE (op1) == SSA_NAME)
848 op1def = SSA_NAME_DEF_STMT (op1);
849 if (TREE_CODE (op2) == SSA_NAME)
850 op2def = SSA_NAME_DEF_STMT (op2);
851 if ((!op1def || gimple_nop_p (op1def))
852 && (!op2def || gimple_nop_p (op2def)))
854 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
855 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
857 else if ((!op1def || gimple_nop_p (op1def))
858 || (op2def && !gimple_nop_p (op2def)
859 && stmt_dominates_stmt_p (op1def, op2def)))
861 if (gimple_code (op2def) == GIMPLE_PHI)
863 gsi = gsi_after_labels (gimple_bb (op2def));
864 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
866 else
868 if (!stmt_ends_bb_p (op2def))
870 gsi = gsi_for_stmt (op2def);
871 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
873 else
875 edge e;
876 edge_iterator ei;
878 FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
879 if (e->flags & EDGE_FALLTHRU)
880 gsi_insert_on_edge_immediate (e, sum);
884 else
886 if (gimple_code (op1def) == GIMPLE_PHI)
888 gsi = gsi_after_labels (gimple_bb (op1def));
889 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
891 else
893 if (!stmt_ends_bb_p (op1def))
895 gsi = gsi_for_stmt (op1def);
896 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
898 else
900 edge e;
901 edge_iterator ei;
903 FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
904 if (e->flags & EDGE_FALLTHRU)
905 gsi_insert_on_edge_immediate (e, sum);
909 update_stmt (sum);
911 return sum;
914 /* Perform un-distribution of divisions and multiplications.
915 A * X + B * X is transformed into (A + B) * X and A / X + B / X
916 to (A + B) / X for real X.
918 The algorithm is organized as follows.
920 - First we walk the addition chain *OPS looking for summands that
921 are defined by a multiplication or a real division. This results
922 in the candidates bitmap with relevant indices into *OPS.
924 - Second we build the chains of multiplications or divisions for
925 these candidates, counting the number of occurences of (operand, code)
926 pairs in all of the candidates chains.
928 - Third we sort the (operand, code) pairs by number of occurence and
929 process them starting with the pair with the most uses.
931 * For each such pair we walk the candidates again to build a
932 second candidate bitmap noting all multiplication/division chains
933 that have at least one occurence of (operand, code).
935 * We build an alternate addition chain only covering these
936 candidates with one (operand, code) operation removed from their
937 multiplication/division chain.
939 * The first candidate gets replaced by the alternate addition chain
940 multiplied/divided by the operand.
942 * All candidate chains get disabled for further processing and
943 processing of (operand, code) pairs continues.
945 The alternate addition chains built are re-processed by the main
946 reassociation algorithm which allows optimizing a * x * y + b * y * x
947 to (a + b ) * x * y in one invocation of the reassociation pass. */
949 static bool
950 undistribute_ops_list (enum tree_code opcode,
951 VEC (operand_entry_t, heap) **ops, struct loop *loop)
953 unsigned int length = VEC_length (operand_entry_t, *ops);
954 operand_entry_t oe1;
955 unsigned i, j;
956 sbitmap candidates, candidates2;
957 unsigned nr_candidates, nr_candidates2;
958 sbitmap_iterator sbi0;
959 VEC (operand_entry_t, heap) **subops;
960 htab_t ctable;
961 bool changed = false;
963 if (length <= 1
964 || opcode != PLUS_EXPR)
965 return false;
967 /* Build a list of candidates to process. */
968 candidates = sbitmap_alloc (length);
969 sbitmap_zero (candidates);
970 nr_candidates = 0;
971 for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe1); ++i)
973 enum tree_code dcode;
974 gimple oe1def;
976 if (TREE_CODE (oe1->op) != SSA_NAME)
977 continue;
978 oe1def = SSA_NAME_DEF_STMT (oe1->op);
979 if (!is_gimple_assign (oe1def))
980 continue;
981 dcode = gimple_assign_rhs_code (oe1def);
982 if ((dcode != MULT_EXPR
983 && dcode != RDIV_EXPR)
984 || !is_reassociable_op (oe1def, dcode, loop))
985 continue;
987 SET_BIT (candidates, i);
988 nr_candidates++;
991 if (nr_candidates < 2)
993 sbitmap_free (candidates);
994 return false;
997 if (dump_file && (dump_flags & TDF_DETAILS))
999 fprintf (dump_file, "searching for un-distribute opportunities ");
1000 print_generic_expr (dump_file,
1001 VEC_index (operand_entry_t, *ops,
1002 sbitmap_first_set_bit (candidates))->op, 0);
1003 fprintf (dump_file, " %d\n", nr_candidates);
1006 /* Build linearized sub-operand lists and the counting table. */
1007 cvec = NULL;
1008 ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1009 subops = XCNEWVEC (VEC (operand_entry_t, heap) *,
1010 VEC_length (operand_entry_t, *ops));
1011 EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1013 gimple oedef;
1014 enum tree_code oecode;
1015 unsigned j;
1017 oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op);
1018 oecode = gimple_assign_rhs_code (oedef);
1019 linearize_expr_tree (&subops[i], oedef,
1020 associative_tree_code (oecode), false);
1022 for (j = 0; VEC_iterate (operand_entry_t, subops[i], j, oe1); ++j)
1024 oecount c;
1025 void **slot;
1026 size_t idx;
1027 c.oecode = oecode;
1028 c.cnt = 1;
1029 c.op = oe1->op;
1030 VEC_safe_push (oecount, heap, cvec, &c);
1031 idx = VEC_length (oecount, cvec) + 41;
1032 slot = htab_find_slot (ctable, (void *)idx, INSERT);
1033 if (!*slot)
1035 *slot = (void *)idx;
1037 else
1039 VEC_pop (oecount, cvec);
1040 VEC_index (oecount, cvec, (size_t)*slot - 42)->cnt++;
1044 htab_delete (ctable);
1046 /* Sort the counting table. */
1047 qsort (VEC_address (oecount, cvec), VEC_length (oecount, cvec),
1048 sizeof (oecount), oecount_cmp);
1050 if (dump_file && (dump_flags & TDF_DETAILS))
1052 oecount *c;
1053 fprintf (dump_file, "Candidates:\n");
1054 for (j = 0; VEC_iterate (oecount, cvec, j, c); ++j)
1056 fprintf (dump_file, " %u %s: ", c->cnt,
1057 c->oecode == MULT_EXPR
1058 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1059 print_generic_expr (dump_file, c->op, 0);
1060 fprintf (dump_file, "\n");
1064 /* Process the (operand, code) pairs in order of most occurence. */
1065 candidates2 = sbitmap_alloc (length);
1066 while (!VEC_empty (oecount, cvec))
1068 oecount *c = VEC_last (oecount, cvec);
1069 if (c->cnt < 2)
1070 break;
1072 /* Now collect the operands in the outer chain that contain
1073 the common operand in their inner chain. */
1074 sbitmap_zero (candidates2);
1075 nr_candidates2 = 0;
1076 EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1078 gimple oedef;
1079 enum tree_code oecode;
1080 unsigned j;
1081 tree op = VEC_index (operand_entry_t, *ops, i)->op;
1083 /* If we undistributed in this chain already this may be
1084 a constant. */
1085 if (TREE_CODE (op) != SSA_NAME)
1086 continue;
1088 oedef = SSA_NAME_DEF_STMT (op);
1089 oecode = gimple_assign_rhs_code (oedef);
1090 if (oecode != c->oecode)
1091 continue;
1093 for (j = 0; VEC_iterate (operand_entry_t, subops[i], j, oe1); ++j)
1095 if (oe1->op == c->op)
1097 SET_BIT (candidates2, i);
1098 ++nr_candidates2;
1099 break;
1104 if (nr_candidates2 >= 2)
1106 operand_entry_t oe1, oe2;
1107 tree tmpvar;
1108 gimple prod;
1109 int first = sbitmap_first_set_bit (candidates2);
1111 /* Build the new addition chain. */
1112 oe1 = VEC_index (operand_entry_t, *ops, first);
1113 if (dump_file && (dump_flags & TDF_DETAILS))
1115 fprintf (dump_file, "Building (");
1116 print_generic_expr (dump_file, oe1->op, 0);
1118 tmpvar = create_tmp_var (TREE_TYPE (oe1->op), NULL);
1119 add_referenced_var (tmpvar);
1120 zero_one_operation (&oe1->op, c->oecode, c->op);
1121 EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
1123 gimple sum;
1124 oe2 = VEC_index (operand_entry_t, *ops, i);
1125 if (dump_file && (dump_flags & TDF_DETAILS))
1127 fprintf (dump_file, " + ");
1128 print_generic_expr (dump_file, oe2->op, 0);
1130 zero_one_operation (&oe2->op, c->oecode, c->op);
1131 sum = build_and_add_sum (tmpvar, oe1->op, oe2->op, opcode);
1132 oe2->op = fold_convert (TREE_TYPE (oe2->op), integer_zero_node);
1133 oe2->rank = 0;
1134 oe1->op = gimple_get_lhs (sum);
1137 /* Apply the multiplication/division. */
1138 prod = build_and_add_sum (tmpvar, oe1->op, c->op, c->oecode);
1139 if (dump_file && (dump_flags & TDF_DETAILS))
1141 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1142 print_generic_expr (dump_file, c->op, 0);
1143 fprintf (dump_file, "\n");
1146 /* Record it in the addition chain and disable further
1147 undistribution with this op. */
1148 oe1->op = gimple_assign_lhs (prod);
1149 oe1->rank = get_rank (oe1->op);
1150 VEC_free (operand_entry_t, heap, subops[first]);
1152 changed = true;
1155 VEC_pop (oecount, cvec);
1158 for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i)
1159 VEC_free (operand_entry_t, heap, subops[i]);
1160 free (subops);
1161 VEC_free (oecount, heap, cvec);
1162 sbitmap_free (candidates);
1163 sbitmap_free (candidates2);
1165 return changed;
1169 /* Perform various identities and other optimizations on the list of
1170 operand entries, stored in OPS. The tree code for the binary
1171 operation between all the operands is OPCODE. */
1173 static void
1174 optimize_ops_list (enum tree_code opcode,
1175 VEC (operand_entry_t, heap) **ops)
1177 unsigned int length = VEC_length (operand_entry_t, *ops);
1178 unsigned int i;
1179 operand_entry_t oe;
1180 operand_entry_t oelast = NULL;
1181 bool iterate = false;
     /* A single operand leaves nothing to combine.  */
1183 if (length == 1)
1184 return;
1186 oelast = VEC_last (operand_entry_t, *ops);
1188 /* If the last two are constants, pop the constants off, merge them
1189 and try the next two. */
1190 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1192 operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2);
1194 if (oelm1->rank == 0
1195 && is_gimple_min_invariant (oelm1->op)
1196 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1197 TREE_TYPE (oelast->op)))
1199 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1200 oelm1->op, oelast->op);
     /* Only merge if folding produced another constant; otherwise
        leave both entries in place.  */
1202 if (folded && is_gimple_min_invariant (folded))
1204 if (dump_file && (dump_flags & TDF_DETAILS))
1205 fprintf (dump_file, "Merging constants\n");
1207 VEC_pop (operand_entry_t, *ops);
1208 VEC_pop (operand_entry_t, *ops);
1210 add_to_ops_vec (ops, folded)
1211 reassociate_stats.constants_eliminated++;
     /* The merged constant may combine with yet another trailing
        constant, so start over.  */
1213 optimize_ops_list (opcode, ops);
1214 return;
     /* Eliminate operands using constant identities for OPCODE (see
        eliminate_using_constants, defined earlier in this file).  */
1219 eliminate_using_constants (opcode, ops);
1220 oelast = NULL;
     /* Scan for pairwise eliminations (NOT pairs, duplicate operands,
        x + -x); each successful elimination restarts the iteration
        logic via ITERATE.  I is only advanced when nothing matched.  */
1222 for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);)
1224 bool done = false;
1226 if (eliminate_not_pairs (opcode, ops, i, oe))
1227 return;
1228 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1229 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe)))
1231 if (done)
1232 return;
1233 iterate = true;
1234 oelast = NULL;
1235 continue;
1237 oelast = oe;
1238 i++;
1241 length = VEC_length (operand_entry_t, *ops);
1242 oelast = VEC_last (operand_entry_t, *ops);
     /* If anything was eliminated the list may expose new
        opportunities; run the whole optimization again.  */
1244 if (iterate)
1245 optimize_ops_list (opcode, ops);
1248 /* Return true if OPERAND is defined by a PHI node which uses the LHS
1249 of STMT in it's operands. This is also known as a "destructive
1250 update" operation. */
1252 static bool
1253 is_phi_for_stmt (gimple stmt, tree operand)
1255 gimple def_stmt;
1256 tree lhs;
1257 use_operand_p arg_p;
1258 ssa_op_iter i;
1260 if (TREE_CODE (operand) != SSA_NAME)
1261 return false;
1263 lhs = gimple_assign_lhs (stmt);
1265 def_stmt = SSA_NAME_DEF_STMT (operand);
1266 if (gimple_code (def_stmt) != GIMPLE_PHI)
1267 return false;
1269 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
1270 if (lhs == USE_FROM_PTR (arg_p))
1271 return true;
1272 return false;
1275 /* Remove def stmt of VAR if VAR has zero uses and recurse
1276 on rhs1 operand if so. */
1278 static void
1279 remove_visited_stmt_chain (tree var)
1281 gimple stmt;
1282 gimple_stmt_iterator gsi;
1284 while (1)
1286 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
1287 return;
1288 stmt = SSA_NAME_DEF_STMT (var);
1289 if (!is_gimple_assign (stmt)
1290 || !gimple_visited_p (stmt))
1291 return;
1292 var = gimple_assign_rhs1 (stmt);
1293 gsi = gsi_for_stmt (stmt);
1294 gsi_remove (&gsi, true);
1295 release_defs (stmt);
1299 /* Recursively rewrite our linearized statements so that the operators
1300 match those in OPS[OPINDEX], putting the computation in rank
1301 order. */
1303 static void
1304 rewrite_expr_tree (gimple stmt, unsigned int opindex,
1305 VEC(operand_entry_t, heap) * ops, bool moved)
1307 tree rhs1 = gimple_assign_rhs1 (stmt);
1308 tree rhs2 = gimple_assign_rhs2 (stmt);
1309 operand_entry_t oe;
1311 /* If we have three operands left, then we want to make sure the one
1312 that gets the double binary op are the ones with the same rank.
1314 The alternative we try is to see if this is a destructive
1315 update style statement, which is like:
1316 b = phi (a, ...)
1317 a = c + b;
1318 In that case, we want to use the destructive update form to
1319 expose the possible vectorizer sum reduction opportunity.
1320 In that case, the third operand will be the phi node.
1322 We could, of course, try to be better as noted above, and do a
1323 lot of work to try to find these opportunities in >3 operand
1324 cases, but it is unlikely to be worth it. */
1325 if (opindex + 3 == VEC_length (operand_entry_t, ops))
1327 operand_entry_t oe1, oe2, oe3;
1329 oe1 = VEC_index (operand_entry_t, ops, opindex);
1330 oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
1331 oe3 = VEC_index (operand_entry_t, ops, opindex + 2);
     /* Swap OE1 and OE3 when OE1/OE2 are the equal-ranked pair (or OE3
        is the PHI-fed operand): the matched pair then fills the last
        two slots, and the odd/PHI operand moves to the front where it
        becomes the outermost statement's operand.  */
1333 if ((oe1->rank == oe2->rank
1334 && oe2->rank != oe3->rank)
1335 || (is_phi_for_stmt (stmt, oe3->op)
1336 && !is_phi_for_stmt (stmt, oe1->op)
1337 && !is_phi_for_stmt (stmt, oe2->op)))
1339 struct operand_entry temp = *oe3;
1340 oe3->op = oe1->op;
1341 oe3->rank = oe1->rank;
1342 oe1->op = temp.op;
1343 oe1->rank= temp.rank;
     /* Likewise swap OE1 and OE2 when OE1/OE3 are the matched pair.  */
1345 else if ((oe1->rank == oe3->rank
1346 && oe2->rank != oe3->rank)
1347 || (is_phi_for_stmt (stmt, oe2->op)
1348 && !is_phi_for_stmt (stmt, oe1->op)
1349 && !is_phi_for_stmt (stmt, oe3->op)))
1351 struct operand_entry temp = *oe2;
1352 oe2->op = oe1->op;
1353 oe2->rank = oe1->rank;
1354 oe1->op = temp.op;
1355 oe1->rank= temp.rank;
1359 /* The final recursion case for this function is that you have
1360 exactly two operations left.
1361 If we had exactly one op in the entire list to start with, we
1362 would have never called this function, and the tail recursion
1363 rewrites them one at a time. */
1364 if (opindex + 2 == VEC_length (operand_entry_t, ops))
1366 operand_entry_t oe1, oe2;
1368 oe1 = VEC_index (operand_entry_t, ops, opindex);
1369 oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
     /* Only touch the statement if an operand actually changes.  */
1371 if (rhs1 != oe1->op || rhs2 != oe2->op)
1373 if (dump_file && (dump_flags & TDF_DETAILS))
1375 fprintf (dump_file, "Transforming ");
1376 print_gimple_stmt (dump_file, stmt, 0, 0);
1379 gimple_assign_set_rhs1 (stmt, oe1->op);
1380 gimple_assign_set_rhs2 (stmt, oe2->op);
1381 update_stmt (stmt);
     /* The old rhs1 chain may now be dead; clean it up.  */
1382 if (rhs1 != oe1->op && rhs1 != oe2->op)
1383 remove_visited_stmt_chain (rhs1);
1385 if (dump_file && (dump_flags & TDF_DETAILS))
1387 fprintf (dump_file, " into ");
1388 print_gimple_stmt (dump_file, stmt, 0, 0);
1392 return;
1395 /* If we hit here, we should have 3 or more ops left. */
1396 gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops));
1398 /* Rewrite the next operator. */
1399 oe = VEC_index (operand_entry_t, ops, opindex);
1401 if (oe->op != rhs2)
1403 if (!moved)
1405 gimple_stmt_iterator gsinow, gsirhs1;
1406 gimple stmt1 = stmt, stmt2;
1407 unsigned int count;
     /* Move the statements defining the rhs1 chain to immediately
        before STMT.  NOTE(review): presumably this keeps each
        definition ahead of the uses the rewrite introduces — confirm
        against pass history.  */
1409 gsinow = gsi_for_stmt (stmt);
1410 count = VEC_length (operand_entry_t, ops) - opindex - 2;
1411 while (count-- != 0)
1413 stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
1414 gsirhs1 = gsi_for_stmt (stmt2);
1415 gsi_move_before (&gsirhs1, &gsinow);
1416 gsi_prev (&gsinow);
1417 stmt1 = stmt2;
     /* Perform the statement motion only once per rewrite.  */
1419 moved = true;
1422 if (dump_file && (dump_flags & TDF_DETAILS))
1424 fprintf (dump_file, "Transforming ");
1425 print_gimple_stmt (dump_file, stmt, 0, 0);
1428 gimple_assign_set_rhs2 (stmt, oe->op);
1429 update_stmt (stmt);
1431 if (dump_file && (dump_flags & TDF_DETAILS))
1433 fprintf (dump_file, " into ");
1434 print_gimple_stmt (dump_file, stmt, 0, 0);
1437 /* Recurse on the LHS of the binary operator, which is guaranteed to
1438 be the non-leaf side. */
1439 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
1442 /* Transform STMT, which is really (A + B) + (C + D) into the left
1443 linear form, ((A+B)+C)+D.
1444 Recurse on D if necessary. */
1446 static void
1447 linearize_expr (gimple stmt)
1449 gimple_stmt_iterator gsinow, gsirhs;
1450 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
1451 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1452 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
1453 gimple newbinrhs = NULL;
1454 struct loop *loop = loop_containing_stmt (stmt);
1456 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
1457 && is_reassociable_op (binrhs, rhscode, loop));
     /* STMT will start using BINRHS's result directly, so move BINRHS
        up to just before STMT.  */
1459 gsinow = gsi_for_stmt (stmt);
1460 gsirhs = gsi_for_stmt (binrhs);
1461 gsi_move_before (&gsirhs, &gsinow);
     /* Rotate the trees: with STMT = (A op B) op (C op D), BINRHS
        becomes (A op B) op D and STMT becomes BINRHS op C.  */
1463 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
1464 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
1465 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
     /* The new rhs2 (C above) may itself be defined by a reassociable
        statement that still needs linearizing.  */
1467 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
1468 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1470 if (dump_file && (dump_flags & TDF_DETAILS))
1472 fprintf (dump_file, "Linearized: ");
1473 print_gimple_stmt (dump_file, stmt, 0, 0);
1476 reassociate_stats.linearized++;
1477 update_stmt (binrhs);
1478 update_stmt (binlhs);
1479 update_stmt (stmt);
     /* Mark all three as processed so reassociate_bb skips them.  */
1481 gimple_set_visited (stmt, true);
1482 gimple_set_visited (binlhs, true);
1483 gimple_set_visited (binrhs, true);
1485 /* Tail recurse on the new rhs if it still needs reassociation. */
1486 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
1487 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
1488 want to change the algorithm while converting to tuples. */
1489 linearize_expr (stmt);
1492 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
1493 it. Otherwise, return NULL. */
1495 static gimple
1496 get_single_immediate_use (tree lhs)
1498 use_operand_p immuse;
1499 gimple immusestmt;
1501 if (TREE_CODE (lhs) == SSA_NAME
1502 && single_imm_use (lhs, &immuse, &immusestmt)
1503 && is_gimple_assign (immusestmt))
1504 return immusestmt;
1506 return NULL;
1509 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
1510 representing the negated value. Insertions of any necessary
1511 instructions go before GSI.
1512 This function is recursive in that, if you hand it "a_5" as the
1513 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
1514 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
1516 static tree
1517 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
1519 gimple negatedefstmt= NULL;
1520 tree resultofnegate;
1522 /* If we are trying to negate a name, defined by an add, negate the
1523 add operands instead. */
1524 if (TREE_CODE (tonegate) == SSA_NAME)
1525 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
     /* A single-use add can be negated in place by negating both of
        its operands: -(a + b) == (-a) + (-b); its LHS then already
        holds the negated value.  */
1526 if (TREE_CODE (tonegate) == SSA_NAME
1527 && is_gimple_assign (negatedefstmt)
1528 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
1529 && has_single_use (gimple_assign_lhs (negatedefstmt))
1530 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
1532 gimple_stmt_iterator gsi;
1533 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
1534 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
     /* Insert the negation of each operand right before the add.  */
1536 gsi = gsi_for_stmt (negatedefstmt);
1537 rhs1 = negate_value (rhs1, &gsi);
1538 gimple_assign_set_rhs1 (negatedefstmt, rhs1);
     /* Re-fetch the iterator — presumably because the first recursion
        may have inserted statements and moved it; confirm.  */
1540 gsi = gsi_for_stmt (negatedefstmt);
1541 rhs2 = negate_value (rhs2, &gsi);
1542 gimple_assign_set_rhs2 (negatedefstmt, rhs2);
1544 update_stmt (negatedefstmt);
1545 return gimple_assign_lhs (negatedefstmt);
     /* Otherwise materialize -TONEGATE as new statement(s) inserted
        before *GSI.  */
1548 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
1549 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
1550 NULL_TREE, true, GSI_SAME_STMT);
1551 return resultofnegate;
1554 /* Return true if we should break up the subtract in STMT into an add
1555 with negate. This is true when we the subtract operands are really
1556 adds, or the subtract itself is used in an add expression. In
1557 either case, breaking up the subtract into an add with negate
1558 exposes the adds to reassociation. */
1560 static bool
1561 should_break_up_subtract (gimple stmt)
1563 tree lhs = gimple_assign_lhs (stmt);
1564 tree binlhs = gimple_assign_rhs1 (stmt);
1565 tree binrhs = gimple_assign_rhs2 (stmt);
1566 gimple immusestmt;
1567 struct loop *loop = loop_containing_stmt (stmt);
1569 if (TREE_CODE (binlhs) == SSA_NAME
1570 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
1571 return true;
1573 if (TREE_CODE (binrhs) == SSA_NAME
1574 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
1575 return true;
1577 if (TREE_CODE (lhs) == SSA_NAME
1578 && (immusestmt = get_single_immediate_use (lhs))
1579 && is_gimple_assign (immusestmt)
1580 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
1581 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
1582 return true;
1583 return false;
1586 /* Transform STMT from A - B into A + -B. */
1588 static void
1589 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
1591 tree rhs1 = gimple_assign_rhs1 (stmt);
1592 tree rhs2 = gimple_assign_rhs2 (stmt);
1594 if (dump_file && (dump_flags & TDF_DETAILS))
1596 fprintf (dump_file, "Breaking up subtract ");
1597 print_gimple_stmt (dump_file, stmt, 0, 0);
1600 rhs2 = negate_value (rhs2, gsip);
1601 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
1602 update_stmt (stmt);
1605 /* Recursively linearize a binary expression that is the RHS of STMT.
1606 Place the operands of the expression tree in the vector named OPS. */
1608 static void
1609 linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt,
1610 bool is_associative, bool set_visited)
1612 tree binlhs = gimple_assign_rhs1 (stmt);
1613 tree binrhs = gimple_assign_rhs2 (stmt);
1614 gimple binlhsdef, binrhsdef;
1615 bool binlhsisreassoc = false;
1616 bool binrhsisreassoc = false;
1617 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
1618 struct loop *loop = loop_containing_stmt (stmt);
1620 if (set_visited)
1621 gimple_set_visited (stmt, true);
     /* Determine which operands are themselves chains of the same
        operation within this loop.  */
1623 if (TREE_CODE (binlhs) == SSA_NAME)
1625 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
1626 binlhsisreassoc = is_reassociable_op (binlhsdef, rhscode, loop);
1629 if (TREE_CODE (binrhs) == SSA_NAME)
1631 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
1632 binrhsisreassoc = is_reassociable_op (binrhsdef, rhscode, loop);
1635 /* If the LHS is not reassociable, but the RHS is, we need to swap
1636 them. If neither is reassociable, there is nothing we can do, so
1637 just put them in the ops vector. If the LHS is reassociable,
1638 linearize it. If both are reassociable, then linearize the RHS
1639 and the LHS. */
1641 if (!binlhsisreassoc)
1643 tree temp;
1645 /* If this is not an associative operation like division, give up. */
1646 if (!is_associative)
1648 add_to_ops_vec (ops, binrhs);
1649 return;
1652 if (!binrhsisreassoc)
1654 add_to_ops_vec (ops, binrhs);
1655 add_to_ops_vec (ops, binlhs);
1656 return;
1659 if (dump_file && (dump_flags & TDF_DETAILS))
1661 fprintf (dump_file, "swapping operands of ");
1662 print_gimple_stmt (dump_file, stmt, 0, 0);
1665 swap_tree_operands (stmt,
1666 gimple_assign_rhs1_ptr (stmt),
1667 gimple_assign_rhs2_ptr (stmt));
1668 update_stmt (stmt);
1670 if (dump_file && (dump_flags & TDF_DETAILS))
1672 fprintf (dump_file, " is now ");
1673 print_gimple_stmt (dump_file, stmt, 0, 0);
1676 /* We want to make it so the lhs is always the reassociative op,
1677 so swap. */
1678 temp = binlhs;
1679 binlhs = binrhs;
1680 binrhs = temp;
1682 else if (binrhsisreassoc)
1684 linearize_expr (stmt);
1685 binlhs = gimple_assign_rhs1 (stmt);
1686 binrhs = gimple_assign_rhs2 (stmt);
     /* By now BINRHS must be a leaf of the chain: recurse down the
        BINLHS side and append BINRHS as an operand.  */
1689 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
1690 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
1691 rhscode, loop));
1692 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
1693 is_associative, set_visited);
1694 add_to_ops_vec (ops, binrhs);
1697 /* Repropagate the negates back into subtracts, since no other pass
1698 currently does it. */
1700 static void
1701 repropagate_negates (void)
1703 unsigned int i = 0;
1704 tree negate;
     /* PLUS_NEGATES holds NEGATE_EXPR results created earlier by this
        pass — presumably while breaking up subtracts (see
        break_up_subtract / negate_value); confirm where entries are
        recorded.  */
1706 for (i = 0; VEC_iterate (tree, plus_negates, i, negate); i++)
1708 gimple user = get_single_immediate_use (negate);
1710 /* The negate operand can be either operand of a PLUS_EXPR
1711 (it can be the LHS if the RHS is a constant for example).
1713 Force the negate operand to the RHS of the PLUS_EXPR, then
1714 transform the PLUS_EXPR into a MINUS_EXPR. */
1715 if (user
1716 && is_gimple_assign (user)
1717 && gimple_assign_rhs_code (user) == PLUS_EXPR)
1719 /* If the negated operand appears on the LHS of the
1720 PLUS_EXPR, exchange the operands of the PLUS_EXPR
1721 to force the negated operand to the RHS of the PLUS_EXPR. */
1722 if (gimple_assign_rhs1 (user) == negate)
1724 swap_tree_operands (user,
1725 gimple_assign_rhs1_ptr (user),
1726 gimple_assign_rhs2_ptr (user));
1729 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
1730 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
1731 if (gimple_assign_rhs2 (user) == negate)
1733 tree rhs1 = gimple_assign_rhs1 (user);
1734 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
1735 gimple_stmt_iterator gsi = gsi_for_stmt (user);
1736 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
1737 update_stmt (user);
1743 /* Break up subtract operations in block BB.
1745 We do this top down because we don't know whether the subtract is
1746 part of a possible chain of reassociation except at the top.
1748 IE given
1749 d = f + g
1750 c = a + e
1751 b = c - d
1752 q = b - r
1753 k = t - q
1755 we want to break up k = t - q, but we won't until we've transformed q
1756 = b - r, which won't be broken up until we transform b = c - d.
1758 En passant, clear the GIMPLE visited flag on every statement. */
1760 static void
1761 break_up_subtract_bb (basic_block bb)
1763 gimple_stmt_iterator gsi;
1764 basic_block son;
1766 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1768 gimple stmt = gsi_stmt (gsi);
     /* Clear the flag reassociate_bb will use later.  */
1769 gimple_set_visited (stmt, false);
1771 /* Look for simple gimple subtract operations. */
1772 if (is_gimple_assign (stmt)
1773 && gimple_assign_rhs_code (stmt) == MINUS_EXPR)
1775 tree lhs = gimple_assign_lhs (stmt);
1776 tree rhs1 = gimple_assign_rhs1 (stmt);
1777 tree rhs2 = gimple_assign_rhs2 (stmt);
1779 /* If associative-math we can do reassociation for
1780 non-integral types. Or, we can do reassociation for
1781 non-saturating fixed-point types. */
     /* Skip unless all three types are integral, or all are
        scalar float with -fassociative-math, or all are
        non-saturating fixed point.  */
1782 if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
1783 || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
1784 || !INTEGRAL_TYPE_P (TREE_TYPE (rhs2)))
1785 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs))
1786 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(rhs1))
1787 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(rhs2))
1788 || !flag_associative_math)
1789 && (!NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE (lhs))
1790 || !NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE(rhs1))
1791 || !NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE(rhs2))))
1792 continue;
1794 /* Check for a subtract used only in an addition. If this
1795 is the case, transform it into add of a negate for better
1796 reassociation. IE transform C = A-B into C = A + -B if C
1797 is only used in an addition. */
1798 if (should_break_up_subtract (stmt))
1799 break_up_subtract (stmt, &gsi)
     /* Recurse into dominator children so subtracts are processed
        top down, as the header comment describes.  */
1802 for (son = first_dom_son (CDI_DOMINATORS, bb);
1803 son;
1804 son = next_dom_son (CDI_DOMINATORS, son))
1805 break_up_subtract_bb (son);
1808 /* Reassociate expressions in basic block BB and its post-dominator as
1809 children. */
1811 static void
1812 reassociate_bb (basic_block bb)
1814 gimple_stmt_iterator gsi;
1815 basic_block son;
     /* Walk the block backwards so each chain is seen from its root
        before the statements feeding it.  */
1817 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
1819 gimple stmt = gsi_stmt (gsi);
1821 if (is_gimple_assign (stmt))
1823 tree lhs, rhs1, rhs2;
1824 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
1826 /* If this is not a gimple binary expression, there is
1827 nothing for us to do with it. */
1828 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
1829 continue;
1831 /* If this was part of an already processed statement,
1832 we don't need to touch it again. */
1833 if (gimple_visited_p (stmt))
1835 /* This statement might have become dead because of previous
1836 reassociations. */
1837 if (has_zero_uses (gimple_get_lhs (stmt)))
1839 gsi_remove (&gsi, true);
1840 release_defs (stmt);
1841 /* We might end up removing the last stmt above which
1842 places the iterator to the end of the sequence.
1843 Reset it to the last stmt in this case which might
1844 be the end of the sequence as well if we removed
1845 the last statement of the sequence. In which case
1846 we need to bail out. */
1847 if (gsi_end_p (gsi))
1849 gsi = gsi_last_bb (bb);
1850 if (gsi_end_p (gsi))
1851 break;
1854 continue;
1857 lhs = gimple_assign_lhs (stmt);
1858 rhs1 = gimple_assign_rhs1 (stmt);
1859 rhs2 = gimple_assign_rhs2 (stmt);
1861 /* If associative-math we can do reassociation for
1862 non-integral types. Or, we can do reassociation for
1863 non-saturating fixed-point types. */
     /* Same type filter as in break_up_subtract_bb: integral, or
        scalar float with -fassociative-math, or non-saturating
        fixed point.  */
1864 if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
1865 || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
1866 || !INTEGRAL_TYPE_P (TREE_TYPE (rhs2)))
1867 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs))
1868 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(rhs1))
1869 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(rhs2))
1870 || !flag_associative_math)
1871 && (!NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE (lhs))
1872 || !NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE(rhs1))
1873 || !NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE(rhs2))))
1874 continue;
1876 if (associative_tree_code (rhs_code))
1878 VEC(operand_entry_t, heap) *ops = NULL;
1880 /* There may be no immediate uses left by the time we
1881 get here because we may have eliminated them all. */
1882 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
1883 continue;
1885 gimple_set_visited (stmt, true);
     /* Flatten the whole chain into OPS, sort the operands into
        rank order, then run the list optimizations.  */
1886 linearize_expr_tree (&ops, stmt, true, true);
1887 qsort (VEC_address (operand_entry_t, ops),
1888 VEC_length (operand_entry_t, ops),
1889 sizeof (operand_entry_t),
1890 sort_by_operand_rank);
1891 optimize_ops_list (rhs_code, &ops);
     /* Undistribution may rewrite the operand list; if it did,
        re-sort and re-optimize.  */
1892 if (undistribute_ops_list (rhs_code, &ops,
1893 loop_containing_stmt (stmt)))
1895 qsort (VEC_address (operand_entry_t, ops),
1896 VEC_length (operand_entry_t, ops),
1897 sizeof (operand_entry_t),
1898 sort_by_operand_rank);
1899 optimize_ops_list (rhs_code, &ops);
     /* Everything folded down to one operand: replace the RHS of
        STMT with it outright and clean up the dead chain.  */
1902 if (VEC_length (operand_entry_t, ops) == 1)
1904 if (dump_file && (dump_flags & TDF_DETAILS))
1906 fprintf (dump_file, "Transforming ");
1907 print_gimple_stmt (dump_file, stmt, 0, 0);
1910 rhs1 = gimple_assign_rhs1 (stmt);
1911 gimple_assign_set_rhs_from_tree (&gsi,
1912 VEC_last (operand_entry_t,
1913 ops)->op);
1914 update_stmt (stmt);
1915 remove_visited_stmt_chain (rhs1);
1917 if (dump_file && (dump_flags & TDF_DETAILS))
1919 fprintf (dump_file, " into ");
1920 print_gimple_stmt (dump_file, stmt, 0, 0);
1923 else
     /* Otherwise rewrite the statement chain in rank order.  */
1924 rewrite_expr_tree (stmt, 0, ops, false);
1926 VEC_free (operand_entry_t, heap, ops);
1930 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
1931 son;
1932 son = next_dom_son (CDI_POST_DOMINATORS, son))
1933 reassociate_bb (son);
1936 void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops);
1937 void debug_ops_vector (VEC (operand_entry_t, heap) *ops);
1939 /* Dump the operand entry vector OPS to FILE. */
1941 void
1942 dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops)
1944 operand_entry_t oe;
1945 unsigned int i;
1947 for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++)
1949 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
1950 print_generic_expr (file, oe->op, 0);
1954 /* Dump the operand entry vector OPS to STDERR. */
1956 void
1957 debug_ops_vector (VEC (operand_entry_t, heap) *ops)
1959 dump_ops_vector (stderr, ops);
1962 static void
1963 do_reassoc (void)
1965 break_up_subtract_bb (ENTRY_BLOCK_PTR);
1966 reassociate_bb (EXIT_BLOCK_PTR);
1969 /* Initialize the reassociation pass. */
1971 static void
1972 init_reassoc (void)
1974 int i;
1975 long rank = 2;
1976 tree param;
1977 int *bbs = XNEWVEC (int, last_basic_block + 1);
1979 /* Find the loops, so that we can prevent moving calculations in
1980 them. */
1981 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
1983 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
1985 operand_entry_pool = create_alloc_pool ("operand entry pool",
1986 sizeof (struct operand_entry), 30);
1988 /* Reverse RPO (Reverse Post Order) will give us something where
1989 deeper loops come later. */
1990 pre_and_rev_post_order_compute (NULL, bbs, false);
1991 bb_rank = XCNEWVEC (long, last_basic_block + 1);
1992 operand_rank = pointer_map_create ();
1994 /* Give each argument a distinct rank. */
1995 for (param = DECL_ARGUMENTS (current_function_decl);
1996 param;
1997 param = TREE_CHAIN (param))
     /* Only parameters that have a default SSA definition get one.  */
1999 if (gimple_default_def (cfun, param) != NULL)
2001 tree def = gimple_default_def (cfun, param);
2002 insert_operand_rank (def, ++rank);
2006 /* Give the chain decl a distinct rank. */
2007 if (cfun->static_chain_decl != NULL)
2009 tree def = gimple_default_def (cfun, cfun->static_chain_decl);
2010 if (def != NULL)
2011 insert_operand_rank (def, ++rank);
2014 /* Set up rank for each BB */
     /* Shift by 16 so every BB rank is well above all parameter ranks.
        NOTE(review): the exact encoding is consumed by get_rank, which
        is not visible in this chunk — confirm there.  */
2015 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
2016 bb_rank[bbs[i]] = ++rank << 16;
2018 free (bbs);
2019 calculate_dominance_info (CDI_POST_DOMINATORS);
2020 plus_negates = NULL;
2023 /* Cleanup after the reassociation pass, and print stats if
2024 requested. */
2026 static void
2027 fini_reassoc (void)
     /* Report per-function pass statistics first, then release all
        state that init_reassoc set up (order: rank map, operand pool,
        bb_rank array, negate list, post-dominators, loop info).  */
2029 statistics_counter_event (cfun, "Linearized",
2030 reassociate_stats.linearized);
2031 statistics_counter_event (cfun, "Constants eliminated",
2032 reassociate_stats.constants_eliminated);
2033 statistics_counter_event (cfun, "Ops eliminated",
2034 reassociate_stats.ops_eliminated);
2035 statistics_counter_event (cfun, "Statements rewritten",
2036 reassociate_stats.rewritten);
2038 pointer_map_destroy (operand_rank);
2039 free_alloc_pool (operand_entry_pool);
2040 free (bb_rank);
2041 VEC_free (tree, heap, plus_negates);
2042 free_dominance_info (CDI_POST_DOMINATORS);
2043 loop_optimizer_finalize ();
2046 /* Gate and execute functions for Reassociation. */
     /* Main entry point: set up ranks and pools, run the reassociation
        walk, turn leftover negates back into subtracts, then tear
        everything down.  Always returns 0 (no additional TODO flags).  */
2048 static unsigned int
2049 execute_reassoc (void)
2051 init_reassoc ();
2053 do_reassoc ();
2054 repropagate_negates ();
2056 fini_reassoc ();
2057 return 0;
2060 static bool
2061 gate_tree_ssa_reassoc (void)
2063 return flag_tree_reassoc != 0;
2066 struct gimple_opt_pass pass_reassoc =
2069 GIMPLE_PASS,
2070 "reassoc", /* name */
2071 gate_tree_ssa_reassoc, /* gate */
2072 execute_reassoc, /* execute */
2073 NULL, /* sub */
2074 NULL, /* next */
2075 0, /* static_pass_number */
2076 TV_TREE_REASSOC, /* tv_id */
2077 PROP_cfg | PROP_ssa, /* properties_required */
2078 0, /* properties_provided */
2079 0, /* properties_destroyed */
2080 0, /* todo_flags_start */
2081 TODO_dump_func | TODO_ggc_collect | TODO_verify_ssa /* todo_flags_finish */