* configure.ac (HAVE_GAS_CFI_DIRECTIVE): Always test for assembler
[official-gcc.git] / gcc / tree-ssa-reassoc.c
blobe4e7db69d2ea2dadfa1ff0126c77d20dcd1b3ab0
1 /* Reassociation for trees.
2 Copyright (C) 2005, 2007 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "errors.h"
26 #include "ggc.h"
27 #include "tree.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-inline.h"
31 #include "tree-flow.h"
32 #include "gimple.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
37 #include "alloc-pool.h"
38 #include "vec.h"
39 #include "langhooks.h"
40 #include "pointer-set.h"
41 #include "cfgloop.h"
42 #include "flags.h"
44 /* This is a simple global reassociation pass. It is, in part, based
45 on the LLVM pass of the same name (They do some things more/less
46 than we do, in different orders, etc).
48 It consists of five steps:
50 1. Breaking up subtract operations into addition + negate, where
51 it would promote the reassociation of adds.
53 2. Left linearization of the expression trees, so that (A+B)+(C+D)
54 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
55 During linearization, we place the operands of the binary
56 expressions into a vector of operand_entry_t
58 3. Optimization of the operand lists, eliminating things like a +
59 -a, a & a, etc.
61 4. Rewrite the expression trees we linearized and optimized so
62 they are in proper rank order.
64 5. Repropagate negates, as nothing else will clean it up ATM.
66 A bit of theory on #4, since nobody seems to write anything down
67 about why it makes sense to do it the way they do it:
69 We could do this much nicer theoretically, but don't (for reasons
70 explained after how to do it theoretically nice :P).
72 In order to promote the most redundancy elimination, you want
73 binary expressions whose operands are the same rank (or
74 preferably, the same value) exposed to the redundancy eliminator,
75 for possible elimination.
77 So the way to do this if we really cared, is to build the new op
78 tree from the leaves to the roots, merging as you go, and putting the
79 new op on the end of the worklist, until you are left with one
80 thing on the worklist.
82 IE if you have to rewrite the following set of operands (listed with
83 rank in parentheses), with opcode PLUS_EXPR:
85 a (1), b (1), c (1), d (2), e (2)
88 We start with our merge worklist empty, and the ops list with all of
89 those on it.
91 You want to first merge all leaves of the same rank, as much as
92 possible.
94 So first build a binary op of
96 mergetmp = a + b, and put "mergetmp" on the merge worklist.
98 Because there is no three operand form of PLUS_EXPR, c is not going to
99 be exposed to redundancy elimination as a rank 1 operand.
101 So you might as well throw it on the merge worklist (you could also
102 consider it to now be a rank two operand, and merge it with d and e,
103 but in this case, you then have evicted e from a binary op. So at
104 least in this situation, you can't win.)
106 Then build a binary op of d + e
107 mergetmp2 = d + e
109 and put mergetmp2 on the merge worklist.
111 so merge worklist = {mergetmp, c, mergetmp2}
113 Continue building binary ops of these operations until you have only
114 one operation left on the worklist.
116 So we have
118 build binary op
119 mergetmp3 = mergetmp + c
121 worklist = {mergetmp2, mergetmp3}
123 mergetmp4 = mergetmp2 + mergetmp3
125 worklist = {mergetmp4}
127 because we have one operation left, we can now just set the original
128 statement equal to the result of that operation.
130 This will at least expose a + b and d + e to redundancy elimination
131 as binary operations.
133 For extra points, you can reuse the old statements to build the
134 mergetmps, since you shouldn't run out.
136 So why don't we do this?
138 Because it's expensive, and rarely will help. Most trees we are
139 reassociating have 3 or less ops. If they have 2 ops, they already
140 will be written into a nice single binary op. If you have 3 ops, a
141 single simple check suffices to tell you whether the first two are of the
142 same rank. If so, you know to order it
144 mergetmp = op1 + op2
145 newstmt = mergetmp + op3
147 instead of
148 mergetmp = op2 + op3
149 newstmt = mergetmp + op1
151 If all three are of the same rank, you can't expose them all in a
152 single binary operator anyway, so the above is *still* the best you
153 can do.
155 Thus, this is what we do. When we have three ops left, we check to see
156 what order to put them in, and call it a day. As a nod to vector sum
157 reduction, we check if any of ops are a really a phi node that is a
158 destructive update for the associating op, and keep the destructive
159 update together for vector sum reduction recognition. */
162 /* Statistics */
163 static struct
165 int linearized;
166 int constants_eliminated;
167 int ops_eliminated;
168 int rewritten;
169 } reassociate_stats;
171 /* Operator, rank pair. */
172 typedef struct operand_entry
174 unsigned int rank;
175 tree op;
176 } *operand_entry_t;
178 static alloc_pool operand_entry_pool;
181 /* Starting rank number for a given basic block, so that we can rank
182 operations using unmovable instructions in that BB based on the bb
183 depth. */
184 static long *bb_rank;
186 /* Operand->rank hashtable. */
187 static struct pointer_map_t *operand_rank;
190 /* Look up the operand rank structure for expression E. */
192 static inline long
193 find_operand_rank (tree e)
195 void **slot = pointer_map_contains (operand_rank, e);
196 return slot ? (long) *slot : -1;
199 /* Insert {E,RANK} into the operand rank hashtable. */
201 static inline void
202 insert_operand_rank (tree e, long rank)
204 void **slot;
205 gcc_assert (rank > 0);
206 slot = pointer_map_insert (operand_rank, e);
207 gcc_assert (!*slot);
208 *slot = (void *) rank;
211 /* Given an expression E, return the rank of the expression. */
213 static long
214 get_rank (tree e)
216 /* Constants have rank 0. */
217 if (is_gimple_min_invariant (e))
218 return 0;
220 /* SSA_NAME's have the rank of the expression they are the result
222 For globals and uninitialized values, the rank is 0.
223 For function arguments, use the pre-setup rank.
224 For PHI nodes, stores, asm statements, etc, we use the rank of
225 the BB.
226 For simple operations, the rank is the maximum rank of any of
227 its operands, or the bb_rank, whichever is less.
228 I make no claims that this is optimal, however, it gives good
229 results. */
231 if (TREE_CODE (e) == SSA_NAME)
233 gimple stmt;
234 long rank, maxrank;
235 int i, n;
237 if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL
238 && SSA_NAME_IS_DEFAULT_DEF (e))
239 return find_operand_rank (e);
241 stmt = SSA_NAME_DEF_STMT (e);
242 if (gimple_bb (stmt) == NULL)
243 return 0;
245 if (!is_gimple_assign (stmt)
246 || !ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
247 return bb_rank[gimple_bb (stmt)->index];
249 /* If we already have a rank for this expression, use that. */
250 rank = find_operand_rank (e);
251 if (rank != -1)
252 return rank;
254 /* Otherwise, find the maximum rank for the operands, or the bb
255 rank, whichever is less. */
256 rank = 0;
257 maxrank = bb_rank[gimple_bb(stmt)->index];
258 if (gimple_assign_single_p (stmt))
260 tree rhs = gimple_assign_rhs1 (stmt);
261 n = TREE_OPERAND_LENGTH (rhs);
262 if (n == 0)
263 rank = MAX (rank, get_rank (rhs));
264 else
266 for (i = 0;
267 i < n && TREE_OPERAND (rhs, i) && rank != maxrank; i++)
268 rank = MAX(rank, get_rank (TREE_OPERAND (rhs, i)));
271 else
273 n = gimple_num_ops (stmt);
274 for (i = 1; i < n && rank != maxrank; i++)
276 gcc_assert (gimple_op (stmt, i));
277 rank = MAX(rank, get_rank (gimple_op (stmt, i)));
281 if (dump_file && (dump_flags & TDF_DETAILS))
283 fprintf (dump_file, "Rank for ");
284 print_generic_expr (dump_file, e, 0);
285 fprintf (dump_file, " is %ld\n", (rank + 1));
288 /* Note the rank in the hashtable so we don't recompute it. */
289 insert_operand_rank (e, (rank + 1));
290 return (rank + 1);
293 /* Globals, etc, are rank 0 */
294 return 0;
297 DEF_VEC_P(operand_entry_t);
298 DEF_VEC_ALLOC_P(operand_entry_t, heap);
300 /* We want integer ones to end up last no matter what, since they are
301 the ones we can do the most with. */
302 #define INTEGER_CONST_TYPE 1 << 3
303 #define FLOAT_CONST_TYPE 1 << 2
304 #define OTHER_CONST_TYPE 1 << 1
306 /* Classify an invariant tree into integer, float, or other, so that
307 we can sort them to be near other constants of the same type. */
308 static inline int
309 constant_type (tree t)
311 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
312 return INTEGER_CONST_TYPE;
313 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
314 return FLOAT_CONST_TYPE;
315 else
316 return OTHER_CONST_TYPE;
319 /* qsort comparison function to sort operand entries PA and PB by rank
320 so that the sorted array is ordered by rank in decreasing order. */
321 static int
322 sort_by_operand_rank (const void *pa, const void *pb)
324 const operand_entry_t oea = *(const operand_entry_t *)pa;
325 const operand_entry_t oeb = *(const operand_entry_t *)pb;
327 /* It's nicer for optimize_expression if constants that are likely
328 to fold when added/multiplied//whatever are put next to each
329 other. Since all constants have rank 0, order them by type. */
330 if (oeb->rank == 0 && oea->rank == 0)
331 return constant_type (oeb->op) - constant_type (oea->op);
333 /* Lastly, make sure the versions that are the same go next to each
334 other. We use SSA_NAME_VERSION because it's stable. */
335 if ((oeb->rank - oea->rank == 0)
336 && TREE_CODE (oea->op) == SSA_NAME
337 && TREE_CODE (oeb->op) == SSA_NAME)
338 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
340 return oeb->rank - oea->rank;
343 /* Add an operand entry to *OPS for the tree operand OP. */
345 static void
346 add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op)
348 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
350 oe->op = op;
351 oe->rank = get_rank (op);
352 VEC_safe_push (operand_entry_t, heap, *ops, oe);
355 /* Return true if STMT is reassociable operation containing a binary
356 operation with tree code CODE, and is inside LOOP. */
358 static bool
359 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
361 basic_block bb = gimple_bb (stmt);
363 if (gimple_bb (stmt) == NULL)
364 return false;
366 if (!flow_bb_inside_loop_p (loop, bb))
367 return false;
369 if (is_gimple_assign (stmt)
370 && gimple_assign_rhs_code (stmt) == code
371 && has_single_use (gimple_assign_lhs (stmt)))
372 return true;
374 return false;
378 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
379 operand of the negate operation. Otherwise, return NULL. */
381 static tree
382 get_unary_op (tree name, enum tree_code opcode)
384 gimple stmt = SSA_NAME_DEF_STMT (name);
386 if (!is_gimple_assign (stmt))
387 return NULL_TREE;
389 if (gimple_assign_rhs_code (stmt) == opcode)
390 return gimple_assign_rhs1 (stmt);
391 return NULL_TREE;
394 /* If CURR and LAST are a pair of ops that OPCODE allows us to
395 eliminate through equivalences, do so, remove them from OPS, and
396 return true. Otherwise, return false. */
398 static bool
399 eliminate_duplicate_pair (enum tree_code opcode,
400 VEC (operand_entry_t, heap) **ops,
401 bool *all_done,
402 unsigned int i,
403 operand_entry_t curr,
404 operand_entry_t last)
407 /* If we have two of the same op, and the opcode is & |, min, or max,
408 we can eliminate one of them.
409 If we have two of the same op, and the opcode is ^, we can
410 eliminate both of them. */
412 if (last && last->op == curr->op)
414 switch (opcode)
416 case MAX_EXPR:
417 case MIN_EXPR:
418 case BIT_IOR_EXPR:
419 case BIT_AND_EXPR:
420 if (dump_file && (dump_flags & TDF_DETAILS))
422 fprintf (dump_file, "Equivalence: ");
423 print_generic_expr (dump_file, curr->op, 0);
424 fprintf (dump_file, " [&|minmax] ");
425 print_generic_expr (dump_file, last->op, 0);
426 fprintf (dump_file, " -> ");
427 print_generic_stmt (dump_file, last->op, 0);
430 VEC_ordered_remove (operand_entry_t, *ops, i);
431 reassociate_stats.ops_eliminated ++;
433 return true;
435 case BIT_XOR_EXPR:
436 if (dump_file && (dump_flags & TDF_DETAILS))
438 fprintf (dump_file, "Equivalence: ");
439 print_generic_expr (dump_file, curr->op, 0);
440 fprintf (dump_file, " ^ ");
441 print_generic_expr (dump_file, last->op, 0);
442 fprintf (dump_file, " -> nothing\n");
445 reassociate_stats.ops_eliminated += 2;
447 if (VEC_length (operand_entry_t, *ops) == 2)
449 VEC_free (operand_entry_t, heap, *ops);
450 *ops = NULL;
451 add_to_ops_vec (ops, fold_convert (TREE_TYPE (last->op),
452 integer_zero_node));
453 *all_done = true;
455 else
457 VEC_ordered_remove (operand_entry_t, *ops, i-1);
458 VEC_ordered_remove (operand_entry_t, *ops, i-1);
461 return true;
463 default:
464 break;
467 return false;
470 /* If OPCODE is PLUS_EXPR, CURR->OP is really a negate expression,
471 look in OPS for a corresponding positive operation to cancel it
472 out. If we find one, remove the other from OPS, replace
473 OPS[CURRINDEX] with 0, and return true. Otherwise, return
474 false. */
476 static bool
477 eliminate_plus_minus_pair (enum tree_code opcode,
478 VEC (operand_entry_t, heap) **ops,
479 unsigned int currindex,
480 operand_entry_t curr)
482 tree negateop;
483 unsigned int i;
484 operand_entry_t oe;
486 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
487 return false;
489 negateop = get_unary_op (curr->op, NEGATE_EXPR);
490 if (negateop == NULL_TREE)
491 return false;
493 /* Any non-negated version will have a rank that is one less than
494 the current rank. So once we hit those ranks, if we don't find
495 one, we can stop. */
497 for (i = currindex + 1;
498 VEC_iterate (operand_entry_t, *ops, i, oe)
499 && oe->rank >= curr->rank - 1 ;
500 i++)
502 if (oe->op == negateop)
505 if (dump_file && (dump_flags & TDF_DETAILS))
507 fprintf (dump_file, "Equivalence: ");
508 print_generic_expr (dump_file, negateop, 0);
509 fprintf (dump_file, " + -");
510 print_generic_expr (dump_file, oe->op, 0);
511 fprintf (dump_file, " -> 0\n");
514 VEC_ordered_remove (operand_entry_t, *ops, i);
515 add_to_ops_vec (ops, fold_convert(TREE_TYPE (oe->op),
516 integer_zero_node));
517 VEC_ordered_remove (operand_entry_t, *ops, currindex);
518 reassociate_stats.ops_eliminated ++;
520 return true;
524 return false;
527 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
528 bitwise not expression, look in OPS for a corresponding operand to
529 cancel it out. If we find one, remove the other from OPS, replace
530 OPS[CURRINDEX] with 0, and return true. Otherwise, return
531 false. */
533 static bool
534 eliminate_not_pairs (enum tree_code opcode,
535 VEC (operand_entry_t, heap) **ops,
536 unsigned int currindex,
537 operand_entry_t curr)
539 tree notop;
540 unsigned int i;
541 operand_entry_t oe;
543 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
544 || TREE_CODE (curr->op) != SSA_NAME)
545 return false;
547 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
548 if (notop == NULL_TREE)
549 return false;
551 /* Any non-not version will have a rank that is one less than
552 the current rank. So once we hit those ranks, if we don't find
553 one, we can stop. */
555 for (i = currindex + 1;
556 VEC_iterate (operand_entry_t, *ops, i, oe)
557 && oe->rank >= curr->rank - 1;
558 i++)
560 if (oe->op == notop)
562 if (dump_file && (dump_flags & TDF_DETAILS))
564 fprintf (dump_file, "Equivalence: ");
565 print_generic_expr (dump_file, notop, 0);
566 if (opcode == BIT_AND_EXPR)
567 fprintf (dump_file, " & ~");
568 else if (opcode == BIT_IOR_EXPR)
569 fprintf (dump_file, " | ~");
570 print_generic_expr (dump_file, oe->op, 0);
571 if (opcode == BIT_AND_EXPR)
572 fprintf (dump_file, " -> 0\n");
573 else if (opcode == BIT_IOR_EXPR)
574 fprintf (dump_file, " -> -1\n");
577 if (opcode == BIT_AND_EXPR)
578 oe->op = fold_convert (TREE_TYPE (oe->op), integer_zero_node);
579 else if (opcode == BIT_IOR_EXPR)
580 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
581 TYPE_PRECISION (TREE_TYPE (oe->op)));
583 reassociate_stats.ops_eliminated
584 += VEC_length (operand_entry_t, *ops) - 1;
585 VEC_free (operand_entry_t, heap, *ops);
586 *ops = NULL;
587 VEC_safe_push (operand_entry_t, heap, *ops, oe);
588 return true;
592 return false;
595 /* Use constant value that may be present in OPS to try to eliminate
596 operands. Note that this function is only really used when we've
597 eliminated ops for other reasons, or merged constants. Across
598 single statements, fold already does all of this, plus more. There
599 is little point in duplicating logic, so I've only included the
600 identities that I could ever construct testcases to trigger. */
602 static void
603 eliminate_using_constants (enum tree_code opcode,
604 VEC(operand_entry_t, heap) **ops)
606 operand_entry_t oelast = VEC_last (operand_entry_t, *ops);
607 tree type = TREE_TYPE (oelast->op);
609 if (oelast->rank == 0
610 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
612 switch (opcode)
614 case BIT_AND_EXPR:
615 if (integer_zerop (oelast->op))
617 if (VEC_length (operand_entry_t, *ops) != 1)
619 if (dump_file && (dump_flags & TDF_DETAILS))
620 fprintf (dump_file, "Found & 0, removing all other ops\n");
622 reassociate_stats.ops_eliminated
623 += VEC_length (operand_entry_t, *ops) - 1;
625 VEC_free (operand_entry_t, heap, *ops);
626 *ops = NULL;
627 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
628 return;
631 else if (integer_all_onesp (oelast->op))
633 if (VEC_length (operand_entry_t, *ops) != 1)
635 if (dump_file && (dump_flags & TDF_DETAILS))
636 fprintf (dump_file, "Found & -1, removing\n");
637 VEC_pop (operand_entry_t, *ops);
638 reassociate_stats.ops_eliminated++;
641 break;
642 case BIT_IOR_EXPR:
643 if (integer_all_onesp (oelast->op))
645 if (VEC_length (operand_entry_t, *ops) != 1)
647 if (dump_file && (dump_flags & TDF_DETAILS))
648 fprintf (dump_file, "Found | -1, removing all other ops\n");
650 reassociate_stats.ops_eliminated
651 += VEC_length (operand_entry_t, *ops) - 1;
653 VEC_free (operand_entry_t, heap, *ops);
654 *ops = NULL;
655 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
656 return;
659 else if (integer_zerop (oelast->op))
661 if (VEC_length (operand_entry_t, *ops) != 1)
663 if (dump_file && (dump_flags & TDF_DETAILS))
664 fprintf (dump_file, "Found | 0, removing\n");
665 VEC_pop (operand_entry_t, *ops);
666 reassociate_stats.ops_eliminated++;
669 break;
670 case MULT_EXPR:
671 if (integer_zerop (oelast->op)
672 || (FLOAT_TYPE_P (type)
673 && !HONOR_NANS (TYPE_MODE (type))
674 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
675 && real_zerop (oelast->op)))
677 if (VEC_length (operand_entry_t, *ops) != 1)
679 if (dump_file && (dump_flags & TDF_DETAILS))
680 fprintf (dump_file, "Found * 0, removing all other ops\n");
682 reassociate_stats.ops_eliminated
683 += VEC_length (operand_entry_t, *ops) - 1;
684 VEC_free (operand_entry_t, heap, *ops);
685 *ops = NULL;
686 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
687 return;
690 else if (integer_onep (oelast->op)
691 || (FLOAT_TYPE_P (type)
692 && !HONOR_SNANS (TYPE_MODE (type))
693 && real_onep (oelast->op)))
695 if (VEC_length (operand_entry_t, *ops) != 1)
697 if (dump_file && (dump_flags & TDF_DETAILS))
698 fprintf (dump_file, "Found * 1, removing\n");
699 VEC_pop (operand_entry_t, *ops);
700 reassociate_stats.ops_eliminated++;
701 return;
704 break;
705 case BIT_XOR_EXPR:
706 case PLUS_EXPR:
707 case MINUS_EXPR:
708 if (integer_zerop (oelast->op)
709 || (FLOAT_TYPE_P (type)
710 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
711 && fold_real_zero_addition_p (type, oelast->op,
712 opcode == MINUS_EXPR)))
714 if (VEC_length (operand_entry_t, *ops) != 1)
716 if (dump_file && (dump_flags & TDF_DETAILS))
717 fprintf (dump_file, "Found [|^+] 0, removing\n");
718 VEC_pop (operand_entry_t, *ops);
719 reassociate_stats.ops_eliminated++;
720 return;
723 break;
724 default:
725 break;
731 static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple,
732 bool, bool);
734 /* Structure for tracking and counting operands. */
735 typedef struct oecount_s {
736 int cnt;
737 enum tree_code oecode;
738 tree op;
739 } oecount;
741 DEF_VEC_O(oecount);
742 DEF_VEC_ALLOC_O(oecount,heap);
744 /* The heap for the oecount hashtable and the sorted list of operands. */
745 static VEC (oecount, heap) *cvec;
747 /* Hash function for oecount. */
749 static hashval_t
750 oecount_hash (const void *p)
752 const oecount *c = VEC_index (oecount, cvec, (size_t)p - 42);
753 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
756 /* Comparison function for oecount. */
758 static int
759 oecount_eq (const void *p1, const void *p2)
761 const oecount *c1 = VEC_index (oecount, cvec, (size_t)p1 - 42);
762 const oecount *c2 = VEC_index (oecount, cvec, (size_t)p2 - 42);
763 return (c1->oecode == c2->oecode
764 && c1->op == c2->op);
767 /* Comparison function for qsort sorting oecount elements by count. */
769 static int
770 oecount_cmp (const void *p1, const void *p2)
772 const oecount *c1 = (const oecount *)p1;
773 const oecount *c2 = (const oecount *)p2;
774 return c1->cnt - c2->cnt;
777 /* Walks the linear chain with result *DEF searching for an operation
778 with operand OP and code OPCODE removing that from the chain. *DEF
779 is updated if there is only one operand but no operation left. */
781 static void
782 zero_one_operation (tree *def, enum tree_code opcode, tree op)
784 gimple stmt = SSA_NAME_DEF_STMT (*def);
788 tree name = gimple_assign_rhs1 (stmt);
790 /* If this is the operation we look for and one of the operands
791 is ours simply propagate the other operand into the stmts
792 single use. */
793 if (gimple_assign_rhs_code (stmt) == opcode
794 && (name == op
795 || gimple_assign_rhs2 (stmt) == op))
797 gimple use_stmt;
798 use_operand_p use;
799 gimple_stmt_iterator gsi;
800 if (name == op)
801 name = gimple_assign_rhs2 (stmt);
802 gcc_assert (has_single_use (gimple_assign_lhs (stmt)));
803 single_imm_use (gimple_assign_lhs (stmt), &use, &use_stmt);
804 if (gimple_assign_lhs (stmt) == *def)
805 *def = name;
806 SET_USE (use, name);
807 if (TREE_CODE (name) != SSA_NAME)
808 update_stmt (use_stmt);
809 gsi = gsi_for_stmt (stmt);
810 gsi_remove (&gsi, true);
811 release_defs (stmt);
812 return;
815 /* Continue walking the chain. */
816 gcc_assert (name != op
817 && TREE_CODE (name) == SSA_NAME);
818 stmt = SSA_NAME_DEF_STMT (name);
820 while (1);
823 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
824 the result. Places the statement after the definition of either
825 OP1 or OP2. Returns the new statement. */
827 static gimple
828 build_and_add_sum (tree tmpvar, tree op1, tree op2, enum tree_code opcode)
830 gimple op1def = NULL, op2def = NULL;
831 gimple_stmt_iterator gsi;
832 tree op;
833 gimple sum;
835 /* Create the addition statement. */
836 sum = gimple_build_assign_with_ops (opcode, tmpvar, op1, op2);
837 op = make_ssa_name (tmpvar, sum);
838 gimple_assign_set_lhs (sum, op);
840 /* Find an insertion place and insert. */
841 if (TREE_CODE (op1) == SSA_NAME)
842 op1def = SSA_NAME_DEF_STMT (op1);
843 if (TREE_CODE (op2) == SSA_NAME)
844 op2def = SSA_NAME_DEF_STMT (op2);
845 if ((!op1def || gimple_nop_p (op1def))
846 && (!op2def || gimple_nop_p (op2def)))
848 gsi = gsi_start_bb (single_succ (ENTRY_BLOCK_PTR));
849 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
851 else if ((!op1def || gimple_nop_p (op1def))
852 || (op2def && !gimple_nop_p (op2def)
853 && stmt_dominates_stmt_p (op1def, op2def)))
855 if (gimple_code (op2def) == GIMPLE_PHI)
857 gsi = gsi_start_bb (gimple_bb (op2def));
858 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
860 else
862 gsi = gsi_for_stmt (op2def);
863 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
866 else
868 if (gimple_code (op1def) == GIMPLE_PHI)
870 gsi = gsi_start_bb (gimple_bb (op1def));
871 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
873 else
875 gsi = gsi_for_stmt (op1def);
876 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
879 update_stmt (sum);
881 return sum;
884 /* Perform un-distribution of divisions and multiplications.
885 A * X + B * X is transformed into (A + B) * X and A / X + B / X
886 to (A + B) / X for real X.
888 The algorithm is organized as follows.
890 - First we walk the addition chain *OPS looking for summands that
891 are defined by a multiplication or a real division. This results
892 in the candidates bitmap with relevant indices into *OPS.
894 - Second we build the chains of multiplications or divisions for
895 these candidates, counting the number of occurences of (operand, code)
896 pairs in all of the candidates chains.
898 - Third we sort the (operand, code) pairs by number of occurence and
899 process them starting with the pair with the most uses.
901 * For each such pair we walk the candidates again to build a
902 second candidate bitmap noting all multiplication/division chains
903 that have at least one occurence of (operand, code).
905 * We build an alternate addition chain only covering these
906 candidates with one (operand, code) operation removed from their
907 multiplication/division chain.
909 * The first candidate gets replaced by the alternate addition chain
910 multiplied/divided by the operand.
912 * All candidate chains get disabled for further processing and
913 processing of (operand, code) pairs continues.
915 The alternate addition chains built are re-processed by the main
916 reassociation algorithm which allows optimizing a * x * y + b * y * x
917 to (a + b ) * x * y in one invocation of the reassociation pass. */
919 static bool
920 undistribute_ops_list (enum tree_code opcode,
921 VEC (operand_entry_t, heap) **ops, struct loop *loop)
923 unsigned int length = VEC_length (operand_entry_t, *ops);
924 operand_entry_t oe1;
925 unsigned i, j;
926 sbitmap candidates, candidates2;
927 unsigned nr_candidates, nr_candidates2;
928 sbitmap_iterator sbi0;
929 VEC (operand_entry_t, heap) **subops;
930 htab_t ctable;
931 bool changed = false;
933 if (length <= 1
934 || opcode != PLUS_EXPR)
935 return false;
937 /* Build a list of candidates to process. */
938 candidates = sbitmap_alloc (length);
939 sbitmap_zero (candidates);
940 nr_candidates = 0;
941 for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe1); ++i)
943 enum tree_code dcode;
944 gimple oe1def;
946 if (TREE_CODE (oe1->op) != SSA_NAME)
947 continue;
948 oe1def = SSA_NAME_DEF_STMT (oe1->op);
949 if (!is_gimple_assign (oe1def))
950 continue;
951 dcode = gimple_assign_rhs_code (oe1def);
952 if ((dcode != MULT_EXPR
953 && dcode != RDIV_EXPR)
954 || !is_reassociable_op (oe1def, dcode, loop))
955 continue;
957 SET_BIT (candidates, i);
958 nr_candidates++;
961 if (nr_candidates < 2)
963 sbitmap_free (candidates);
964 return false;
967 if (dump_file && (dump_flags & TDF_DETAILS))
969 fprintf (dump_file, "searching for un-distribute opportunities ");
970 print_generic_expr (dump_file,
971 VEC_index (operand_entry_t, *ops,
972 sbitmap_first_set_bit (candidates))->op, 0);
973 fprintf (dump_file, " %d\n", nr_candidates);
976 /* Build linearized sub-operand lists and the counting table. */
977 cvec = NULL;
978 ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
979 subops = XCNEWVEC (VEC (operand_entry_t, heap) *,
980 VEC_length (operand_entry_t, *ops));
981 EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
983 gimple oedef;
984 enum tree_code oecode;
985 unsigned j;
987 oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op);
988 oecode = gimple_assign_rhs_code (oedef);
989 linearize_expr_tree (&subops[i], oedef,
990 associative_tree_code (oecode), false);
992 for (j = 0; VEC_iterate (operand_entry_t, subops[i], j, oe1); ++j)
994 oecount c;
995 void **slot;
996 size_t idx;
997 c.oecode = oecode;
998 c.cnt = 1;
999 c.op = oe1->op;
1000 VEC_safe_push (oecount, heap, cvec, &c);
1001 idx = VEC_length (oecount, cvec) + 41;
1002 slot = htab_find_slot (ctable, (void *)idx, INSERT);
1003 if (!*slot)
1005 *slot = (void *)idx;
1007 else
1009 VEC_pop (oecount, cvec);
1010 VEC_index (oecount, cvec, (size_t)*slot - 42)->cnt++;
1014 htab_delete (ctable);
1016 /* Sort the counting table. */
1017 qsort (VEC_address (oecount, cvec), VEC_length (oecount, cvec),
1018 sizeof (oecount), oecount_cmp);
1020 if (dump_file && (dump_flags & TDF_DETAILS))
1022 oecount *c;
1023 fprintf (dump_file, "Candidates:\n");
1024 for (j = 0; VEC_iterate (oecount, cvec, j, c); ++j)
1026 fprintf (dump_file, " %u %s: ", c->cnt,
1027 c->oecode == MULT_EXPR
1028 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1029 print_generic_expr (dump_file, c->op, 0);
1030 fprintf (dump_file, "\n");
1034 /* Process the (operand, code) pairs in order of most occurence. */
1035 candidates2 = sbitmap_alloc (length);
1036 while (!VEC_empty (oecount, cvec))
1038 oecount *c = VEC_last (oecount, cvec);
1039 if (c->cnt < 2)
1040 break;
1042 /* Now collect the operands in the outer chain that contain
1043 the common operand in their inner chain. */
1044 sbitmap_zero (candidates2);
1045 nr_candidates2 = 0;
1046 EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1048 gimple oedef;
1049 enum tree_code oecode;
1050 unsigned j;
1051 tree op = VEC_index (operand_entry_t, *ops, i)->op;
1053 /* If we undistributed in this chain already this may be
1054 a constant. */
1055 if (TREE_CODE (op) != SSA_NAME)
1056 continue;
1058 oedef = SSA_NAME_DEF_STMT (op);
1059 oecode = gimple_assign_rhs_code (oedef);
1060 if (oecode != c->oecode)
1061 continue;
1063 for (j = 0; VEC_iterate (operand_entry_t, subops[i], j, oe1); ++j)
1065 if (oe1->op == c->op)
1067 SET_BIT (candidates2, i);
1068 ++nr_candidates2;
1069 break;
1074 if (nr_candidates2 >= 2)
1076 operand_entry_t oe1, oe2;
1077 tree tmpvar;
1078 gimple prod;
1079 int first = sbitmap_first_set_bit (candidates2);
1081 /* Build the new addition chain. */
1082 oe1 = VEC_index (operand_entry_t, *ops, first);
1083 if (dump_file && (dump_flags & TDF_DETAILS))
1085 fprintf (dump_file, "Building (");
1086 print_generic_expr (dump_file, oe1->op, 0);
1088 tmpvar = create_tmp_var (TREE_TYPE (oe1->op), NULL);
1089 add_referenced_var (tmpvar);
1090 zero_one_operation (&oe1->op, c->oecode, c->op);
1091 EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
1093 gimple sum;
1094 oe2 = VEC_index (operand_entry_t, *ops, i);
1095 if (dump_file && (dump_flags & TDF_DETAILS))
1097 fprintf (dump_file, " + ");
1098 print_generic_expr (dump_file, oe2->op, 0);
1100 zero_one_operation (&oe2->op, c->oecode, c->op);
1101 sum = build_and_add_sum (tmpvar, oe1->op, oe2->op, opcode);
1102 oe2->op = fold_convert (TREE_TYPE (oe2->op), integer_zero_node);
1103 oe2->rank = 0;
1104 oe1->op = gimple_get_lhs (sum);
1107 /* Apply the multiplication/division. */
1108 prod = build_and_add_sum (tmpvar, oe1->op, c->op, c->oecode);
1109 if (dump_file && (dump_flags & TDF_DETAILS))
1111 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1112 print_generic_expr (dump_file, c->op, 0);
1113 fprintf (dump_file, "\n");
1116 /* Record it in the addition chain and disable further
1117 undistribution with this op. */
1118 oe1->op = gimple_assign_lhs (prod);
1119 oe1->rank = get_rank (oe1->op);
1120 VEC_free (operand_entry_t, heap, subops[first]);
1122 changed = true;
1125 VEC_pop (oecount, cvec);
1128 for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i)
1129 VEC_free (operand_entry_t, heap, subops[i]);
1130 free (subops);
1131 VEC_free (oecount, heap, cvec);
1132 sbitmap_free (candidates);
1133 sbitmap_free (candidates2);
1135 return changed;
1139 /* Perform various identities and other optimizations on the list of
1140 operand entries, stored in OPS. The tree code for the binary
1141 operation between all the operands is OPCODE. */
1143 static void
1144 optimize_ops_list (enum tree_code opcode,
1145 VEC (operand_entry_t, heap) **ops)
1147 unsigned int length = VEC_length (operand_entry_t, *ops);
1148 unsigned int i;
1149 operand_entry_t oe;
1150 operand_entry_t oelast = NULL;
1151 bool iterate = false;
1153 if (length == 1)
1154 return;
1156 oelast = VEC_last (operand_entry_t, *ops);
1158 /* If the last two are constants, pop the constants off, merge them
1159 and try the next two. */
1160 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1162 operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2);
1164 if (oelm1->rank == 0
1165 && is_gimple_min_invariant (oelm1->op)
1166 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1167 TREE_TYPE (oelast->op)))
1169 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1170 oelm1->op, oelast->op);
1172 if (folded && is_gimple_min_invariant (folded))
1174 if (dump_file && (dump_flags & TDF_DETAILS))
1175 fprintf (dump_file, "Merging constants\n");
1177 VEC_pop (operand_entry_t, *ops);
1178 VEC_pop (operand_entry_t, *ops);
1180 add_to_ops_vec (ops, folded);
1181 reassociate_stats.constants_eliminated++;
1183 optimize_ops_list (opcode, ops);
1184 return;
1189 eliminate_using_constants (opcode, ops);
1190 oelast = NULL;
1192 for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);)
1194 bool done = false;
1196 if (eliminate_not_pairs (opcode, ops, i, oe))
1197 return;
1198 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1199 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe)))
1201 if (done)
1202 return;
1203 iterate = true;
1204 oelast = NULL;
1205 continue;
1207 oelast = oe;
1208 i++;
1211 length = VEC_length (operand_entry_t, *ops);
1212 oelast = VEC_last (operand_entry_t, *ops);
1214 if (iterate)
1215 optimize_ops_list (opcode, ops);
1218 /* Return true if OPERAND is defined by a PHI node which uses the LHS
1219 of STMT in it's operands. This is also known as a "destructive
1220 update" operation. */
1222 static bool
1223 is_phi_for_stmt (gimple stmt, tree operand)
1225 gimple def_stmt;
1226 tree lhs;
1227 use_operand_p arg_p;
1228 ssa_op_iter i;
1230 if (TREE_CODE (operand) != SSA_NAME)
1231 return false;
1233 lhs = gimple_assign_lhs (stmt);
1235 def_stmt = SSA_NAME_DEF_STMT (operand);
1236 if (gimple_code (def_stmt) != GIMPLE_PHI)
1237 return false;
1239 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
1240 if (lhs == USE_FROM_PTR (arg_p))
1241 return true;
1242 return false;
1245 /* Recursively rewrite our linearized statements so that the operators
1246 match those in OPS[OPINDEX], putting the computation in rank
1247 order. */
1249 static void
1250 rewrite_expr_tree (gimple stmt, unsigned int opindex,
1251 VEC(operand_entry_t, heap) * ops)
1253 tree rhs1 = gimple_assign_rhs1 (stmt);
1254 tree rhs2 = gimple_assign_rhs2 (stmt);
1255 operand_entry_t oe;
1257 /* If we have three operands left, then we want to make sure the one
1258 that gets the double binary op are the ones with the same rank.
1260 The alternative we try is to see if this is a destructive
1261 update style statement, which is like:
1262 b = phi (a, ...)
1263 a = c + b;
1264 In that case, we want to use the destructive update form to
1265 expose the possible vectorizer sum reduction opportunity.
1266 In that case, the third operand will be the phi node.
1268 We could, of course, try to be better as noted above, and do a
1269 lot of work to try to find these opportunities in >3 operand
1270 cases, but it is unlikely to be worth it. */
1271 if (opindex + 3 == VEC_length (operand_entry_t, ops))
1273 operand_entry_t oe1, oe2, oe3;
1275 oe1 = VEC_index (operand_entry_t, ops, opindex);
1276 oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
1277 oe3 = VEC_index (operand_entry_t, ops, opindex + 2);
1279 if ((oe1->rank == oe2->rank
1280 && oe2->rank != oe3->rank)
1281 || (is_phi_for_stmt (stmt, oe3->op)
1282 && !is_phi_for_stmt (stmt, oe1->op)
1283 && !is_phi_for_stmt (stmt, oe2->op)))
1285 struct operand_entry temp = *oe3;
1286 oe3->op = oe1->op;
1287 oe3->rank = oe1->rank;
1288 oe1->op = temp.op;
1289 oe1->rank= temp.rank;
1291 else if ((oe1->rank == oe3->rank
1292 && oe2->rank != oe3->rank)
1293 || (is_phi_for_stmt (stmt, oe2->op)
1294 && !is_phi_for_stmt (stmt, oe1->op)
1295 && !is_phi_for_stmt (stmt, oe3->op)))
1297 struct operand_entry temp = *oe2;
1298 oe2->op = oe1->op;
1299 oe2->rank = oe1->rank;
1300 oe1->op = temp.op;
1301 oe1->rank= temp.rank;
1305 /* The final recursion case for this function is that you have
1306 exactly two operations left.
1307 If we had one exactly one op in the entire list to start with, we
1308 would have never called this function, and the tail recursion
1309 rewrites them one at a time. */
1310 if (opindex + 2 == VEC_length (operand_entry_t, ops))
1312 operand_entry_t oe1, oe2;
1314 oe1 = VEC_index (operand_entry_t, ops, opindex);
1315 oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
1317 if (rhs1 != oe1->op || rhs2 != oe2->op)
1319 if (dump_file && (dump_flags & TDF_DETAILS))
1321 fprintf (dump_file, "Transforming ");
1322 print_gimple_stmt (dump_file, stmt, 0, 0);
1325 gimple_assign_set_rhs1 (stmt, oe1->op);
1326 gimple_assign_set_rhs2 (stmt, oe2->op);
1327 update_stmt (stmt);
1329 if (dump_file && (dump_flags & TDF_DETAILS))
1331 fprintf (dump_file, " into ");
1332 print_gimple_stmt (dump_file, stmt, 0, 0);
1336 return;
1339 /* If we hit here, we should have 3 or more ops left. */
1340 gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops));
1342 /* Rewrite the next operator. */
1343 oe = VEC_index (operand_entry_t, ops, opindex);
1345 if (oe->op != rhs2)
1348 if (dump_file && (dump_flags & TDF_DETAILS))
1350 fprintf (dump_file, "Transforming ");
1351 print_gimple_stmt (dump_file, stmt, 0, 0);
1354 gimple_assign_set_rhs2 (stmt, oe->op);
1355 update_stmt (stmt);
1357 if (dump_file && (dump_flags & TDF_DETAILS))
1359 fprintf (dump_file, " into ");
1360 print_gimple_stmt (dump_file, stmt, 0, 0);
1363 /* Recurse on the LHS of the binary operator, which is guaranteed to
1364 be the non-leaf side. */
1365 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops);
1368 /* Transform STMT, which is really (A +B) + (C + D) into the left
1369 linear form, ((A+B)+C)+D.
1370 Recurse on D if necessary. */
1372 static void
1373 linearize_expr (gimple stmt)
1375 gimple_stmt_iterator gsinow, gsirhs;
1376 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
1377 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1378 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
1379 gimple newbinrhs = NULL;
1380 struct loop *loop = loop_containing_stmt (stmt);
1382 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
1383 && is_reassociable_op (binrhs, rhscode, loop));
1385 gsinow = gsi_for_stmt (stmt);
1386 gsirhs = gsi_for_stmt (binrhs);
1387 gsi_move_before (&gsirhs, &gsinow);
1389 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
1390 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
1391 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
1393 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
1394 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1396 if (dump_file && (dump_flags & TDF_DETAILS))
1398 fprintf (dump_file, "Linearized: ");
1399 print_gimple_stmt (dump_file, stmt, 0, 0);
1402 reassociate_stats.linearized++;
1403 update_stmt (binrhs);
1404 update_stmt (binlhs);
1405 update_stmt (stmt);
1407 gimple_set_visited (stmt, true);
1408 gimple_set_visited (binlhs, true);
1409 gimple_set_visited (binrhs, true);
1411 /* Tail recurse on the new rhs if it still needs reassociation. */
1412 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
1413 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
1414 want to change the algorithm while converting to tuples. */
1415 linearize_expr (stmt);
1418 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
1419 it. Otherwise, return NULL. */
1421 static gimple
1422 get_single_immediate_use (tree lhs)
1424 use_operand_p immuse;
1425 gimple immusestmt;
1427 if (TREE_CODE (lhs) == SSA_NAME
1428 && single_imm_use (lhs, &immuse, &immusestmt)
1429 && is_gimple_assign (immusestmt))
1430 return immusestmt;
1432 return NULL;
1435 static VEC(tree, heap) *broken_up_subtracts;
1437 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
1438 representing the negated value. Insertions of any necessary
1439 instructions go before GSI.
1440 This function is recursive in that, if you hand it "a_5" as the
1441 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
1442 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
1444 static tree
1445 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
1447 gimple negatedefstmt= NULL;
1448 tree resultofnegate;
1450 /* If we are trying to negate a name, defined by an add, negate the
1451 add operands instead. */
1452 if (TREE_CODE (tonegate) == SSA_NAME)
1453 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
1454 if (TREE_CODE (tonegate) == SSA_NAME
1455 && is_gimple_assign (negatedefstmt)
1456 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
1457 && has_single_use (gimple_assign_lhs (negatedefstmt))
1458 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
1460 gimple_stmt_iterator gsi;
1461 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
1462 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
1464 gsi = gsi_for_stmt (negatedefstmt);
1465 rhs1 = negate_value (rhs1, &gsi);
1466 gimple_assign_set_rhs1 (negatedefstmt, rhs1);
1468 gsi = gsi_for_stmt (negatedefstmt);
1469 rhs2 = negate_value (rhs2, &gsi);
1470 gimple_assign_set_rhs2 (negatedefstmt, rhs2);
1472 update_stmt (negatedefstmt);
1473 return gimple_assign_lhs (negatedefstmt);
1476 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
1477 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
1478 NULL_TREE, true, GSI_SAME_STMT);
1479 VEC_safe_push (tree, heap, broken_up_subtracts, resultofnegate);
1480 return resultofnegate;
1483 /* Return true if we should break up the subtract in STMT into an add
1484 with negate. This is true when we the subtract operands are really
1485 adds, or the subtract itself is used in an add expression. In
1486 either case, breaking up the subtract into an add with negate
1487 exposes the adds to reassociation. */
1489 static bool
1490 should_break_up_subtract (gimple stmt)
1492 tree lhs = gimple_assign_lhs (stmt);
1493 tree binlhs = gimple_assign_rhs1 (stmt);
1494 tree binrhs = gimple_assign_rhs2 (stmt);
1495 gimple immusestmt;
1496 struct loop *loop = loop_containing_stmt (stmt);
1498 if (TREE_CODE (binlhs) == SSA_NAME
1499 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
1500 return true;
1502 if (TREE_CODE (binrhs) == SSA_NAME
1503 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
1504 return true;
1506 if (TREE_CODE (lhs) == SSA_NAME
1507 && (immusestmt = get_single_immediate_use (lhs))
1508 && is_gimple_assign (immusestmt)
1509 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
1510 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
1511 return true;
1512 return false;
1515 /* Transform STMT from A - B into A + -B. */
1517 static void
1518 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
1520 tree rhs1 = gimple_assign_rhs1 (stmt);
1521 tree rhs2 = gimple_assign_rhs2 (stmt);
1523 if (dump_file && (dump_flags & TDF_DETAILS))
1525 fprintf (dump_file, "Breaking up subtract ");
1526 print_gimple_stmt (dump_file, stmt, 0, 0);
1529 rhs2 = negate_value (rhs2, gsip);
1530 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
1531 update_stmt (stmt);
1534 /* Recursively linearize a binary expression that is the RHS of STMT.
1535 Place the operands of the expression tree in the vector named OPS. */
1537 static void
1538 linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt,
1539 bool is_associative, bool set_visited)
1541 gimple_stmt_iterator gsinow, gsilhs;
1542 tree binlhs = gimple_assign_rhs1 (stmt);
1543 tree binrhs = gimple_assign_rhs2 (stmt);
1544 gimple binlhsdef, binrhsdef;
1545 bool binlhsisreassoc = false;
1546 bool binrhsisreassoc = false;
1547 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
1548 struct loop *loop = loop_containing_stmt (stmt);
1550 if (set_visited)
1551 gimple_set_visited (stmt, true);
1553 if (TREE_CODE (binlhs) == SSA_NAME)
1555 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
1556 binlhsisreassoc = is_reassociable_op (binlhsdef, rhscode, loop);
1559 if (TREE_CODE (binrhs) == SSA_NAME)
1561 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
1562 binrhsisreassoc = is_reassociable_op (binrhsdef, rhscode, loop);
1565 /* If the LHS is not reassociable, but the RHS is, we need to swap
1566 them. If neither is reassociable, there is nothing we can do, so
1567 just put them in the ops vector. If the LHS is reassociable,
1568 linearize it. If both are reassociable, then linearize the RHS
1569 and the LHS. */
1571 if (!binlhsisreassoc)
1573 tree temp;
1575 /* If this is not a associative operation like division, give up. */
1576 if (!is_associative)
1578 add_to_ops_vec (ops, binrhs);
1579 return;
1582 if (!binrhsisreassoc)
1584 add_to_ops_vec (ops, binrhs);
1585 add_to_ops_vec (ops, binlhs);
1586 return;
1589 if (dump_file && (dump_flags & TDF_DETAILS))
1591 fprintf (dump_file, "swapping operands of ");
1592 print_gimple_stmt (dump_file, stmt, 0, 0);
1595 swap_tree_operands (stmt,
1596 gimple_assign_rhs1_ptr (stmt),
1597 gimple_assign_rhs2_ptr (stmt));
1598 update_stmt (stmt);
1600 if (dump_file && (dump_flags & TDF_DETAILS))
1602 fprintf (dump_file, " is now ");
1603 print_gimple_stmt (dump_file, stmt, 0, 0);
1606 /* We want to make it so the lhs is always the reassociative op,
1607 so swap. */
1608 temp = binlhs;
1609 binlhs = binrhs;
1610 binrhs = temp;
1612 else if (binrhsisreassoc)
1614 linearize_expr (stmt);
1615 binlhs = gimple_assign_rhs1 (stmt);
1616 binrhs = gimple_assign_rhs2 (stmt);
1619 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
1620 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
1621 rhscode, loop));
1622 gsinow = gsi_for_stmt (stmt);
1623 gsilhs = gsi_for_stmt (SSA_NAME_DEF_STMT (binlhs));
1624 gsi_move_before (&gsilhs, &gsinow);
1625 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
1626 is_associative, set_visited);
1627 add_to_ops_vec (ops, binrhs);
1630 /* Repropagate the negates back into subtracts, since no other pass
1631 currently does it. */
1633 static void
1634 repropagate_negates (void)
1636 unsigned int i = 0;
1637 tree negate;
1639 for (i = 0; VEC_iterate (tree, broken_up_subtracts, i, negate); i++)
1641 gimple user = get_single_immediate_use (negate);
1643 /* The negate operand can be either operand of a PLUS_EXPR
1644 (it can be the LHS if the RHS is a constant for example).
1646 Force the negate operand to the RHS of the PLUS_EXPR, then
1647 transform the PLUS_EXPR into a MINUS_EXPR. */
1648 if (user
1649 && is_gimple_assign (user)
1650 && gimple_assign_rhs_code (user) == PLUS_EXPR)
1652 /* If the negated operand appears on the LHS of the
1653 PLUS_EXPR, exchange the operands of the PLUS_EXPR
1654 to force the negated operand to the RHS of the PLUS_EXPR. */
1655 if (gimple_assign_rhs1 (user) == negate)
1657 swap_tree_operands (user,
1658 gimple_assign_rhs1_ptr (user),
1659 gimple_assign_rhs2_ptr (user));
1662 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
1663 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
1664 if (gimple_assign_rhs2 (user) == negate)
1666 tree rhs1 = gimple_assign_rhs1 (user);
1667 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
1668 gimple_stmt_iterator gsi = gsi_for_stmt (user);
1669 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
1670 update_stmt (user);
1676 /* Break up subtract operations in block BB.
1678 We do this top down because we don't know whether the subtract is
1679 part of a possible chain of reassociation except at the top.
1681 IE given
1682 d = f + g
1683 c = a + e
1684 b = c - d
1685 q = b - r
1686 k = t - q
1688 we want to break up k = t - q, but we won't until we've transformed q
1689 = b - r, which won't be broken up until we transform b = c - d.
1691 En passant, clear the GIMPLE visited flag on every statement. */
1693 static void
1694 break_up_subtract_bb (basic_block bb)
1696 gimple_stmt_iterator gsi;
1697 basic_block son;
1699 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1701 gimple stmt = gsi_stmt (gsi);
1702 gimple_set_visited (stmt, false);
1704 /* Look for simple gimple subtract operations. */
1705 if (is_gimple_assign (stmt)
1706 && gimple_assign_rhs_code (stmt) == MINUS_EXPR)
1708 tree lhs = gimple_assign_lhs (stmt);
1709 tree rhs1 = gimple_assign_rhs1 (stmt);
1710 tree rhs2 = gimple_assign_rhs2 (stmt);
1712 /* If associative-math we can do reassociation for
1713 non-integral types. Or, we can do reassociation for
1714 non-saturating fixed-point types. */
1715 if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
1716 || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
1717 || !INTEGRAL_TYPE_P (TREE_TYPE (rhs2)))
1718 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs))
1719 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(rhs1))
1720 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(rhs2))
1721 || !flag_associative_math)
1722 && (!NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE (lhs))
1723 || !NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE(rhs1))
1724 || !NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE(rhs2))))
1725 continue;
1727 /* Check for a subtract used only in an addition. If this
1728 is the case, transform it into add of a negate for better
1729 reassociation. IE transform C = A-B into C = A + -B if C
1730 is only used in an addition. */
1731 if (should_break_up_subtract (stmt))
1732 break_up_subtract (stmt, &gsi);
1735 for (son = first_dom_son (CDI_DOMINATORS, bb);
1736 son;
1737 son = next_dom_son (CDI_DOMINATORS, son))
1738 break_up_subtract_bb (son);
1741 /* Reassociate expressions in basic block BB and its post-dominator as
1742 children. */
1744 static void
1745 reassociate_bb (basic_block bb)
1747 gimple_stmt_iterator gsi;
1748 basic_block son;
1750 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
1752 gimple stmt = gsi_stmt (gsi);
1754 if (is_gimple_assign (stmt))
1756 tree lhs, rhs1, rhs2;
1757 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
1759 /* If this is not a gimple binary expression, there is
1760 nothing for us to do with it. */
1761 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
1762 continue;
1764 /* If this was part of an already processed statement,
1765 we don't need to touch it again. */
1766 if (gimple_visited_p (stmt))
1768 /* This statement might have become dead because of previous
1769 reassociations. */
1770 if (has_zero_uses (gimple_get_lhs (stmt)))
1772 gsi_remove (&gsi, true);
1773 release_defs (stmt);
1774 /* We might end up removing the last stmt above which
1775 places the iterator to the end of the sequence.
1776 Reset it to the last stmt in this case which might
1777 be the end of the sequence as well if we removed
1778 the last statement of the sequence. In which case
1779 we need to bail out. */
1780 if (gsi_end_p (gsi))
1782 gsi = gsi_last_bb (bb);
1783 if (gsi_end_p (gsi))
1784 break;
1787 continue;
1790 lhs = gimple_assign_lhs (stmt);
1791 rhs1 = gimple_assign_rhs1 (stmt);
1792 rhs2 = gimple_assign_rhs2 (stmt);
1794 /* If associative-math we can do reassociation for
1795 non-integral types. Or, we can do reassociation for
1796 non-saturating fixed-point types. */
1797 if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
1798 || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
1799 || !INTEGRAL_TYPE_P (TREE_TYPE (rhs2)))
1800 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs))
1801 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(rhs1))
1802 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(rhs2))
1803 || !flag_associative_math)
1804 && (!NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE (lhs))
1805 || !NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE(rhs1))
1806 || !NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE(rhs2))))
1807 continue;
1809 if (associative_tree_code (rhs_code))
1811 VEC(operand_entry_t, heap) *ops = NULL;
1813 /* There may be no immediate uses left by the time we
1814 get here because we may have eliminated them all. */
1815 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
1816 continue;
1818 gimple_set_visited (stmt, true);
1819 linearize_expr_tree (&ops, stmt, true, true);
1820 qsort (VEC_address (operand_entry_t, ops),
1821 VEC_length (operand_entry_t, ops),
1822 sizeof (operand_entry_t),
1823 sort_by_operand_rank);
1824 optimize_ops_list (rhs_code, &ops);
1825 if (undistribute_ops_list (rhs_code, &ops,
1826 loop_containing_stmt (stmt)))
1828 qsort (VEC_address (operand_entry_t, ops),
1829 VEC_length (operand_entry_t, ops),
1830 sizeof (operand_entry_t),
1831 sort_by_operand_rank);
1832 optimize_ops_list (rhs_code, &ops);
1835 if (VEC_length (operand_entry_t, ops) == 1)
1837 if (dump_file && (dump_flags & TDF_DETAILS))
1839 fprintf (dump_file, "Transforming ");
1840 print_gimple_stmt (dump_file, stmt, 0, 0);
1843 gimple_assign_set_rhs_from_tree (&gsi,
1844 VEC_last (operand_entry_t,
1845 ops)->op);
1846 update_stmt (stmt);
1848 if (dump_file && (dump_flags & TDF_DETAILS))
1850 fprintf (dump_file, " into ");
1851 print_gimple_stmt (dump_file, stmt, 0, 0);
1854 else
1856 rewrite_expr_tree (stmt, 0, ops);
1859 VEC_free (operand_entry_t, heap, ops);
1863 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
1864 son;
1865 son = next_dom_son (CDI_POST_DOMINATORS, son))
1866 reassociate_bb (son);
1869 void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops);
1870 void debug_ops_vector (VEC (operand_entry_t, heap) *ops);
1872 /* Dump the operand entry vector OPS to FILE. */
1874 void
1875 dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops)
1877 operand_entry_t oe;
1878 unsigned int i;
1880 for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++)
1882 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
1883 print_generic_expr (file, oe->op, 0);
1887 /* Dump the operand entry vector OPS to STDERR. */
1889 void
1890 debug_ops_vector (VEC (operand_entry_t, heap) *ops)
1892 dump_ops_vector (stderr, ops);
1895 static void
1896 do_reassoc (void)
1898 break_up_subtract_bb (ENTRY_BLOCK_PTR);
1899 reassociate_bb (EXIT_BLOCK_PTR);
1902 /* Initialize the reassociation pass. */
1904 static void
1905 init_reassoc (void)
1907 int i;
1908 long rank = 2;
1909 tree param;
1910 int *bbs = XNEWVEC (int, last_basic_block + 1);
1912 /* Find the loops, so that we can prevent moving calculations in
1913 them. */
1914 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
1916 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
1918 operand_entry_pool = create_alloc_pool ("operand entry pool",
1919 sizeof (struct operand_entry), 30);
1921 /* Reverse RPO (Reverse Post Order) will give us something where
1922 deeper loops come later. */
1923 pre_and_rev_post_order_compute (NULL, bbs, false);
1924 bb_rank = XCNEWVEC (long, last_basic_block + 1);
1925 operand_rank = pointer_map_create ();
1927 /* Give each argument a distinct rank. */
1928 for (param = DECL_ARGUMENTS (current_function_decl);
1929 param;
1930 param = TREE_CHAIN (param))
1932 if (gimple_default_def (cfun, param) != NULL)
1934 tree def = gimple_default_def (cfun, param);
1935 insert_operand_rank (def, ++rank);
1939 /* Give the chain decl a distinct rank. */
1940 if (cfun->static_chain_decl != NULL)
1942 tree def = gimple_default_def (cfun, cfun->static_chain_decl);
1943 if (def != NULL)
1944 insert_operand_rank (def, ++rank);
1947 /* Set up rank for each BB */
1948 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
1949 bb_rank[bbs[i]] = ++rank << 16;
1951 free (bbs);
1952 calculate_dominance_info (CDI_POST_DOMINATORS);
1953 broken_up_subtracts = NULL;
1956 /* Cleanup after the reassociation pass, and print stats if
1957 requested. */
1959 static void
1960 fini_reassoc (void)
1962 statistics_counter_event (cfun, "Linearized",
1963 reassociate_stats.linearized);
1964 statistics_counter_event (cfun, "Constants eliminated",
1965 reassociate_stats.constants_eliminated);
1966 statistics_counter_event (cfun, "Ops eliminated",
1967 reassociate_stats.ops_eliminated);
1968 statistics_counter_event (cfun, "Statements rewritten",
1969 reassociate_stats.rewritten);
1971 pointer_map_destroy (operand_rank);
1972 free_alloc_pool (operand_entry_pool);
1973 free (bb_rank);
1974 VEC_free (tree, heap, broken_up_subtracts);
1975 free_dominance_info (CDI_POST_DOMINATORS);
1976 loop_optimizer_finalize ();
1979 /* Gate and execute functions for Reassociation. */
1981 static unsigned int
1982 execute_reassoc (void)
1984 init_reassoc ();
1986 do_reassoc ();
1987 repropagate_negates ();
1989 fini_reassoc ();
1990 return 0;
1993 static bool
1994 gate_tree_ssa_reassoc (void)
1996 return flag_tree_reassoc != 0;
1999 struct gimple_opt_pass pass_reassoc =
2002 GIMPLE_PASS,
2003 "reassoc", /* name */
2004 gate_tree_ssa_reassoc, /* gate */
2005 execute_reassoc, /* execute */
2006 NULL, /* sub */
2007 NULL, /* next */
2008 0, /* static_pass_number */
2009 TV_TREE_REASSOC, /* tv_id */
2010 PROP_cfg | PROP_ssa | PROP_alias, /* properties_required */
2011 0, /* properties_provided */
2012 0, /* properties_destroyed */
2013 0, /* todo_flags_start */
2014 TODO_dump_func | TODO_ggc_collect | TODO_verify_ssa /* todo_flags_finish */