PR c++/53989
[official-gcc.git] / gcc / tree-ssa-reassoc.c
blob 5c55fe06a1a4a46156c16e8ca6a5c2b7d279bd4e
1 /* Reassociation for trees.
2 Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Daniel Berlin <dan@dberlin.org>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "tree.h"
27 #include "basic-block.h"
28 #include "gimple-pretty-print.h"
29 #include "tree-inline.h"
30 #include "tree-flow.h"
31 #include "gimple.h"
32 #include "tree-iterator.h"
33 #include "tree-pass.h"
34 #include "alloc-pool.h"
35 #include "vec.h"
36 #include "langhooks.h"
37 #include "pointer-set.h"
38 #include "cfgloop.h"
39 #include "flags.h"
40 #include "target.h"
41 #include "params.h"
42 #include "diagnostic-core.h"
44 /* This is a simple global reassociation pass. It is, in part, based
45 on the LLVM pass of the same name (they do some things more or less
46 than we do, in different orders, etc.).
48 It consists of five steps:
50 1. Breaking up subtract operations into addition + negate, where
51 it would promote the reassociation of adds.
53 2. Left linearization of the expression trees, so that (A+B)+(C+D)
54 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
55 During linearization, we place the operands of the binary
56 expressions into a vector of operand_entry_t
58 3. Optimization of the operand lists, eliminating things like a +
59 -a, a & a, etc.
61 3a. Combine repeated factors with the same occurrence counts
62 into a __builtin_powi call that will later be optimized into
63 an optimal number of multiplies.
65 4. Rewrite the expression trees we linearized and optimized so
66 they are in proper rank order.
68 5. Repropagate negates, as nothing else will clean it up ATM.
70 A bit of theory on #4, since nobody seems to write anything down
71 about why it makes sense to do it the way they do it:
73 We could do this much nicer theoretically, but don't (for reasons
74 explained after how to do it theoretically nice :P).
76 In order to promote the most redundancy elimination, you want
77 binary expressions whose operands are the same rank (or
78 preferably, the same value) exposed to the redundancy eliminator,
79 for possible elimination.
81 So the way to do this if we really cared, is to build the new op
82 tree from the leaves to the roots, merging as you go, and putting the
83 new op on the end of the worklist, until you are left with one
84 thing on the worklist.
86 IE if you have to rewrite the following set of operands (listed with
87 rank in parentheses), with opcode PLUS_EXPR:
89 a (1), b (1), c (1), d (2), e (2)
92 We start with our merge worklist empty, and the ops list with all of
93 those on it.
95 You want to first merge all leaves of the same rank, as much as
96 possible.
98 So first build a binary op of
100 mergetmp = a + b, and put "mergetmp" on the merge worklist.
102 Because there is no three operand form of PLUS_EXPR, c is not going to
103 be exposed to redundancy elimination as a rank 1 operand.
105 So you might as well throw it on the merge worklist (you could also
106 consider it to now be a rank two operand, and merge it with d and e,
107 but in this case, you then have evicted e from a binary op. So at
108 least in this situation, you can't win.)
110 Then build a binary op of d + e
111 mergetmp2 = d + e
113 and put mergetmp2 on the merge worklist.
115 so merge worklist = {mergetmp, c, mergetmp2}
117 Continue building binary ops of these operations until you have only
118 one operation left on the worklist.
120 So we have
122 build binary op
123 mergetmp3 = mergetmp + c
125 worklist = {mergetmp2, mergetmp3}
127 mergetmp4 = mergetmp2 + mergetmp3
129 worklist = {mergetmp4}
131 because we have one operation left, we can now just set the original
132 statement equal to the result of that operation.
134 This will at least expose a + b and d + e to redundancy elimination
135 as binary operations.
137 For extra points, you can reuse the old statements to build the
138 mergetmps, since you shouldn't run out.
140 So why don't we do this?
142 Because it's expensive, and rarely will help. Most trees we are
143 reassociating have 3 or fewer ops. If they have 2 ops, they already
144 will be written into a nice single binary op. If you have 3 ops, a
145 single simple check suffices to tell you whether the first two are of the
146 same rank. If so, you know to order it
148 mergetmp = op1 + op2
149 newstmt = mergetmp + op3
151 instead of
152 mergetmp = op2 + op3
153 newstmt = mergetmp + op1
155 If all three are of the same rank, you can't expose them all in a
156 single binary operator anyway, so the above is *still* the best you
157 can do.
159 Thus, this is what we do. When we have three ops left, we check to see
160 what order to put them in, and call it a day. As a nod to vector sum
161 reduction, we check if any of the ops are really a phi node that is a
162 destructive update for the associating op, and keep the destructive
163 update together for vector sum reduction recognition. */
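/* As a hypothetical end-to-end illustration of the steps above (made-up
   names, simplified GIMPLE):

     t1 = b - c;     step 1 breaks this into  cneg = -c;  t1 = b + cneg;
     t2 = a + t1;    step 2 linearizes t3's PLUS chain into {a, b, cneg, c}
     t3 = t2 + c;    step 3 cancels cneg against c
                     step 4 rewrites the remainder in rank order

   so the whole chain collapses to effectively  t3 = a + b, a single
   addition that redundancy elimination can then share with any other
   computation of a + b.  */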
166 /* Statistics */
167 static struct
169 int linearized;
170 int constants_eliminated;
171 int ops_eliminated;
172 int rewritten;
173 int pows_encountered;
174 int pows_created;
175 } reassociate_stats;
177 /* Operator, rank pair. */
178 typedef struct operand_entry
180 unsigned int rank;
181 int id;
182 tree op;
183 unsigned int count;
184 } *operand_entry_t;
186 static alloc_pool operand_entry_pool;
188 /* This is used to assign a unique ID to each struct operand_entry
189 so that qsort results are identical on different hosts. */
190 static int next_operand_entry_id;
192 /* Starting rank number for a given basic block, so that we can rank
193 operations using unmovable instructions in that BB based on the bb
194 depth. */
195 static long *bb_rank;
197 /* Operand->rank hashtable. */
198 static struct pointer_map_t *operand_rank;
200 /* Forward decls. */
201 static long get_rank (tree);
204 /* Bias amount for loop-carried phis. We want this to be larger than
205 the depth of any reassociation tree we can see, but not larger than
206 the rank difference between two blocks. */
207 #define PHI_LOOP_BIAS (1 << 15)
209 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
210 an innermost loop, and the phi has only a single use which is inside
211 the loop, then the rank is the block rank of the loop latch plus an
212 extra bias for the loop-carried dependence. This causes expressions
213 calculated into an accumulator variable to be independent for each
214 iteration of the loop. If STMT is some other phi, the rank is the
215 block rank of its containing block. */
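/* For illustration (hypothetical SSA names and block ranks), given the
   one-block accumulator loop

     <bb 3>:                          block rank R
     x_1 = PHI <x_0(2), x_2(3)>
     x_2 = x_1 + a_5;

   phi_rank below returns the latch block's rank (here also R) plus
   PHI_LOOP_BIAS for x_1 instead of plain R, so that when the addition
   chain feeding x_2 is rewritten, the loop-carried value x_1 is added
   last.  */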
216 static long
217 phi_rank (gimple stmt)
219 basic_block bb = gimple_bb (stmt);
220 struct loop *father = bb->loop_father;
221 tree res;
222 unsigned i;
223 use_operand_p use;
224 gimple use_stmt;
226 /* We only care about real loops (those with a latch). */
227 if (!father->latch)
228 return bb_rank[bb->index];
230 /* Interesting phis must be in headers of innermost loops. */
231 if (bb != father->header
232 || father->inner)
233 return bb_rank[bb->index];
235 /* Ignore virtual SSA_NAMEs. */
236 res = gimple_phi_result (stmt);
237 if (!is_gimple_reg (SSA_NAME_VAR (res)))
238 return bb_rank[bb->index];
240 /* The phi definition must have a single use, and that use must be
241 within the loop. Otherwise this isn't an accumulator pattern. */
242 if (!single_imm_use (res, &use, &use_stmt)
243 || gimple_bb (use_stmt)->loop_father != father)
244 return bb_rank[bb->index];
246 /* Look for phi arguments from within the loop. If found, bias this phi. */
247 for (i = 0; i < gimple_phi_num_args (stmt); i++)
249 tree arg = gimple_phi_arg_def (stmt, i);
250 if (TREE_CODE (arg) == SSA_NAME
251 && !SSA_NAME_IS_DEFAULT_DEF (arg))
253 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
254 if (gimple_bb (def_stmt)->loop_father == father)
255 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
259 /* Must be an uninteresting phi. */
260 return bb_rank[bb->index];
263 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
264 loop-carried dependence of an innermost loop, return TRUE; else
265 return FALSE. */
266 static bool
267 loop_carried_phi (tree exp)
269 gimple phi_stmt;
270 long block_rank;
272 if (TREE_CODE (exp) != SSA_NAME
273 || SSA_NAME_IS_DEFAULT_DEF (exp))
274 return false;
276 phi_stmt = SSA_NAME_DEF_STMT (exp);
278 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
279 return false;
281 /* Non-loop-carried phis have block rank. Loop-carried phis have
282 an additional bias added in. If this phi doesn't have block rank,
283 it's biased and should not be propagated. */
284 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
286 if (phi_rank (phi_stmt) != block_rank)
287 return true;
289 return false;
292 /* Return the maximum of RANK and the rank that should be propagated
293 from expression OP. For most operands, this is just the rank of OP.
294 For loop-carried phis, the value is zero to avoid undoing the bias
295 in favor of the phi. */
296 static long
297 propagate_rank (long rank, tree op)
299 long op_rank;
301 if (loop_carried_phi (op))
302 return rank;
304 op_rank = get_rank (op);
306 return MAX (rank, op_rank);
309 /* Look up the operand rank structure for expression E. */
311 static inline long
312 find_operand_rank (tree e)
314 void **slot = pointer_map_contains (operand_rank, e);
315 return slot ? (long) (intptr_t) *slot : -1;
318 /* Insert {E,RANK} into the operand rank hashtable. */
320 static inline void
321 insert_operand_rank (tree e, long rank)
323 void **slot;
324 gcc_assert (rank > 0);
325 slot = pointer_map_insert (operand_rank, e);
326 gcc_assert (!*slot);
327 *slot = (void *) (intptr_t) rank;
330 /* Given an expression E, return the rank of the expression. */
332 static long
333 get_rank (tree e)
335 /* Constants have rank 0. */
336 if (is_gimple_min_invariant (e))
337 return 0;
339 /* SSA_NAME's have the rank of the expression they are the result
340 of.
341 For globals and uninitialized values, the rank is 0.
342 For function arguments, use the pre-setup rank.
343 For PHI nodes, stores, asm statements, etc, we use the rank of
344 the BB.
345 For simple operations, the rank is the maximum rank of any of
346 its operands, or the bb_rank, whichever is less.
347 I make no claims that this is optimal; however, it gives good
348 results. */
350 /* We make an exception to the normal ranking system to break
351 dependences of accumulator variables in loops. Suppose we
352 have a simple one-block loop containing:
354 x_1 = phi(x_0, x_2)
355 b = a + x_1
356 c = b + d
357 x_2 = c + e
359 As shown, each iteration of the calculation into x is fully
360 dependent upon the iteration before it. We would prefer to
361 see this in the form:
363 x_1 = phi(x_0, x_2)
364 b = a + d
365 c = b + e
366 x_2 = c + x_1
368 If the loop is unrolled, the calculations of b and c from
369 different iterations can be interleaved.
371 To obtain this result during reassociation, we bias the rank
372 of the phi definition x_1 upward, when it is recognized as an
373 accumulator pattern. The artificial rank causes it to be
374 added last, providing the desired independence. */
376 if (TREE_CODE (e) == SSA_NAME)
378 gimple stmt;
379 long rank;
380 int i, n;
381 tree op;
383 if (SSA_NAME_IS_DEFAULT_DEF (e))
384 return find_operand_rank (e);
386 stmt = SSA_NAME_DEF_STMT (e);
387 if (gimple_code (stmt) == GIMPLE_PHI)
388 return phi_rank (stmt);
390 if (!is_gimple_assign (stmt)
391 || gimple_vdef (stmt))
392 return bb_rank[gimple_bb (stmt)->index];
394 /* If we already have a rank for this expression, use that. */
395 rank = find_operand_rank (e);
396 if (rank != -1)
397 return rank;
399 /* Otherwise, find the maximum rank for the operands. As an
400 exception, remove the bias from loop-carried phis when propagating
401 the rank so that dependent operations are not also biased. */
402 rank = 0;
403 if (gimple_assign_single_p (stmt))
405 tree rhs = gimple_assign_rhs1 (stmt);
406 n = TREE_OPERAND_LENGTH (rhs);
407 if (n == 0)
408 rank = propagate_rank (rank, rhs);
409 else
411 for (i = 0; i < n; i++)
413 op = TREE_OPERAND (rhs, i);
415 if (op != NULL_TREE)
416 rank = propagate_rank (rank, op);
420 else
422 n = gimple_num_ops (stmt);
423 for (i = 1; i < n; i++)
425 op = gimple_op (stmt, i);
426 gcc_assert (op);
427 rank = propagate_rank (rank, op);
431 if (dump_file && (dump_flags & TDF_DETAILS))
433 fprintf (dump_file, "Rank for ");
434 print_generic_expr (dump_file, e, 0);
435 fprintf (dump_file, " is %ld\n", (rank + 1));
438 /* Note the rank in the hashtable so we don't recompute it. */
439 insert_operand_rank (e, (rank + 1));
440 return (rank + 1);
443 /* Globals, etc, are rank 0 */
444 return 0;
447 DEF_VEC_P(operand_entry_t);
448 DEF_VEC_ALLOC_P(operand_entry_t, heap);
450 /* We want integer ones to end up last no matter what, since they are
451 the ones we can do the most with. */
452 #define INTEGER_CONST_TYPE 1 << 3
453 #define FLOAT_CONST_TYPE 1 << 2
454 #define OTHER_CONST_TYPE 1 << 1
456 /* Classify an invariant tree into integer, float, or other, so that
457 we can sort them to be near other constants of the same type. */
458 static inline int
459 constant_type (tree t)
461 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
462 return INTEGER_CONST_TYPE;
463 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
464 return FLOAT_CONST_TYPE;
465 else
466 return OTHER_CONST_TYPE;
469 /* qsort comparison function to sort operand entries PA and PB by rank
470 so that the sorted array is ordered by rank in decreasing order. */
471 static int
472 sort_by_operand_rank (const void *pa, const void *pb)
474 const operand_entry_t oea = *(const operand_entry_t *)pa;
475 const operand_entry_t oeb = *(const operand_entry_t *)pb;
477 /* It's nicer for optimize_ops_list if constants that are likely
478 to fold when added/multiplied/whatever are put next to each
479 other. Since all constants have rank 0, order them by type. */
480 if (oeb->rank == 0 && oea->rank == 0)
482 if (constant_type (oeb->op) != constant_type (oea->op))
483 return constant_type (oeb->op) - constant_type (oea->op);
484 else
485 /* To make sorting result stable, we use unique IDs to determine
486 order. */
487 return oeb->id - oea->id;
490 /* Lastly, make sure the versions that are the same go next to each
491 other. We use SSA_NAME_VERSION because it's stable. */
492 if ((oeb->rank - oea->rank == 0)
493 && TREE_CODE (oea->op) == SSA_NAME
494 && TREE_CODE (oeb->op) == SSA_NAME)
496 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
497 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
498 else
499 return oeb->id - oea->id;
502 if (oeb->rank != oea->rank)
503 return oeb->rank - oea->rank;
504 else
505 return oeb->id - oea->id;
508 /* Add an operand entry to *OPS for the tree operand OP. */
510 static void
511 add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op)
513 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
515 oe->op = op;
516 oe->rank = get_rank (op);
517 oe->id = next_operand_entry_id++;
518 oe->count = 1;
519 VEC_safe_push (operand_entry_t, heap, *ops, oe);
522 /* Add an operand entry to *OPS for the tree operand OP with repeat
523 count REPEAT. */
525 static void
526 add_repeat_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op,
527 HOST_WIDE_INT repeat)
529 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
531 oe->op = op;
532 oe->rank = get_rank (op);
533 oe->id = next_operand_entry_id++;
534 oe->count = repeat;
535 VEC_safe_push (operand_entry_t, heap, *ops, oe);
537 reassociate_stats.pows_encountered++;
540 /* Return true if STMT is a reassociable operation containing a binary
541 operation with tree code CODE, and is inside LOOP. */
543 static bool
544 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
546 basic_block bb = gimple_bb (stmt);
548 if (gimple_bb (stmt) == NULL)
549 return false;
551 if (!flow_bb_inside_loop_p (loop, bb))
552 return false;
554 if (is_gimple_assign (stmt)
555 && gimple_assign_rhs_code (stmt) == code
556 && has_single_use (gimple_assign_lhs (stmt)))
557 return true;
559 return false;
563 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
564 operand of that operation. Otherwise, return NULL. */
566 static tree
567 get_unary_op (tree name, enum tree_code opcode)
569 gimple stmt = SSA_NAME_DEF_STMT (name);
571 if (!is_gimple_assign (stmt))
572 return NULL_TREE;
574 if (gimple_assign_rhs_code (stmt) == opcode)
575 return gimple_assign_rhs1 (stmt);
576 return NULL_TREE;
579 /* If CURR and LAST are a pair of ops that OPCODE allows us to
580 eliminate through equivalences, do so, remove them from OPS, and
581 return true. Otherwise, return false. */
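/* For example (hypothetical SSA names), after sorting, the chain
   {a_1, a_1, b_2} under BIT_AND_EXPR drops one duplicate and becomes
   {a_1, b_2}; under BIT_XOR_EXPR both duplicates are dropped, giving
   {b_2}, and a chain of just {a_1, a_1} becomes {0}.  */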
583 static bool
584 eliminate_duplicate_pair (enum tree_code opcode,
585 VEC (operand_entry_t, heap) **ops,
586 bool *all_done,
587 unsigned int i,
588 operand_entry_t curr,
589 operand_entry_t last)
592 /* If we have two of the same op, and the opcode is &, |, min, or max,
593 we can eliminate one of them.
594 If we have two of the same op, and the opcode is ^, we can
595 eliminate both of them. */
597 if (last && last->op == curr->op)
599 switch (opcode)
601 case MAX_EXPR:
602 case MIN_EXPR:
603 case BIT_IOR_EXPR:
604 case BIT_AND_EXPR:
605 if (dump_file && (dump_flags & TDF_DETAILS))
607 fprintf (dump_file, "Equivalence: ");
608 print_generic_expr (dump_file, curr->op, 0);
609 fprintf (dump_file, " [&|minmax] ");
610 print_generic_expr (dump_file, last->op, 0);
611 fprintf (dump_file, " -> ");
612 print_generic_stmt (dump_file, last->op, 0);
615 VEC_ordered_remove (operand_entry_t, *ops, i);
616 reassociate_stats.ops_eliminated ++;
618 return true;
620 case BIT_XOR_EXPR:
621 if (dump_file && (dump_flags & TDF_DETAILS))
623 fprintf (dump_file, "Equivalence: ");
624 print_generic_expr (dump_file, curr->op, 0);
625 fprintf (dump_file, " ^ ");
626 print_generic_expr (dump_file, last->op, 0);
627 fprintf (dump_file, " -> nothing\n");
630 reassociate_stats.ops_eliminated += 2;
632 if (VEC_length (operand_entry_t, *ops) == 2)
634 VEC_free (operand_entry_t, heap, *ops);
635 *ops = NULL;
636 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
637 *all_done = true;
639 else
641 VEC_ordered_remove (operand_entry_t, *ops, i-1);
642 VEC_ordered_remove (operand_entry_t, *ops, i-1);
645 return true;
647 default:
648 break;
651 return false;
654 static VEC(tree, heap) *plus_negates;
656 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
657 expression, look in OPS for a corresponding positive operation to cancel
658 it out. If we find one, remove the other from OPS, replace
659 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
660 return false. */
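/* For example (hypothetical SSA names), if the PLUS_EXPR chain is
   {a_2, t_5, b_3} with t_5 = -b_3 and CURR = t_5, the loop below finds
   b_3, removes both entries and appends a zero, leaving {a_2, 0};
   with t_5 = ~b_3 instead, the pair folds to -1.  */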
662 static bool
663 eliminate_plus_minus_pair (enum tree_code opcode,
664 VEC (operand_entry_t, heap) **ops,
665 unsigned int currindex,
666 operand_entry_t curr)
668 tree negateop;
669 tree notop;
670 unsigned int i;
671 operand_entry_t oe;
673 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
674 return false;
676 negateop = get_unary_op (curr->op, NEGATE_EXPR);
677 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
678 if (negateop == NULL_TREE && notop == NULL_TREE)
679 return false;
681 /* Any non-negated version will have a rank that is one less than
682 the current rank. So once we hit those ranks, if we don't find
683 one, we can stop. */
685 for (i = currindex + 1;
686 VEC_iterate (operand_entry_t, *ops, i, oe)
687 && oe->rank >= curr->rank - 1 ;
688 i++)
690 if (oe->op == negateop)
693 if (dump_file && (dump_flags & TDF_DETAILS))
695 fprintf (dump_file, "Equivalence: ");
696 print_generic_expr (dump_file, negateop, 0);
697 fprintf (dump_file, " + -");
698 print_generic_expr (dump_file, oe->op, 0);
699 fprintf (dump_file, " -> 0\n");
702 VEC_ordered_remove (operand_entry_t, *ops, i);
703 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
704 VEC_ordered_remove (operand_entry_t, *ops, currindex);
705 reassociate_stats.ops_eliminated ++;
707 return true;
709 else if (oe->op == notop)
711 tree op_type = TREE_TYPE (oe->op);
713 if (dump_file && (dump_flags & TDF_DETAILS))
715 fprintf (dump_file, "Equivalence: ");
716 print_generic_expr (dump_file, notop, 0);
717 fprintf (dump_file, " + ~");
718 print_generic_expr (dump_file, oe->op, 0);
719 fprintf (dump_file, " -> -1\n");
722 VEC_ordered_remove (operand_entry_t, *ops, i);
723 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
724 VEC_ordered_remove (operand_entry_t, *ops, currindex);
725 reassociate_stats.ops_eliminated ++;
727 return true;
731 /* CURR->OP is a negate expr in a plus expr: save it for later
732 inspection in repropagate_negates(). */
733 if (negateop != NULL_TREE)
734 VEC_safe_push (tree, heap, plus_negates, curr->op);
736 return false;
739 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
740 bitwise not expression, look in OPS for a corresponding operand to
741 cancel it out. If we find one, collapse the whole list in OPS to the
742 single constant result (0 for BIT_AND_EXPR, all ones for BIT_IOR_EXPR)
743 and return true. Otherwise, return false. */
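/* For example (hypothetical SSA names), the BIT_AND_EXPR chain
   {a_1, t_4, b_2} with t_4 = ~b_2 collapses to the single constant 0;
   the same chain under BIT_IOR_EXPR collapses to all ones.  */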
745 static bool
746 eliminate_not_pairs (enum tree_code opcode,
747 VEC (operand_entry_t, heap) **ops,
748 unsigned int currindex,
749 operand_entry_t curr)
751 tree notop;
752 unsigned int i;
753 operand_entry_t oe;
755 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
756 || TREE_CODE (curr->op) != SSA_NAME)
757 return false;
759 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
760 if (notop == NULL_TREE)
761 return false;
763 /* Any non-not version will have a rank that is one less than
764 the current rank. So once we hit those ranks, if we don't find
765 one, we can stop. */
767 for (i = currindex + 1;
768 VEC_iterate (operand_entry_t, *ops, i, oe)
769 && oe->rank >= curr->rank - 1;
770 i++)
772 if (oe->op == notop)
774 if (dump_file && (dump_flags & TDF_DETAILS))
776 fprintf (dump_file, "Equivalence: ");
777 print_generic_expr (dump_file, notop, 0);
778 if (opcode == BIT_AND_EXPR)
779 fprintf (dump_file, " & ~");
780 else if (opcode == BIT_IOR_EXPR)
781 fprintf (dump_file, " | ~");
782 print_generic_expr (dump_file, oe->op, 0);
783 if (opcode == BIT_AND_EXPR)
784 fprintf (dump_file, " -> 0\n");
785 else if (opcode == BIT_IOR_EXPR)
786 fprintf (dump_file, " -> -1\n");
789 if (opcode == BIT_AND_EXPR)
790 oe->op = build_zero_cst (TREE_TYPE (oe->op));
791 else if (opcode == BIT_IOR_EXPR)
792 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
793 TYPE_PRECISION (TREE_TYPE (oe->op)));
795 reassociate_stats.ops_eliminated
796 += VEC_length (operand_entry_t, *ops) - 1;
797 VEC_free (operand_entry_t, heap, *ops);
798 *ops = NULL;
799 VEC_safe_push (operand_entry_t, heap, *ops, oe);
800 return true;
804 return false;
807 /* Use a constant value that may be present in OPS to try to eliminate
808 operands. Note that this function is only really used when we've
809 eliminated ops for other reasons, or merged constants. Across
810 single statements, fold already does all of this, plus more. There
811 is little point in duplicating logic, so I've only included the
812 identities that I could ever construct testcases to trigger. */
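/* For example (hypothetical operand lists, constants sorted to the end):
   {a_1, b_2, 0} under BIT_AND_EXPR collapses to {0}; {a_1, b_2, -1}
   under BIT_AND_EXPR drops the constant, leaving {a_1, b_2}; and
   {a_1, b_2, 1} under MULT_EXPR likewise becomes {a_1, b_2}.  */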
814 static void
815 eliminate_using_constants (enum tree_code opcode,
816 VEC(operand_entry_t, heap) **ops)
818 operand_entry_t oelast = VEC_last (operand_entry_t, *ops);
819 tree type = TREE_TYPE (oelast->op);
821 if (oelast->rank == 0
822 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
824 switch (opcode)
826 case BIT_AND_EXPR:
827 if (integer_zerop (oelast->op))
829 if (VEC_length (operand_entry_t, *ops) != 1)
831 if (dump_file && (dump_flags & TDF_DETAILS))
832 fprintf (dump_file, "Found & 0, removing all other ops\n");
834 reassociate_stats.ops_eliminated
835 += VEC_length (operand_entry_t, *ops) - 1;
837 VEC_free (operand_entry_t, heap, *ops);
838 *ops = NULL;
839 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
840 return;
843 else if (integer_all_onesp (oelast->op))
845 if (VEC_length (operand_entry_t, *ops) != 1)
847 if (dump_file && (dump_flags & TDF_DETAILS))
848 fprintf (dump_file, "Found & -1, removing\n");
849 VEC_pop (operand_entry_t, *ops);
850 reassociate_stats.ops_eliminated++;
853 break;
854 case BIT_IOR_EXPR:
855 if (integer_all_onesp (oelast->op))
857 if (VEC_length (operand_entry_t, *ops) != 1)
859 if (dump_file && (dump_flags & TDF_DETAILS))
860 fprintf (dump_file, "Found | -1, removing all other ops\n");
862 reassociate_stats.ops_eliminated
863 += VEC_length (operand_entry_t, *ops) - 1;
865 VEC_free (operand_entry_t, heap, *ops);
866 *ops = NULL;
867 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
868 return;
871 else if (integer_zerop (oelast->op))
873 if (VEC_length (operand_entry_t, *ops) != 1)
875 if (dump_file && (dump_flags & TDF_DETAILS))
876 fprintf (dump_file, "Found | 0, removing\n");
877 VEC_pop (operand_entry_t, *ops);
878 reassociate_stats.ops_eliminated++;
881 break;
882 case MULT_EXPR:
883 if (integer_zerop (oelast->op)
884 || (FLOAT_TYPE_P (type)
885 && !HONOR_NANS (TYPE_MODE (type))
886 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
887 && real_zerop (oelast->op)))
889 if (VEC_length (operand_entry_t, *ops) != 1)
891 if (dump_file && (dump_flags & TDF_DETAILS))
892 fprintf (dump_file, "Found * 0, removing all other ops\n");
894 reassociate_stats.ops_eliminated
895 += VEC_length (operand_entry_t, *ops) - 1;
896 VEC_free (operand_entry_t, heap, *ops);
897 *ops = NULL;
898 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
899 return;
902 else if (integer_onep (oelast->op)
903 || (FLOAT_TYPE_P (type)
904 && !HONOR_SNANS (TYPE_MODE (type))
905 && real_onep (oelast->op)))
907 if (VEC_length (operand_entry_t, *ops) != 1)
909 if (dump_file && (dump_flags & TDF_DETAILS))
910 fprintf (dump_file, "Found * 1, removing\n");
911 VEC_pop (operand_entry_t, *ops);
912 reassociate_stats.ops_eliminated++;
913 return;
916 break;
917 case BIT_XOR_EXPR:
918 case PLUS_EXPR:
919 case MINUS_EXPR:
920 if (integer_zerop (oelast->op)
921 || (FLOAT_TYPE_P (type)
922 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
923 && fold_real_zero_addition_p (type, oelast->op,
924 opcode == MINUS_EXPR)))
926 if (VEC_length (operand_entry_t, *ops) != 1)
928 if (dump_file && (dump_flags & TDF_DETAILS))
929 fprintf (dump_file, "Found [|^+] 0, removing\n");
930 VEC_pop (operand_entry_t, *ops);
931 reassociate_stats.ops_eliminated++;
932 return;
935 break;
936 default:
937 break;
943 static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple,
944 bool, bool);
946 /* Structure for tracking and counting operands. */
947 typedef struct oecount_s {
948 int cnt;
949 int id;
950 enum tree_code oecode;
951 tree op;
952 } oecount;
954 DEF_VEC_O(oecount);
955 DEF_VEC_ALLOC_O(oecount,heap);
957 /* The heap for the oecount hashtable and the sorted list of operands. */
958 static VEC (oecount, heap) *cvec;
960 /* Hash function for oecount. */
962 static hashval_t
963 oecount_hash (const void *p)
965 const oecount *c = VEC_index (oecount, cvec, (size_t)p - 42);
966 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
969 /* Comparison function for oecount. */
971 static int
972 oecount_eq (const void *p1, const void *p2)
974 const oecount *c1 = VEC_index (oecount, cvec, (size_t)p1 - 42);
975 const oecount *c2 = VEC_index (oecount, cvec, (size_t)p2 - 42);
976 return (c1->oecode == c2->oecode
977 && c1->op == c2->op);
980 /* Comparison function for qsort sorting oecount elements by count. */
982 static int
983 oecount_cmp (const void *p1, const void *p2)
985 const oecount *c1 = (const oecount *)p1;
986 const oecount *c2 = (const oecount *)p2;
987 if (c1->cnt != c2->cnt)
988 return c1->cnt - c2->cnt;
989 else
990 /* If counts are identical, use unique IDs to stabilize qsort. */
991 return c1->id - c2->id;
994 /* Return TRUE iff STMT represents a builtin call that raises OP
995 to some exponent. */
997 static bool
998 stmt_is_power_of_op (gimple stmt, tree op)
1000 tree fndecl;
1002 if (!is_gimple_call (stmt))
1003 return false;
1005 fndecl = gimple_call_fndecl (stmt);
1007 if (!fndecl
1008 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1009 return false;
1011 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1013 CASE_FLT_FN (BUILT_IN_POW):
1014 CASE_FLT_FN (BUILT_IN_POWI):
1015 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1017 default:
1018 return false;
1022 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1023 in place and return the result. Assumes that stmt_is_power_of_op
1024 was previously called for STMT and returned TRUE. */
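/* E.g. for a hypothetical call  t_3 = __builtin_powi (x_1, 4),
   decrement_power rewrites it in place to  __builtin_powi (x_1, 3)
   and returns 3; for  __builtin_pow (x_1, 2.0)  it rewrites the
   exponent to 1.0 and returns 1.  */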
1026 static HOST_WIDE_INT
1027 decrement_power (gimple stmt)
1029 REAL_VALUE_TYPE c, cint;
1030 HOST_WIDE_INT power;
1031 tree arg1;
1033 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1035 CASE_FLT_FN (BUILT_IN_POW):
1036 arg1 = gimple_call_arg (stmt, 1);
1037 c = TREE_REAL_CST (arg1);
1038 power = real_to_integer (&c) - 1;
1039 real_from_integer (&cint, VOIDmode, power, 0, 0);
1040 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1041 return power;
1043 CASE_FLT_FN (BUILT_IN_POWI):
1044 arg1 = gimple_call_arg (stmt, 1);
1045 power = TREE_INT_CST_LOW (arg1) - 1;
1046 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1047 return power;
1049 default:
1050 gcc_unreachable ();
1054 /* Find the single immediate use of STMT's LHS, and replace it
1055 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1056 replace *DEF with OP as well. */
1058 static void
1059 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1061 tree lhs;
1062 gimple use_stmt;
1063 use_operand_p use;
1064 gimple_stmt_iterator gsi;
1066 if (is_gimple_call (stmt))
1067 lhs = gimple_call_lhs (stmt);
1068 else
1069 lhs = gimple_assign_lhs (stmt);
1071 gcc_assert (has_single_use (lhs));
1072 single_imm_use (lhs, &use, &use_stmt);
1073 if (lhs == *def)
1074 *def = op;
1075 SET_USE (use, op);
1076 if (TREE_CODE (op) != SSA_NAME)
1077 update_stmt (use_stmt);
1078 gsi = gsi_for_stmt (stmt);
1079 gsi_remove (&gsi, true);
1080 release_defs (stmt);
1082 if (is_gimple_call (stmt))
1083 unlink_stmt_vdef (stmt);
1086 /* Walks the linear chain with result *DEF searching for an operation
1087 with operand OP and code OPCODE removing that from the chain. *DEF
1088 is updated if there is only one operand but no operation left. */
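/* For illustration (hypothetical chain), with *DEF being t_7 and

     t_5 = a_2 * b_3;
     t_7 = t_5 * c_4;

   calling zero_one_operation on t_7 with MULT_EXPR and b_3 removes b_3
   from the chain: a_2 is propagated into the single use of t_5, leaving
   effectively  t_7 = a_2 * c_4.  */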
1090 static void
1091 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1093 gimple stmt = SSA_NAME_DEF_STMT (*def);
1097 tree name;
1099 if (opcode == MULT_EXPR
1100 && stmt_is_power_of_op (stmt, op))
1102 if (decrement_power (stmt) == 1)
1103 propagate_op_to_single_use (op, stmt, def);
1104 return;
1107 name = gimple_assign_rhs1 (stmt);
1109 /* If this is the operation we look for and one of the operands
1110 is ours simply propagate the other operand into the stmts
1111 single use. */
1112 if (gimple_assign_rhs_code (stmt) == opcode
1113 && (name == op
1114 || gimple_assign_rhs2 (stmt) == op))
1116 if (name == op)
1117 name = gimple_assign_rhs2 (stmt);
1118 propagate_op_to_single_use (name, stmt, def);
1119 return;
1122 /* We might have a multiply of two __builtin_pow* calls, and
1123 the operand might be hiding in the rightmost one. */
1124 if (opcode == MULT_EXPR
1125 && gimple_assign_rhs_code (stmt) == opcode
1126 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
1128 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1129 if (stmt_is_power_of_op (stmt2, op))
1131 if (decrement_power (stmt2) == 1)
1132 propagate_op_to_single_use (op, stmt2, def);
1133 return;
1137 /* Continue walking the chain. */
1138 gcc_assert (name != op
1139 && TREE_CODE (name) == SSA_NAME);
1140 stmt = SSA_NAME_DEF_STMT (name);
1142 while (1);
1145 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1146 the result. Places the statement after the definition of either
1147 OP1 or OP2. Returns the new statement. */
1149 static gimple
1150 build_and_add_sum (tree tmpvar, tree op1, tree op2, enum tree_code opcode)
1152 gimple op1def = NULL, op2def = NULL;
1153 gimple_stmt_iterator gsi;
1154 tree op;
1155 gimple sum;
1157 /* Create the addition statement. */
1158 sum = gimple_build_assign_with_ops (opcode, tmpvar, op1, op2);
1159 op = make_ssa_name (tmpvar, sum);
1160 gimple_assign_set_lhs (sum, op);
1162 /* Find an insertion place and insert. */
1163 if (TREE_CODE (op1) == SSA_NAME)
1164 op1def = SSA_NAME_DEF_STMT (op1);
1165 if (TREE_CODE (op2) == SSA_NAME)
1166 op2def = SSA_NAME_DEF_STMT (op2);
1167 if ((!op1def || gimple_nop_p (op1def))
1168 && (!op2def || gimple_nop_p (op2def)))
1170 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1171 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1173 else if ((!op1def || gimple_nop_p (op1def))
1174 || (op2def && !gimple_nop_p (op2def)
1175 && stmt_dominates_stmt_p (op1def, op2def)))
1177 if (gimple_code (op2def) == GIMPLE_PHI)
1179 gsi = gsi_after_labels (gimple_bb (op2def));
1180 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1182 else
1184 if (!stmt_ends_bb_p (op2def))
1186 gsi = gsi_for_stmt (op2def);
1187 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1189 else
1191 edge e;
1192 edge_iterator ei;
1194 FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1195 if (e->flags & EDGE_FALLTHRU)
1196 gsi_insert_on_edge_immediate (e, sum);
1200 else
1202 if (gimple_code (op1def) == GIMPLE_PHI)
1204 gsi = gsi_after_labels (gimple_bb (op1def));
1205 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1207 else
1209 if (!stmt_ends_bb_p (op1def))
1211 gsi = gsi_for_stmt (op1def);
1212 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1214 else
1216 edge e;
1217 edge_iterator ei;
1219 FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1220 if (e->flags & EDGE_FALLTHRU)
1221 gsi_insert_on_edge_immediate (e, sum);
1225 update_stmt (sum);
1227 return sum;
1230 /* Perform un-distribution of divisions and multiplications.
1231 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1232 to (A + B) / X for real X.
1234 The algorithm is organized as follows.
1236 - First we walk the addition chain *OPS looking for summands that
1237 are defined by a multiplication or a real division. This results
1238 in the candidates bitmap with relevant indices into *OPS.
1240 - Second we build the chains of multiplications or divisions for
1241 these candidates, counting the number of occurrences of (operand, code)
1242 pairs in all of the candidates chains.
1244 - Third we sort the (operand, code) pairs by number of occurrence and
1245 process them starting with the pair with the most uses.
1247 * For each such pair we walk the candidates again to build a
1248 second candidate bitmap noting all multiplication/division chains
1249 that have at least one occurrence of (operand, code).
1251 * We build an alternate addition chain only covering these
1252 candidates with one (operand, code) operation removed from their
1253 multiplication/division chain.
1255 * The first candidate gets replaced by the alternate addition chain
1256 multiplied/divided by the operand.
1258 * All candidate chains get disabled for further processing and
1259 processing of (operand, code) pairs continues.
1261 The alternate addition chains built are re-processed by the main
1262 reassociation algorithm which allows optimizing a * x * y + b * y * x
1263 to (a + b) * x * y in one invocation of the reassociation pass. */
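/* A hypothetical run on  t = a*x + b*x + c:  the candidates bitmap marks
   the two MULT_EXPR summands, the counting table records the pair (x, *)
   with count 2, and the rewrite builds

     tmp_1 = a + b;
     tmp_2 = tmp_1 * x;

   replacing the first candidate by tmp_2 and the second by 0, so the
   rest of the pass turns  t = tmp_2 + 0 + c  into  t = tmp_2 + c.  */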
1265 static bool
1266 undistribute_ops_list (enum tree_code opcode,
1267 VEC (operand_entry_t, heap) **ops, struct loop *loop)
1269 unsigned int length = VEC_length (operand_entry_t, *ops);
1270 operand_entry_t oe1;
1271 unsigned i, j;
1272 sbitmap candidates, candidates2;
1273 unsigned nr_candidates, nr_candidates2;
1274 sbitmap_iterator sbi0;
1275 VEC (operand_entry_t, heap) **subops;
1276 htab_t ctable;
1277 bool changed = false;
1278 int next_oecount_id = 0;
1280 if (length <= 1
1281 || opcode != PLUS_EXPR)
1282 return false;
1284 /* Build a list of candidates to process. */
1285 candidates = sbitmap_alloc (length);
1286 sbitmap_zero (candidates);
1287 nr_candidates = 0;
1288 FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe1)
1290 enum tree_code dcode;
1291 gimple oe1def;
1293 if (TREE_CODE (oe1->op) != SSA_NAME)
1294 continue;
1295 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1296 if (!is_gimple_assign (oe1def))
1297 continue;
1298 dcode = gimple_assign_rhs_code (oe1def);
1299 if ((dcode != MULT_EXPR
1300 && dcode != RDIV_EXPR)
1301 || !is_reassociable_op (oe1def, dcode, loop))
1302 continue;
1304 SET_BIT (candidates, i);
1305 nr_candidates++;
1308 if (nr_candidates < 2)
1310 sbitmap_free (candidates);
1311 return false;
1314 if (dump_file && (dump_flags & TDF_DETAILS))
1316 fprintf (dump_file, "searching for un-distribute opportunities ");
1317 print_generic_expr (dump_file,
1318 VEC_index (operand_entry_t, *ops,
1319 sbitmap_first_set_bit (candidates))->op, 0);
1320 fprintf (dump_file, " %d\n", nr_candidates);
1323 /* Build linearized sub-operand lists and the counting table. */
1324 cvec = NULL;
1325 ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1326 subops = XCNEWVEC (VEC (operand_entry_t, heap) *,
1327 VEC_length (operand_entry_t, *ops));
1328 EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1330 gimple oedef;
1331 enum tree_code oecode;
1332 unsigned j;
1334 oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op);
1335 oecode = gimple_assign_rhs_code (oedef);
1336 linearize_expr_tree (&subops[i], oedef,
1337 associative_tree_code (oecode), false);
1339 FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
1341 oecount c;
1342 void **slot;
1343 size_t idx;
1344 c.oecode = oecode;
1345 c.cnt = 1;
1346 c.id = next_oecount_id++;
1347 c.op = oe1->op;
1348 VEC_safe_push (oecount, heap, cvec, &c);
1349 idx = VEC_length (oecount, cvec) + 41;
1350 slot = htab_find_slot (ctable, (void *)idx, INSERT);
1351 if (!*slot)
1353 *slot = (void *)idx;
1355 else
1357 VEC_pop (oecount, cvec);
1358 VEC_index (oecount, cvec, (size_t)*slot - 42)->cnt++;
1362 htab_delete (ctable);
1364 /* Sort the counting table. */
1365 VEC_qsort (oecount, cvec, oecount_cmp);
1367 if (dump_file && (dump_flags & TDF_DETAILS))
1369 oecount *c;
1370 fprintf (dump_file, "Candidates:\n");
1371 FOR_EACH_VEC_ELT (oecount, cvec, j, c)
1373 fprintf (dump_file, " %u %s: ", c->cnt,
1374 c->oecode == MULT_EXPR
1375 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1376 print_generic_expr (dump_file, c->op, 0);
1377 fprintf (dump_file, "\n");
1381 /* Process the (operand, code) pairs in order of most occurrence. */
1382 candidates2 = sbitmap_alloc (length);
1383 while (!VEC_empty (oecount, cvec))
1385 oecount *c = VEC_last (oecount, cvec);
1386 if (c->cnt < 2)
1387 break;
1389 /* Now collect the operands in the outer chain that contain
1390 the common operand in their inner chain. */
1391 sbitmap_zero (candidates2);
1392 nr_candidates2 = 0;
1393 EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1395 gimple oedef;
1396 enum tree_code oecode;
1397 unsigned j;
1398 tree op = VEC_index (operand_entry_t, *ops, i)->op;
1400 /* If we undistributed in this chain already, this may be
1401 a constant. */
1402 if (TREE_CODE (op) != SSA_NAME)
1403 continue;
1405 oedef = SSA_NAME_DEF_STMT (op);
1406 oecode = gimple_assign_rhs_code (oedef);
1407 if (oecode != c->oecode)
1408 continue;
1410 FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
1412 if (oe1->op == c->op)
1414 SET_BIT (candidates2, i);
1415 ++nr_candidates2;
1416 break;
1421 if (nr_candidates2 >= 2)
1423 operand_entry_t oe1, oe2;
1424 tree tmpvar;
1425 gimple prod;
1426 int first = sbitmap_first_set_bit (candidates2);
1428 /* Build the new addition chain. */
1429 oe1 = VEC_index (operand_entry_t, *ops, first);
1430 if (dump_file && (dump_flags & TDF_DETAILS))
1432 fprintf (dump_file, "Building (");
1433 print_generic_expr (dump_file, oe1->op, 0);
1435 tmpvar = create_tmp_reg (TREE_TYPE (oe1->op), NULL);
1436 add_referenced_var (tmpvar);
1437 zero_one_operation (&oe1->op, c->oecode, c->op);
1438 EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
1440 gimple sum;
1441 oe2 = VEC_index (operand_entry_t, *ops, i);
1442 if (dump_file && (dump_flags & TDF_DETAILS))
1444 fprintf (dump_file, " + ");
1445 print_generic_expr (dump_file, oe2->op, 0);
1447 zero_one_operation (&oe2->op, c->oecode, c->op);
1448 sum = build_and_add_sum (tmpvar, oe1->op, oe2->op, opcode);
1449 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1450 oe2->rank = 0;
1451 oe1->op = gimple_get_lhs (sum);
1454 /* Apply the multiplication/division. */
1455 prod = build_and_add_sum (tmpvar, oe1->op, c->op, c->oecode);
1456 if (dump_file && (dump_flags & TDF_DETAILS))
1458 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1459 print_generic_expr (dump_file, c->op, 0);
1460 fprintf (dump_file, "\n");
1463 /* Record it in the addition chain and disable further
1464 undistribution with this op. */
1465 oe1->op = gimple_assign_lhs (prod);
1466 oe1->rank = get_rank (oe1->op);
1467 VEC_free (operand_entry_t, heap, subops[first]);
1469 changed = true;
1472 VEC_pop (oecount, cvec);
1475 for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i)
1476 VEC_free (operand_entry_t, heap, subops[i]);
1477 free (subops);
1478 VEC_free (oecount, heap, cvec);
1479 sbitmap_free (candidates);
1480 sbitmap_free (candidates2);
1482 return changed;
1485 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1486 expression, examine the other OPS to see if any of them are comparisons
1487 of the same values, which we may be able to combine or eliminate.
1488 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1490 static bool
1491 eliminate_redundant_comparison (enum tree_code opcode,
1492 VEC (operand_entry_t, heap) **ops,
1493 unsigned int currindex,
1494 operand_entry_t curr)
1496 tree op1, op2;
1497 enum tree_code lcode, rcode;
1498 gimple def1, def2;
1499 int i;
1500 operand_entry_t oe;
1502 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1503 return false;
1505 /* Check that CURR is a comparison. */
1506 if (TREE_CODE (curr->op) != SSA_NAME)
1507 return false;
1508 def1 = SSA_NAME_DEF_STMT (curr->op);
1509 if (!is_gimple_assign (def1))
1510 return false;
1511 lcode = gimple_assign_rhs_code (def1);
1512 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1513 return false;
1514 op1 = gimple_assign_rhs1 (def1);
1515 op2 = gimple_assign_rhs2 (def1);
1517 /* Now look for a similar comparison in the remaining OPS. */
1518 for (i = currindex + 1;
1519 VEC_iterate (operand_entry_t, *ops, i, oe);
1520 i++)
1522 tree t;
1524 if (TREE_CODE (oe->op) != SSA_NAME)
1525 continue;
1526 def2 = SSA_NAME_DEF_STMT (oe->op);
1527 if (!is_gimple_assign (def2))
1528 continue;
1529 rcode = gimple_assign_rhs_code (def2);
1530 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1531 continue;
1533 /* If we got here, we have a match. See if we can combine the
1534 two comparisons. */
1535 if (opcode == BIT_IOR_EXPR)
1536 t = maybe_fold_or_comparisons (lcode, op1, op2,
1537 rcode, gimple_assign_rhs1 (def2),
1538 gimple_assign_rhs2 (def2));
1539 else
1540 t = maybe_fold_and_comparisons (lcode, op1, op2,
1541 rcode, gimple_assign_rhs1 (def2),
1542 gimple_assign_rhs2 (def2));
1543 if (!t)
1544 continue;
1546 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1547 always give us a boolean_type_node value back. If the original
1548 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1549 we need to convert. */
1550 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1551 t = fold_convert (TREE_TYPE (curr->op), t);
1553 if (TREE_CODE (t) != INTEGER_CST
1554 && !operand_equal_p (t, curr->op, 0))
1556 enum tree_code subcode;
1557 tree newop1, newop2;
1558 if (!COMPARISON_CLASS_P (t))
1559 continue;
1560 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1561 STRIP_USELESS_TYPE_CONVERSION (newop1);
1562 STRIP_USELESS_TYPE_CONVERSION (newop2);
1563 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1564 continue;
1567 if (dump_file && (dump_flags & TDF_DETAILS))
1569 fprintf (dump_file, "Equivalence: ");
1570 print_generic_expr (dump_file, curr->op, 0);
1571 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1572 print_generic_expr (dump_file, oe->op, 0);
1573 fprintf (dump_file, " -> ");
1574 print_generic_expr (dump_file, t, 0);
1575 fprintf (dump_file, "\n");
1578 /* Now we can delete oe, as it has been subsumed by the new combined
1579 expression t. */
1580 VEC_ordered_remove (operand_entry_t, *ops, i);
1581 reassociate_stats.ops_eliminated ++;
1583 /* If t is the same as curr->op, we're done. Otherwise we must
1584 replace curr->op with t. Special case is if we got a constant
1585 back, in which case we add it to the end instead of in place of
1586 the current entry. */
1587 if (TREE_CODE (t) == INTEGER_CST)
1589 VEC_ordered_remove (operand_entry_t, *ops, currindex);
1590 add_to_ops_vec (ops, t);
1592 else if (!operand_equal_p (t, curr->op, 0))
1594 tree tmpvar;
1595 gimple sum;
1596 enum tree_code subcode;
1597 tree newop1;
1598 tree newop2;
1599 gcc_assert (COMPARISON_CLASS_P (t));
1600 tmpvar = create_tmp_var (TREE_TYPE (t), NULL);
1601 add_referenced_var (tmpvar);
1602 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1603 STRIP_USELESS_TYPE_CONVERSION (newop1);
1604 STRIP_USELESS_TYPE_CONVERSION (newop2);
1605 gcc_checking_assert (is_gimple_val (newop1)
1606 && is_gimple_val (newop2));
1607 sum = build_and_add_sum (tmpvar, newop1, newop2, subcode);
1608 curr->op = gimple_get_lhs (sum);
1610 return true;
1613 return false;
1616 /* Perform various identities and other optimizations on the list of
1617 operand entries, stored in OPS. The tree code for the binary
1618 operation between all the operands is OPCODE. */
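/* E.g. a PLUS_EXPR list {a_1, 3, 5} (hypothetical) first has its two
   trailing constants folded into 8 below, giving {a_1, 8}, and
   optimize_ops_list then recurses on the shortened list.  */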
1620 static void
1621 optimize_ops_list (enum tree_code opcode,
1622 VEC (operand_entry_t, heap) **ops)
1624 unsigned int length = VEC_length (operand_entry_t, *ops);
1625 unsigned int i;
1626 operand_entry_t oe;
1627 operand_entry_t oelast = NULL;
1628 bool iterate = false;
1630 if (length == 1)
1631 return;
1633 oelast = VEC_last (operand_entry_t, *ops);
1635 /* If the last two are constants, pop the constants off, merge them
1636 and try the next two. */
1637 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1639 operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2);
1641 if (oelm1->rank == 0
1642 && is_gimple_min_invariant (oelm1->op)
1643 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1644 TREE_TYPE (oelast->op)))
1646 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1647 oelm1->op, oelast->op);
1649 if (folded && is_gimple_min_invariant (folded))
1651 if (dump_file && (dump_flags & TDF_DETAILS))
1652 fprintf (dump_file, "Merging constants\n");
1654 VEC_pop (operand_entry_t, *ops);
1655 VEC_pop (operand_entry_t, *ops);
1657 add_to_ops_vec (ops, folded);
1658 reassociate_stats.constants_eliminated++;
1660 optimize_ops_list (opcode, ops);
1661 return;
1666 eliminate_using_constants (opcode, ops);
1667 oelast = NULL;
1669 for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);)
1671 bool done = false;
1673 if (eliminate_not_pairs (opcode, ops, i, oe))
1674 return;
1675 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1676 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1677 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1679 if (done)
1680 return;
1681 iterate = true;
1682 oelast = NULL;
1683 continue;
1685 oelast = oe;
1686 i++;
1689 length = VEC_length (operand_entry_t, *ops);
1690 oelast = VEC_last (operand_entry_t, *ops);
1692 if (iterate)
1693 optimize_ops_list (opcode, ops);
1696 /* The following functions are subroutines to optimize_range_tests and allow
1697 it to try to change a logical combination of comparisons into a range
1698 test.
1700 For example, both
1701 X == 2 || X == 5 || X == 3 || X == 4
1702 and
1703 X >= 2 && X <= 5
1704 are converted to
1705 (unsigned) (X - 2) <= 3
1707 For more information see comments above fold_test_range in fold-const.c,
1708 this implementation is for GIMPLE. */
1710 struct range_entry
1712 tree exp;
1713 tree low;
1714 tree high;
1715 bool in_p;
1716 bool strict_overflow_p;
1717 unsigned int idx, next;
1720 /* This is similar to make_range in fold-const.c, but on top of
1721 GIMPLE instead of trees. */
1723 static void
1724 init_range_entry (struct range_entry *r, tree exp)
1726 int in_p;
1727 tree low, high;
1728 bool is_bool, strict_overflow_p;
1730 r->exp = NULL_TREE;
1731 r->in_p = false;
1732 r->strict_overflow_p = false;
1733 r->low = NULL_TREE;
1734 r->high = NULL_TREE;
1735 if (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))
1736 return;
1738 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1739 and see if we can refine the range. Some of the cases below may not
1740 happen, but it doesn't seem worth worrying about this. We "continue"
1741 the outer loop when we've changed something; otherwise we "break"
1742 the switch, which will "break" the while. */
1743 low = build_int_cst (TREE_TYPE (exp), 0);
1744 high = low;
1745 in_p = 0;
1746 strict_overflow_p = false;
1747 is_bool = false;
1748 if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1750 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1751 is_bool = true;
1752 else
1753 return;
1755 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1756 is_bool = true;
1758 while (1)
1760 gimple stmt;
1761 enum tree_code code;
1762 tree arg0, arg1, exp_type;
1763 tree nexp;
1764 location_t loc;
1766 if (TREE_CODE (exp) != SSA_NAME)
1767 break;
1769 stmt = SSA_NAME_DEF_STMT (exp);
1770 if (!is_gimple_assign (stmt))
1771 break;
1773 code = gimple_assign_rhs_code (stmt);
1774 arg0 = gimple_assign_rhs1 (stmt);
1775 if (TREE_CODE (arg0) != SSA_NAME)
1776 break;
1777 arg1 = gimple_assign_rhs2 (stmt);
1778 exp_type = TREE_TYPE (exp);
1779 loc = gimple_location (stmt);
1780 switch (code)
1782 case BIT_NOT_EXPR:
1783 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1785 in_p = !in_p;
1786 exp = arg0;
1787 continue;
1789 break;
1790 case SSA_NAME:
1791 exp = arg0;
1792 continue;
1793 CASE_CONVERT:
1794 if (is_bool)
1795 goto do_default;
1796 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1798 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1799 is_bool = true;
1800 else
1801 return;
1803 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1804 is_bool = true;
1805 goto do_default;
1806 case EQ_EXPR:
1807 case NE_EXPR:
1808 case LT_EXPR:
1809 case LE_EXPR:
1810 case GE_EXPR:
1811 case GT_EXPR:
1812 is_bool = true;
1813 /* FALLTHRU */
1814 default:
1815 if (!is_bool)
1816 return;
1817 do_default:
1818 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1819 &low, &high, &in_p,
1820 &strict_overflow_p);
1821 if (nexp != NULL_TREE)
1823 exp = nexp;
1824 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1825 continue;
1827 break;
1829 break;
1831 if (is_bool)
1833 r->exp = exp;
1834 r->in_p = in_p;
1835 r->low = low;
1836 r->high = high;
1837 r->strict_overflow_p = strict_overflow_p;
1841 /* Comparison function for qsort. Sort entries
1842 without SSA_NAME exp first, then with SSA_NAMEs sorted
1843 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1844 by increasing ->low and if ->low is the same, by increasing
1845 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1846 maximum. */
1848 static int
1849 range_entry_cmp (const void *a, const void *b)
1851 const struct range_entry *p = (const struct range_entry *) a;
1852 const struct range_entry *q = (const struct range_entry *) b;
1854 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1856 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1858 /* Group range_entries for the same SSA_NAME together. */
1859 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1860 return -1;
1861 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1862 return 1;
1863 /* If ->low is different, NULL low goes first, then by
1864 ascending low. */
1865 if (p->low != NULL_TREE)
1867 if (q->low != NULL_TREE)
1869 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1870 p->low, q->low);
1871 if (tem && integer_onep (tem))
1872 return -1;
1873 tem = fold_binary (GT_EXPR, boolean_type_node,
1874 p->low, q->low);
1875 if (tem && integer_onep (tem))
1876 return 1;
1878 else
1879 return 1;
1881 else if (q->low != NULL_TREE)
1882 return -1;
1883 /* If ->high is different, NULL high goes last, before that by
1884 ascending high. */
1885 if (p->high != NULL_TREE)
1887 if (q->high != NULL_TREE)
1889 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1890 p->high, q->high);
1891 if (tem && integer_onep (tem))
1892 return -1;
1893 tem = fold_binary (GT_EXPR, boolean_type_node,
1894 p->high, q->high);
1895 if (tem && integer_onep (tem))
1896 return 1;
1898 else
1899 return -1;
1901 else if (p->high != NULL_TREE)
1902 return 1;
1903 /* If both ranges are the same, sort below by ascending idx. */
1905 else
1906 return 1;
1908 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1909 return -1;
1911 if (p->idx < q->idx)
1912 return -1;
1913 else
1915 gcc_checking_assert (p->idx > q->idx);
1916 return 1;
1920 /* Helper routine of optimize_range_tests.
1921 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1922 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1923 OPCODE and OPS are arguments of optimize_range_tests. Return
1924 true if the range merge has been successful. */
1926 static bool
1927 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1928 unsigned int count, enum tree_code opcode,
1929 VEC (operand_entry_t, heap) **ops, tree exp, bool in_p,
1930 tree low, tree high, bool strict_overflow_p)
1932 tree op = VEC_index (operand_entry_t, *ops, range->idx)->op;
1933 location_t loc = gimple_location (SSA_NAME_DEF_STMT (op));
1934 tree tem = build_range_check (loc, TREE_TYPE (op), exp, in_p, low, high);
1935 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1936 gimple_stmt_iterator gsi;
1938 if (tem == NULL_TREE)
1939 return false;
1941 if (strict_overflow_p && issue_strict_overflow_warning (wc))
1942 warning_at (loc, OPT_Wstrict_overflow,
1943 "assuming signed overflow does not occur "
1944 "when simplifying range test");
1946 if (dump_file && (dump_flags & TDF_DETAILS))
1948 struct range_entry *r;
1949 fprintf (dump_file, "Optimizing range tests ");
1950 print_generic_expr (dump_file, range->exp, 0);
1951 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1952 print_generic_expr (dump_file, range->low, 0);
1953 fprintf (dump_file, ", ");
1954 print_generic_expr (dump_file, range->high, 0);
1955 fprintf (dump_file, "]");
1956 for (r = otherrange; r < otherrange + count; r++)
1958 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1959 print_generic_expr (dump_file, r->low, 0);
1960 fprintf (dump_file, ", ");
1961 print_generic_expr (dump_file, r->high, 0);
1962 fprintf (dump_file, "]");
1964 fprintf (dump_file, "\n into ");
1965 print_generic_expr (dump_file, tem, 0);
1966 fprintf (dump_file, "\n");
1969 if (opcode == BIT_IOR_EXPR)
1970 tem = invert_truthvalue_loc (loc, tem);
1972 tem = fold_convert_loc (loc, TREE_TYPE (op), tem);
1973 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (op));
1974 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
1975 GSI_SAME_STMT);
1977 VEC_index (operand_entry_t, *ops, range->idx)->op = tem;
1978 range->exp = exp;
1979 range->low = low;
1980 range->high = high;
1981 range->in_p = in_p;
1982 range->strict_overflow_p = false;
1984 for (range = otherrange; range < otherrange + count; range++)
1986 VEC_index (operand_entry_t, *ops, range->idx)->op = error_mark_node;
1987 range->exp = NULL_TREE;
1989 return true;
1992 /* Optimize range tests, similarly to how fold_range_test optimizes
1993 them on trees. The tree code for the binary
1994 operation between all the operands is OPCODE. */
1996 static void
1997 optimize_range_tests (enum tree_code opcode,
1998 VEC (operand_entry_t, heap) **ops)
2000 unsigned int length = VEC_length (operand_entry_t, *ops), i, j, first;
2001 operand_entry_t oe;
2002 struct range_entry *ranges;
2003 bool any_changes = false;
2005 if (length == 1)
2006 return;
2008 ranges = XNEWVEC (struct range_entry, length);
2009 for (i = 0; i < length; i++)
2011 ranges[i].idx = i;
2012 init_range_entry (ranges + i, VEC_index (operand_entry_t, *ops, i)->op);
2013 /* For |, invert it now; we will invert it again before emitting
2014 the optimized expression. */
2015 if (opcode == BIT_IOR_EXPR)
2016 ranges[i].in_p = !ranges[i].in_p;
2019 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2020 for (i = 0; i < length; i++)
2021 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2022 break;
2024 /* Try to merge ranges. */
2025 for (first = i; i < length; i++)
2027 tree low = ranges[i].low;
2028 tree high = ranges[i].high;
2029 int in_p = ranges[i].in_p;
2030 bool strict_overflow_p = ranges[i].strict_overflow_p;
2031 int update_fail_count = 0;
2033 for (j = i + 1; j < length; j++)
2035 if (ranges[i].exp != ranges[j].exp)
2036 break;
2037 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2038 ranges[j].in_p, ranges[j].low, ranges[j].high))
2039 break;
2040 strict_overflow_p |= ranges[j].strict_overflow_p;
2043 if (j == i + 1)
2044 continue;
2046 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2047 ops, ranges[i].exp, in_p, low, high,
2048 strict_overflow_p))
2050 i = j - 1;
2051 any_changes = true;
2053 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2054 while update_range_test would fail. */
2055 else if (update_fail_count == 64)
2056 i = j - 1;
2057 else
2058 ++update_fail_count;
2061 /* Optimize X == CST1 || X == CST2
2062 if popcount (CST1 ^ CST2) == 1 into
2063 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2064 Similarly for ranges. E.g.
2065 X != 2 && X != 3 && X != 10 && X != 11
2066 will be transformed by the above loop into
2067 (X - 2U) <= 1U && (X - 10U) <= 1U
2068 and this loop can transform that into
2069 ((X & ~8) - 2U) <= 1U. */
2070 for (i = first; i < length; i++)
2072 tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
2074 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2075 continue;
2076 type = TREE_TYPE (ranges[i].exp);
2077 if (!INTEGRAL_TYPE_P (type))
2078 continue;
2079 lowi = ranges[i].low;
2080 if (lowi == NULL_TREE)
2081 lowi = TYPE_MIN_VALUE (type);
2082 highi = ranges[i].high;
2083 if (highi == NULL_TREE)
2084 continue;
2085 for (j = i + 1; j < length && j < i + 64; j++)
2087 if (ranges[j].exp == NULL_TREE)
2088 continue;
2089 if (ranges[i].exp != ranges[j].exp)
2090 break;
2091 if (ranges[j].in_p)
2092 continue;
2093 lowj = ranges[j].low;
2094 if (lowj == NULL_TREE)
2095 continue;
2096 highj = ranges[j].high;
2097 if (highj == NULL_TREE)
2098 highj = TYPE_MAX_VALUE (type);
2099 tem = fold_binary (GT_EXPR, boolean_type_node,
2100 lowj, highi);
2101 if (tem == NULL_TREE || !integer_onep (tem))
2102 continue;
2103 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2104 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2105 continue;
2106 gcc_checking_assert (!integer_zerop (lowxor));
2107 tem = fold_binary (MINUS_EXPR, type, lowxor,
2108 build_int_cst (type, 1));
2109 if (tem == NULL_TREE)
2110 continue;
2111 tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
2112 if (tem == NULL_TREE || !integer_zerop (tem))
2113 continue;
2114 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2115 if (!tree_int_cst_equal (lowxor, highxor))
2116 continue;
2117 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2118 exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
2119 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2120 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2121 if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
2122 ranges[i].in_p, lowj, highj,
2123 ranges[i].strict_overflow_p
2124 || ranges[j].strict_overflow_p))
2126 any_changes = true;
2127 break;
2132 if (any_changes)
2134 j = 0;
2135 FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe)
2137 if (oe->op == error_mark_node)
2138 continue;
2139 else if (i != j)
2140 VEC_replace (operand_entry_t, *ops, j, oe);
2141 j++;
2143 VEC_truncate (operand_entry_t, *ops, j);
2146 XDELETEVEC (ranges);
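/* Illustrative, standalone sketch (not part of this pass), kept under
   #if 0 so it never participates in a build: the two rewrites performed
   above, checked by brute force over an 8-bit domain.  The test
   x != 2 && x != 3 && x != 10 && x != 11 first becomes two range tests,
   and then a single masked range test because 2 ^ 10 == 3 ^ 11 == 8 has
   exactly one bit set.  Compile the body as its own program to run it.  */
#if 0
#include <assert.h>
#include <stdio.h>

int
main (void)
{
  unsigned int x;

  for (x = 0; x < 256; x++)
    {
      int original = (x != 2 && x != 3 && x != 10 && x != 11);
      /* Step 1: merge the singleton exclusions into range tests.  */
      int ranges = ((x - 2u) > 1u) && ((x - 10u) > 1u);
      /* Step 2: the low bounds differ only in bit 3 (2 ^ 10 == 8) and the
         high bounds likewise (3 ^ 11 == 8), so mask that bit away and keep
         a single range test.  */
      int masked = ((x & ~8u) - 2u) > 1u;

      assert (original == ranges);
      assert (original == masked);
    }

  printf ("all three forms agree on [0, 255]\n");
  return 0;
}
#endif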
2149 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2150 of STMT in its operands. This is also known as a "destructive
2151 update" operation. */
2153 static bool
2154 is_phi_for_stmt (gimple stmt, tree operand)
2156 gimple def_stmt;
2157 tree lhs;
2158 use_operand_p arg_p;
2159 ssa_op_iter i;
2161 if (TREE_CODE (operand) != SSA_NAME)
2162 return false;
2164 lhs = gimple_assign_lhs (stmt);
2166 def_stmt = SSA_NAME_DEF_STMT (operand);
2167 if (gimple_code (def_stmt) != GIMPLE_PHI)
2168 return false;
2170 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2171 if (lhs == USE_FROM_PTR (arg_p))
2172 return true;
2173 return false;
2176 /* Remove def stmt of VAR if VAR has zero uses and recurse
2177 on rhs1 operand if so. */
2179 static void
2180 remove_visited_stmt_chain (tree var)
2182 gimple stmt;
2183 gimple_stmt_iterator gsi;
2185 while (1)
2187 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2188 return;
2189 stmt = SSA_NAME_DEF_STMT (var);
2190 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2192 var = gimple_assign_rhs1 (stmt);
2193 gsi = gsi_for_stmt (stmt);
2194 gsi_remove (&gsi, true);
2195 release_defs (stmt);
2197 else
2198 return;
2202 /* This function checks three consecutive operands in
2203 passed operands vector OPS starting from OPINDEX and
2204 swaps two operands if it is profitable for binary operation
2205 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
2207 We pair ops with the same rank if possible.
2209 The alternative we try is to see if STMT is a destructive
2210 update style statement, which is like:
2211 b = phi (a, ...)
2212 a = c + b;
2213 In that case, we want to use the destructive update form to
2214 expose the possible vectorizer sum reduction opportunity.
2215 In that case, the third operand will be the phi node. This
2216 check is not performed if STMT is null.
2218 We could, of course, try to be better as noted above, and do a
2219 lot of work to try to find these opportunities in >3 operand
2220 cases, but it is unlikely to be worth it. */
2222 static void
2223 swap_ops_for_binary_stmt (VEC(operand_entry_t, heap) * ops,
2224 unsigned int opindex, gimple stmt)
2226 operand_entry_t oe1, oe2, oe3;
2228 oe1 = VEC_index (operand_entry_t, ops, opindex);
2229 oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
2230 oe3 = VEC_index (operand_entry_t, ops, opindex + 2);
2232 if ((oe1->rank == oe2->rank
2233 && oe2->rank != oe3->rank)
2234 || (stmt && is_phi_for_stmt (stmt, oe3->op)
2235 && !is_phi_for_stmt (stmt, oe1->op)
2236 && !is_phi_for_stmt (stmt, oe2->op)))
2238 struct operand_entry temp = *oe3;
2239 oe3->op = oe1->op;
2240 oe3->rank = oe1->rank;
2241 oe1->op = temp.op;
2242 oe1->rank = temp.rank;
2244 else if ((oe1->rank == oe3->rank
2245 && oe2->rank != oe3->rank)
2246 || (stmt && is_phi_for_stmt (stmt, oe2->op)
2247 && !is_phi_for_stmt (stmt, oe1->op)
2248 && !is_phi_for_stmt (stmt, oe3->op)))
2250 struct operand_entry temp = *oe2;
2251 oe2->op = oe1->op;
2252 oe2->rank = oe1->rank;
2253 oe1->op = temp.op;
2254 oe1->rank = temp.rank;
2258 /* Recursively rewrite our linearized statements so that the operators
2259 match those in OPS[OPINDEX], putting the computation in rank
2260 order. */
2262 static void
2263 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2264 VEC(operand_entry_t, heap) * ops, bool moved)
2266 tree rhs1 = gimple_assign_rhs1 (stmt);
2267 tree rhs2 = gimple_assign_rhs2 (stmt);
2268 operand_entry_t oe;
2270 /* If we have three operands left, then we want to make sure the ones
2271 that get the double binary op are chosen wisely. */
2272 if (opindex + 3 == VEC_length (operand_entry_t, ops))
2273 swap_ops_for_binary_stmt (ops, opindex, stmt);
2275 /* The final recursion case for this function is that you have
2276 exactly two operations left.
2277 If we had exactly one op in the entire list to start with, we
2278 would have never called this function, and the tail recursion
2279 rewrites them one at a time. */
2280 if (opindex + 2 == VEC_length (operand_entry_t, ops))
2282 operand_entry_t oe1, oe2;
2284 oe1 = VEC_index (operand_entry_t, ops, opindex);
2285 oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
2287 if (rhs1 != oe1->op || rhs2 != oe2->op)
2289 if (dump_file && (dump_flags & TDF_DETAILS))
2291 fprintf (dump_file, "Transforming ");
2292 print_gimple_stmt (dump_file, stmt, 0, 0);
2295 gimple_assign_set_rhs1 (stmt, oe1->op);
2296 gimple_assign_set_rhs2 (stmt, oe2->op);
2297 update_stmt (stmt);
2298 if (rhs1 != oe1->op && rhs1 != oe2->op)
2299 remove_visited_stmt_chain (rhs1);
2301 if (dump_file && (dump_flags & TDF_DETAILS))
2303 fprintf (dump_file, " into ");
2304 print_gimple_stmt (dump_file, stmt, 0, 0);
2307 return;
2310 /* If we hit here, we should have 3 or more ops left. */
2311 gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops));
2313 /* Rewrite the next operator. */
2314 oe = VEC_index (operand_entry_t, ops, opindex);
2316 if (oe->op != rhs2)
2318 if (!moved)
2320 gimple_stmt_iterator gsinow, gsirhs1;
2321 gimple stmt1 = stmt, stmt2;
2322 unsigned int count;
2324 gsinow = gsi_for_stmt (stmt);
2325 count = VEC_length (operand_entry_t, ops) - opindex - 2;
2326 while (count-- != 0)
2328 stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2329 gsirhs1 = gsi_for_stmt (stmt2);
2330 gsi_move_before (&gsirhs1, &gsinow);
2331 gsi_prev (&gsinow);
2332 stmt1 = stmt2;
2334 moved = true;
2337 if (dump_file && (dump_flags & TDF_DETAILS))
2339 fprintf (dump_file, "Transforming ");
2340 print_gimple_stmt (dump_file, stmt, 0, 0);
2343 gimple_assign_set_rhs2 (stmt, oe->op);
2344 update_stmt (stmt);
2346 if (dump_file && (dump_flags & TDF_DETAILS))
2348 fprintf (dump_file, " into ");
2349 print_gimple_stmt (dump_file, stmt, 0, 0);
2352 /* Recurse on the LHS of the binary operator, which is guaranteed to
2353 be the non-leaf side. */
2354 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2357 /* Find out how many cycles we need to compute statements chain.
2358 OPS_NUM holds the number of statements in a chain. CPU_WIDTH is the
2359 maximum number of independent statements we may execute per cycle. */
2361 static int
2362 get_required_cycles (int ops_num, int cpu_width)
2364 int res;
2365 int elog;
2366 unsigned int rest;
2368 /* While we have more than 2 * cpu_width operands
2369 we may reduce the number of operands by cpu_width
2370 per cycle. */
2371 res = ops_num / (2 * cpu_width);
2373 /* The remaining operand count can be halved each cycle
2374 until we have only one operand. */
2375 rest = (unsigned)(ops_num - res * cpu_width);
2376 elog = exact_log2 (rest);
2377 if (elog >= 0)
2378 res += elog;
2379 else
2380 res += floor_log2 (rest) + 1;
2382 return res;
2385 /* Returns an optimal number of registers to use for computation of
2386 given statements. */
2388 static int
2389 get_reassociation_width (int ops_num, enum tree_code opc,
2390 enum machine_mode mode)
2392 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2393 int width;
2394 int width_min;
2395 int cycles_best;
2397 if (param_width > 0)
2398 width = param_width;
2399 else
2400 width = targetm.sched.reassociation_width (opc, mode);
2402 if (width == 1)
2403 return width;
2405 /* Get the minimal time required for sequence computation. */
2406 cycles_best = get_required_cycles (ops_num, width);
2408 /* Check if we may use a smaller width and still compute the sequence
2409 in the same time. That lets us reduce register usage.
2410 get_required_cycles is monotonically increasing with lower width
2411 so we can perform a binary search for the minimal width that still
2412 results in the optimal cycle count. */
2413 width_min = 1;
2414 while (width > width_min)
2416 int width_mid = (width + width_min) / 2;
2418 if (get_required_cycles (ops_num, width_mid) == cycles_best)
2419 width = width_mid;
2420 else if (width_min < width_mid)
2421 width_min = width_mid;
2422 else
2423 break;
2426 return width;
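/* Illustrative, standalone sketch (not part of this pass), kept under
   #if 0: the cycle estimate used by get_required_cycles and the
   narrowing search done by get_reassociation_width, with sketch-local
   helpers standing in for exact_log2/floor_log2.  Compile the body as
   its own program to run it.  */
#if 0
#include <stdio.h>

static int
sketch_floor_log2 (unsigned int x)
{
  int l = -1;
  while (x)
    {
      x >>= 1;
      l++;
    }
  return l;
}

static int
sketch_required_cycles (int ops_num, int cpu_width)
{
  /* While more than 2 * cpu_width operands remain, the operand count
     drops by cpu_width per cycle; the remainder is then halved each
     cycle until a single operand is left.  */
  int res = ops_num / (2 * cpu_width);
  unsigned int rest = (unsigned int) (ops_num - res * cpu_width);
  int elog = sketch_floor_log2 (rest);

  if ((rest & (rest - 1)) == 0)
    res += elog;		/* power of two: exact log  */
  else
    res += elog + 1;
  return res;
}

static int
sketch_reassociation_width (int ops_num, int width)
{
  int cycles_best = sketch_required_cycles (ops_num, width);
  int width_min = 1;

  /* Binary search for the smallest width that still reaches the
     optimal cycle count, mirroring the loop above.  */
  while (width > width_min)
    {
      int width_mid = (width + width_min) / 2;

      if (sketch_required_cycles (ops_num, width_mid) == cycles_best)
	width = width_mid;
      else if (width_min < width_mid)
	width_min = width_mid;
      else
	break;
    }
  return width;
}

int
main (void)
{
  /* 12 operands on a hypothetical 4-wide unit take 4 cycles, and no
     narrower width achieves that, so width 4 is kept.  */
  printf ("cycles (12, 4) = %d\n", sketch_required_cycles (12, 4));
  printf ("width  (12, 4) = %d\n", sketch_reassociation_width (12, 4));
  return 0;
}
#endif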
2429 /* Recursively rewrite our linearized statements so that the operators
2430 match those in OPS[OPINDEX], putting the computation in rank
2431 order and trying to allow operations to be executed in
2432 parallel. */
2434 static void
2435 rewrite_expr_tree_parallel (gimple stmt, int width,
2436 VEC(operand_entry_t, heap) * ops)
2438 enum tree_code opcode = gimple_assign_rhs_code (stmt);
2439 int op_num = VEC_length (operand_entry_t, ops);
2440 int stmt_num = op_num - 1;
2441 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
2442 int op_index = op_num - 1;
2443 int stmt_index = 0;
2444 int ready_stmts_end = 0;
2445 int i = 0;
2446 tree last_rhs1 = gimple_assign_rhs1 (stmt);
2447 tree lhs_var;
2449 /* We start expression rewriting from the top statements.
2450 So, in this loop we create a full list of statements
2451 we will work with. */
2452 stmts[stmt_num - 1] = stmt;
2453 for (i = stmt_num - 2; i >= 0; i--)
2454 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
2456 lhs_var = create_tmp_reg (TREE_TYPE (last_rhs1), NULL);
2457 add_referenced_var (lhs_var);
2459 for (i = 0; i < stmt_num; i++)
2461 tree op1, op2;
2463 /* Determine whether we should use results of
2464 already handled statements or not. */
2465 if (ready_stmts_end == 0
2466 && (i - stmt_index >= width || op_index < 1))
2467 ready_stmts_end = i;
2469 /* Now we choose operands for the next statement. A non-zero
2470 value in ready_stmts_end means that we should use
2471 the results of already generated statements as new operands.
2472 if (ready_stmts_end > 0)
2474 op1 = gimple_assign_lhs (stmts[stmt_index++]);
2475 if (ready_stmts_end > stmt_index)
2476 op2 = gimple_assign_lhs (stmts[stmt_index++]);
2477 else if (op_index >= 0)
2478 op2 = VEC_index (operand_entry_t, ops, op_index--)->op;
2479 else
2481 gcc_assert (stmt_index < i);
2482 op2 = gimple_assign_lhs (stmts[stmt_index++]);
2485 if (stmt_index >= ready_stmts_end)
2486 ready_stmts_end = 0;
2488 else
2490 if (op_index > 1)
2491 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
2492 op2 = VEC_index (operand_entry_t, ops, op_index--)->op;
2493 op1 = VEC_index (operand_entry_t, ops, op_index--)->op;
2496 /* If we emit the last statement then we should put
2497 operands into the last statement. It will also
2498 break the loop. */
2499 if (op_index < 0 && stmt_index == i)
2500 i = stmt_num - 1;
2502 if (dump_file && (dump_flags & TDF_DETAILS))
2504 fprintf (dump_file, "Transforming ");
2505 print_gimple_stmt (dump_file, stmts[i], 0, 0);
2508 /* We keep the original statement only for the last one. All
2509 others are recreated. */
2510 if (i == stmt_num - 1)
2512 gimple_assign_set_rhs1 (stmts[i], op1);
2513 gimple_assign_set_rhs2 (stmts[i], op2);
2514 update_stmt (stmts[i]);
2516 else
2517 stmts[i] = build_and_add_sum (lhs_var, op1, op2, opcode);
2519 if (dump_file && (dump_flags & TDF_DETAILS))
2521 fprintf (dump_file, " into ");
2522 print_gimple_stmt (dump_file, stmts[i], 0, 0);
2526 remove_visited_stmt_chain (last_rhs1);
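/* Illustrative, standalone sketch (not part of this pass), kept under
   #if 0: the shapes produced by the two rewriters for a+b+c+d+e+f.
   rewrite_expr_tree chains every addition; with width 2 this function
   instead forms independent pairs that can issue in parallel.  Integer
   addition is associative, so both orders agree (for floating point
   this relies on -fassociative-math).  */
#if 0
#include <assert.h>

int
main (void)
{
  int a = 1, b = 2, c = 3, d = 4, e = 5, f = 6;

  /* Serial form: ((((a + b) + c) + d) + e) + f.  */
  int serial = ((((a + b) + c) + d) + e) + f;

  /* Width-2 form: three independent pairs, then a reduction of the
     partial sums.  */
  int t1 = a + b;
  int t2 = c + d;
  int t3 = e + f;
  int parallel = (t1 + t2) + t3;

  assert (serial == parallel);
  return 0;
}
#endif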
2529 /* Transform STMT, which is really (A + B) + (C + D), into the left
2530 linear form, ((A+B)+C)+D.
2531 Recurse on D if necessary. */
2533 static void
2534 linearize_expr (gimple stmt)
2536 gimple_stmt_iterator gsinow, gsirhs;
2537 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2538 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
2539 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
2540 gimple newbinrhs = NULL;
2541 struct loop *loop = loop_containing_stmt (stmt);
2543 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
2544 && is_reassociable_op (binrhs, rhscode, loop));
2546 gsinow = gsi_for_stmt (stmt);
2547 gsirhs = gsi_for_stmt (binrhs);
2548 gsi_move_before (&gsirhs, &gsinow);
2550 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
2551 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
2552 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
2554 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
2555 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
2557 if (dump_file && (dump_flags & TDF_DETAILS))
2559 fprintf (dump_file, "Linearized: ");
2560 print_gimple_stmt (dump_file, stmt, 0, 0);
2563 reassociate_stats.linearized++;
2564 update_stmt (binrhs);
2565 update_stmt (binlhs);
2566 update_stmt (stmt);
2568 gimple_set_visited (stmt, true);
2569 gimple_set_visited (binlhs, true);
2570 gimple_set_visited (binrhs, true);
2572 /* Tail recurse on the new rhs if it still needs reassociation. */
2573 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
2574 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
2575 want to change the algorithm while converting to tuples. */
2576 linearize_expr (stmt);
2579 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
2580 it. Otherwise, return NULL. */
2582 static gimple
2583 get_single_immediate_use (tree lhs)
2585 use_operand_p immuse;
2586 gimple immusestmt;
2588 if (TREE_CODE (lhs) == SSA_NAME
2589 && single_imm_use (lhs, &immuse, &immusestmt)
2590 && is_gimple_assign (immusestmt))
2591 return immusestmt;
2593 return NULL;
2596 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
2597 representing the negated value. Insertions of any necessary
2598 instructions go before GSI.
2599 This function is recursive in that, if you hand it "a_5" as the
2600 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
2601 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
2603 static tree
2604 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
2606 gimple negatedefstmt = NULL;
2607 tree resultofnegate;
2609 /* If we are trying to negate a name, defined by an add, negate the
2610 add operands instead. */
2611 if (TREE_CODE (tonegate) == SSA_NAME)
2612 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
2613 if (TREE_CODE (tonegate) == SSA_NAME
2614 && is_gimple_assign (negatedefstmt)
2615 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
2616 && has_single_use (gimple_assign_lhs (negatedefstmt))
2617 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
2619 gimple_stmt_iterator gsi;
2620 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
2621 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
2623 gsi = gsi_for_stmt (negatedefstmt);
2624 rhs1 = negate_value (rhs1, &gsi);
2625 gimple_assign_set_rhs1 (negatedefstmt, rhs1);
2627 gsi = gsi_for_stmt (negatedefstmt);
2628 rhs2 = negate_value (rhs2, &gsi);
2629 gimple_assign_set_rhs2 (negatedefstmt, rhs2);
2631 update_stmt (negatedefstmt);
2632 return gimple_assign_lhs (negatedefstmt);
2635 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
2636 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
2637 NULL_TREE, true, GSI_SAME_STMT);
2638 return resultofnegate;
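/* Illustrative, standalone sketch (not part of this pass), kept under
   #if 0: the rewrite negate_value performs, acted out on a toy
   expression tree.  Negating an addition negates its operands instead,
   so -(b_3 + b_4) becomes -b_3 + -b_4; any other node is simply
   negated.  */
#if 0
#include <assert.h>

struct toy_expr
{
  int is_add;			/* 1: left + right, 0: leaf value.  */
  int value;
  struct toy_expr *left, *right;
};

static int
toy_eval (const struct toy_expr *e)
{
  return e->is_add ? toy_eval (e->left) + toy_eval (e->right) : e->value;
}

/* Negate E in place, pushing the negation down through additions.  */
static void
toy_negate (struct toy_expr *e)
{
  if (e->is_add)
    {
      toy_negate (e->left);
      toy_negate (e->right);
    }
  else
    e->value = -e->value;
}

int
main (void)
{
  struct toy_expr b3 = { 0, 3, 0, 0 };
  struct toy_expr b4 = { 0, 4, 0, 0 };
  struct toy_expr a5 = { 1, 0, &b3, &b4 };

  int before = toy_eval (&a5);
  toy_negate (&a5);
  assert (toy_eval (&a5) == -before);
  return 0;
}
#endif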
2641 /* Return true if we should break up the subtract in STMT into an add
2642 with negate. This is true when the subtract's operands are really
2643 adds, or the subtract itself is used in an add expression. In
2644 either case, breaking up the subtract into an add with negate
2645 exposes the adds to reassociation. */
2647 static bool
2648 should_break_up_subtract (gimple stmt)
2650 tree lhs = gimple_assign_lhs (stmt);
2651 tree binlhs = gimple_assign_rhs1 (stmt);
2652 tree binrhs = gimple_assign_rhs2 (stmt);
2653 gimple immusestmt;
2654 struct loop *loop = loop_containing_stmt (stmt);
2656 if (TREE_CODE (binlhs) == SSA_NAME
2657 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
2658 return true;
2660 if (TREE_CODE (binrhs) == SSA_NAME
2661 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
2662 return true;
2664 if (TREE_CODE (lhs) == SSA_NAME
2665 && (immusestmt = get_single_immediate_use (lhs))
2666 && is_gimple_assign (immusestmt)
2667 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
2668 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
2669 return true;
2670 return false;
2673 /* Transform STMT from A - B into A + -B. */
2675 static void
2676 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
2678 tree rhs1 = gimple_assign_rhs1 (stmt);
2679 tree rhs2 = gimple_assign_rhs2 (stmt);
2681 if (dump_file && (dump_flags & TDF_DETAILS))
2683 fprintf (dump_file, "Breaking up subtract ");
2684 print_gimple_stmt (dump_file, stmt, 0, 0);
2687 rhs2 = negate_value (rhs2, gsip);
2688 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
2689 update_stmt (stmt);
2692 /* Determine whether STMT is a builtin call that raises an SSA name
2693 to an integer power and has only one use. If so, and this is early
2694 reassociation and unsafe math optimizations are permitted, place
2695 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
2696 If any of these conditions does not hold, return FALSE. */
2698 static bool
2699 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
2701 tree fndecl, arg1;
2702 REAL_VALUE_TYPE c, cint;
2704 if (!first_pass_instance
2705 || !flag_unsafe_math_optimizations
2706 || !is_gimple_call (stmt)
2707 || !has_single_use (gimple_call_lhs (stmt)))
2708 return false;
2710 fndecl = gimple_call_fndecl (stmt);
2712 if (!fndecl
2713 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
2714 return false;
2716 switch (DECL_FUNCTION_CODE (fndecl))
2718 CASE_FLT_FN (BUILT_IN_POW):
2719 *base = gimple_call_arg (stmt, 0);
2720 arg1 = gimple_call_arg (stmt, 1);
2722 if (TREE_CODE (arg1) != REAL_CST)
2723 return false;
2725 c = TREE_REAL_CST (arg1);
2727 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
2728 return false;
2730 *exponent = real_to_integer (&c);
2731 real_from_integer (&cint, VOIDmode, *exponent,
2732 *exponent < 0 ? -1 : 0, 0);
2733 if (!real_identical (&c, &cint))
2734 return false;
2736 break;
2738 CASE_FLT_FN (BUILT_IN_POWI):
2739 *base = gimple_call_arg (stmt, 0);
2740 arg1 = gimple_call_arg (stmt, 1);
2742 if (!host_integerp (arg1, 0))
2743 return false;
2745 *exponent = TREE_INT_CST_LOW (arg1);
2746 break;
2748 default:
2749 return false;
2752 /* Expanding negative exponents is generally unproductive, so we don't
2753 complicate matters with those. Exponents of zero and one should
2754 have been handled by expression folding. */
2755 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
2756 return false;
2758 return true;
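/* Illustrative, standalone sketch (not part of this pass), kept under
   #if 0: the shape of the exponent screening above, expressed on a
   plain double.  A pow () exponent is acceptable when it round-trips
   exactly through an integer of at least 2; smaller or fractional
   exponents are rejected.  The 2^53 bound is a sketch-local stand-in
   for the REAL_EXP check.  */
#if 0
#include <stdio.h>

static int
sketch_acceptable_exponent (double c, long long *exponent)
{
  long long n;

  /* Reject NaN, values below 2, and values too large to convert.  */
  if (!(c >= 2.0) || c >= 9007199254740992.0)
    return 0;
  n = (long long) c;
  if ((double) n != c)		/* must round-trip exactly  */
    return 0;
  *exponent = n;
  return 1;
}

int
main (void)
{
  long long e;

  printf ("%d\n", sketch_acceptable_exponent (4.0, &e));	/* 1 */
  printf ("%d\n", sketch_acceptable_exponent (2.5, &e));	/* 0 */
  printf ("%d\n", sketch_acceptable_exponent (1.0, &e));	/* 0 */
  return 0;
}
#endif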
2761 /* Recursively linearize a binary expression that is the RHS of STMT.
2762 Place the operands of the expression tree in the vector named OPS. */
2764 static void
2765 linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt,
2766 bool is_associative, bool set_visited)
2768 tree binlhs = gimple_assign_rhs1 (stmt);
2769 tree binrhs = gimple_assign_rhs2 (stmt);
2770 gimple binlhsdef = NULL, binrhsdef = NULL;
2771 bool binlhsisreassoc = false;
2772 bool binrhsisreassoc = false;
2773 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
2774 struct loop *loop = loop_containing_stmt (stmt);
2775 tree base = NULL_TREE;
2776 HOST_WIDE_INT exponent = 0;
2778 if (set_visited)
2779 gimple_set_visited (stmt, true);
2781 if (TREE_CODE (binlhs) == SSA_NAME)
2783 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
2784 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
2785 && !stmt_could_throw_p (binlhsdef));
2788 if (TREE_CODE (binrhs) == SSA_NAME)
2790 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
2791 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
2792 && !stmt_could_throw_p (binrhsdef));
2795 /* If the LHS is not reassociable, but the RHS is, we need to swap
2796 them. If neither is reassociable, there is nothing we can do, so
2797 just put them in the ops vector. If the LHS is reassociable,
2798 linearize it. If both are reassociable, then linearize the RHS
2799 and the LHS. */
2801 if (!binlhsisreassoc)
2803 tree temp;
2805 /* If this is not an associative operation, like division, give up. */
2806 if (!is_associative)
2808 add_to_ops_vec (ops, binrhs);
2809 return;
2812 if (!binrhsisreassoc)
2814 if (rhscode == MULT_EXPR
2815 && TREE_CODE (binrhs) == SSA_NAME
2816 && acceptable_pow_call (binrhsdef, &base, &exponent))
2818 add_repeat_to_ops_vec (ops, base, exponent);
2819 gimple_set_visited (binrhsdef, true);
2821 else
2822 add_to_ops_vec (ops, binrhs);
2824 if (rhscode == MULT_EXPR
2825 && TREE_CODE (binlhs) == SSA_NAME
2826 && acceptable_pow_call (binlhsdef, &base, &exponent))
2828 add_repeat_to_ops_vec (ops, base, exponent);
2829 gimple_set_visited (binlhsdef, true);
2831 else
2832 add_to_ops_vec (ops, binlhs);
2834 return;
2837 if (dump_file && (dump_flags & TDF_DETAILS))
2839 fprintf (dump_file, "swapping operands of ");
2840 print_gimple_stmt (dump_file, stmt, 0, 0);
2843 swap_tree_operands (stmt,
2844 gimple_assign_rhs1_ptr (stmt),
2845 gimple_assign_rhs2_ptr (stmt));
2846 update_stmt (stmt);
2848 if (dump_file && (dump_flags & TDF_DETAILS))
2850 fprintf (dump_file, " is now ");
2851 print_gimple_stmt (dump_file, stmt, 0, 0);
2854 /* We want to make it so the lhs is always the reassociative op,
2855 so swap. */
2856 temp = binlhs;
2857 binlhs = binrhs;
2858 binrhs = temp;
2860 else if (binrhsisreassoc)
2862 linearize_expr (stmt);
2863 binlhs = gimple_assign_rhs1 (stmt);
2864 binrhs = gimple_assign_rhs2 (stmt);
2867 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
2868 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
2869 rhscode, loop));
2870 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
2871 is_associative, set_visited);
2873 if (rhscode == MULT_EXPR
2874 && TREE_CODE (binrhs) == SSA_NAME
2875 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
2877 add_repeat_to_ops_vec (ops, base, exponent);
2878 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
2880 else
2881 add_to_ops_vec (ops, binrhs);
2884 /* Repropagate the negates back into subtracts, since no other pass
2885 currently does it. */
2887 static void
2888 repropagate_negates (void)
2890 unsigned int i = 0;
2891 tree negate;
2893 FOR_EACH_VEC_ELT (tree, plus_negates, i, negate)
2895 gimple user = get_single_immediate_use (negate);
2897 if (!user || !is_gimple_assign (user))
2898 continue;
2900 /* The negate operand can be either operand of a PLUS_EXPR
2901 (it can be the LHS if the RHS is a constant for example).
2903 Force the negate operand to the RHS of the PLUS_EXPR, then
2904 transform the PLUS_EXPR into a MINUS_EXPR. */
2905 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
2907 /* If the negated operand appears on the LHS of the
2908 PLUS_EXPR, exchange the operands of the PLUS_EXPR
2909 to force the negated operand to the RHS of the PLUS_EXPR. */
2910 if (gimple_assign_rhs1 (user) == negate)
2912 swap_tree_operands (user,
2913 gimple_assign_rhs1_ptr (user),
2914 gimple_assign_rhs2_ptr (user));
2917 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
2918 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
2919 if (gimple_assign_rhs2 (user) == negate)
2921 tree rhs1 = gimple_assign_rhs1 (user);
2922 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
2923 gimple_stmt_iterator gsi = gsi_for_stmt (user);
2924 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
2925 update_stmt (user);
2928 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
2930 if (gimple_assign_rhs1 (user) == negate)
2932 /* We have
2933 x = -a
2934 y = x - b
2935 which we transform into
2936 x = a + b
2937 y = -x .
2938 This pushes down the negate which we possibly can merge
2939 into some other operation, hence insert it into the
2940 plus_negates vector. */
2941 gimple feed = SSA_NAME_DEF_STMT (negate);
2942 tree a = gimple_assign_rhs1 (feed);
2943 tree rhs2 = gimple_assign_rhs2 (user);
2944 gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
2945 gimple_replace_lhs (feed, negate);
2946 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
2947 update_stmt (gsi_stmt (gsi));
2948 gsi2 = gsi_for_stmt (user);
2949 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
2950 update_stmt (gsi_stmt (gsi2));
2951 gsi_move_before (&gsi, &gsi2);
2952 VEC_safe_push (tree, heap, plus_negates,
2953 gimple_assign_lhs (gsi_stmt (gsi2)));
2955 else
2957 /* Transform "x = -a; y = b - x" into "y = b + a", getting
2958 rid of one operation. */
2959 gimple feed = SSA_NAME_DEF_STMT (negate);
2960 tree a = gimple_assign_rhs1 (feed);
2961 tree rhs1 = gimple_assign_rhs1 (user);
2962 gimple_stmt_iterator gsi = gsi_for_stmt (user);
2963 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
2964 update_stmt (gsi_stmt (gsi));
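/* Illustrative, standalone sketch (not part of this pass), kept under
   #if 0: the two MINUS_EXPR rewrites above as plain arithmetic
   identities, checked on a grid of small values.  "x = -a; y = x - b"
   becomes "x = a + b; y = -x", and "x = -a; y = b - x" becomes
   "y = b + a".  */
#if 0
#include <assert.h>

int
main (void)
{
  int a, b;

  for (a = -3; a <= 3; a++)
    for (b = -3; b <= 3; b++)
      {
	int x = -a;

	/* First form: the negate is pushed below the subtract.  */
	assert (x - b == -(a + b));

	/* Second form: the negate and the subtract cancel.  */
	assert (b - x == b + a);
      }
  return 0;
}
#endif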
2970 /* Returns true if OP is of a type for which we can do reassociation.
2971 That is for integral or non-saturating fixed-point types, and for
2972 floating-point types when associative-math is enabled. */
2974 static bool
2975 can_reassociate_p (tree op)
2977 tree type = TREE_TYPE (op);
2978 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
2979 || NON_SAT_FIXED_POINT_TYPE_P (type)
2980 || (flag_associative_math && FLOAT_TYPE_P (type)))
2981 return true;
2982 return false;
2985 /* Break up subtract operations in block BB.
2987 We do this top down because we don't know whether the subtract is
2988 part of a possible chain of reassociation except at the top.
2990 IE given
2991 d = f + g
2992 c = a + e
2993 b = c - d
2994 q = b - r
2995 k = t - q
2997 we want to break up k = t - q, but we won't until we've transformed q
2998 = b - r, which won't be broken up until we transform b = c - d.
3000 En passant, clear the GIMPLE visited flag on every statement. */
3002 static void
3003 break_up_subtract_bb (basic_block bb)
3005 gimple_stmt_iterator gsi;
3006 basic_block son;
3008 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3010 gimple stmt = gsi_stmt (gsi);
3011 gimple_set_visited (stmt, false);
3013 if (!is_gimple_assign (stmt)
3014 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3015 continue;
3017 /* Look for simple gimple subtract operations. */
3018 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3020 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3021 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3022 continue;
3024 /* Check for a subtract used only in an addition. If this
3025 is the case, transform it into an add of a negate for better
3026 reassociation. IE transform C = A-B into C = A + -B if C
3027 is only used in an addition. */
3028 if (should_break_up_subtract (stmt))
3029 break_up_subtract (stmt, &gsi);
3031 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3032 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3033 VEC_safe_push (tree, heap, plus_negates, gimple_assign_lhs (stmt));
3035 for (son = first_dom_son (CDI_DOMINATORS, bb);
3036 son;
3037 son = next_dom_son (CDI_DOMINATORS, son))
3038 break_up_subtract_bb (son);
3041 /* Used for repeated factor analysis. */
3042 struct repeat_factor_d
3044 /* An SSA name that occurs in a multiply chain. */
3045 tree factor;
3047 /* Cached rank of the factor. */
3048 unsigned rank;
3050 /* Number of occurrences of the factor in the chain. */
3051 HOST_WIDE_INT count;
3053 /* An SSA name representing the product of this factor and
3054 all factors appearing later in the repeated factor vector. */
3055 tree repr;
3058 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3059 typedef const struct repeat_factor_d *const_repeat_factor_t;
3061 DEF_VEC_O (repeat_factor);
3062 DEF_VEC_ALLOC_O (repeat_factor, heap);
3064 static VEC (repeat_factor, heap) *repeat_factor_vec;
3066 /* Used for sorting the repeat factor vector. Sort primarily by
3067 ascending occurrence count, secondarily by descending rank. */
3069 static int
3070 compare_repeat_factors (const void *x1, const void *x2)
3072 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3073 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3075 if (rf1->count != rf2->count)
3076 return rf1->count - rf2->count;
3078 return rf2->rank - rf1->rank;
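/* Illustrative, standalone sketch (not part of this pass), kept under
   #if 0: the ordering the comparator above produces, shown with qsort
   on a sketch-local struct carrying just a count and a rank.  */
#if 0
#include <stdio.h>
#include <stdlib.h>

struct sketch_factor
{
  const char *name;
  long count;
  unsigned rank;
};

static int
sketch_compare (const void *x1, const void *x2)
{
  const struct sketch_factor *f1 = (const struct sketch_factor *) x1;
  const struct sketch_factor *f2 = (const struct sketch_factor *) x2;

  if (f1->count != f2->count)
    return f1->count < f2->count ? -1 : 1;
  return f2->rank > f1->rank ? 1 : f2->rank < f1->rank ? -1 : 0;
}

int
main (void)
{
  struct sketch_factor v[] = {
    { "x", 2, 7 }, { "y", 3, 5 }, { "z", 4, 9 }, { "w", 3, 8 }
  };
  unsigned i;

  qsort (v, sizeof v / sizeof v[0], sizeof v[0], sketch_compare);
  /* Prints "x w y z": ascending count, ties broken by higher rank
     first.  */
  for (i = 0; i < sizeof v / sizeof v[0]; i++)
    printf ("%s ", v[i].name);
  printf ("\n");
  return 0;
}
#endif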
3081 /* Get a new SSA name for register variable *TARGET of type TYPE.
3082 If *TARGET is null or incompatible with TYPE, create the variable
3083 first. */
3085 static tree
3086 get_reassoc_pow_ssa_name (tree *target, tree type)
3088 if (!*target || !types_compatible_p (type, TREE_TYPE (*target)))
3090 *target = create_tmp_reg (type, "reassocpow");
3091 add_referenced_var (*target);
3094 return make_ssa_name (*target, NULL);
3097 /* Look for repeated operands in OPS in the multiply tree rooted at
3098 STMT. Replace them with an optimal sequence of multiplies and powi
3099 builtin calls, and remove the used operands from OPS. Return an
3100 SSA name representing the value of the replacement sequence. */
3102 static tree
3103 attempt_builtin_powi (gimple stmt, VEC(operand_entry_t, heap) **ops,
3104 tree *target)
3106 unsigned i, j, vec_len;
3107 int ii;
3108 operand_entry_t oe;
3109 repeat_factor_t rf1, rf2;
3110 repeat_factor rfnew;
3111 tree result = NULL_TREE;
3112 tree target_ssa, iter_result;
3113 tree type = TREE_TYPE (gimple_get_lhs (stmt));
3114 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3115 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3116 gimple mul_stmt, pow_stmt;
3118 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3119 target. */
3120 if (!powi_fndecl)
3121 return NULL_TREE;
3123 /* Allocate the repeated factor vector. */
3124 repeat_factor_vec = VEC_alloc (repeat_factor, heap, 10);
3126 /* Scan the OPS vector for all SSA names in the product and build
3127 up a vector of occurrence counts for each factor. */
3128 FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe)
3130 if (TREE_CODE (oe->op) == SSA_NAME)
3132 FOR_EACH_VEC_ELT (repeat_factor, repeat_factor_vec, j, rf1)
3134 if (rf1->factor == oe->op)
3136 rf1->count += oe->count;
3137 break;
3141 if (j >= VEC_length (repeat_factor, repeat_factor_vec))
3143 rfnew.factor = oe->op;
3144 rfnew.rank = oe->rank;
3145 rfnew.count = oe->count;
3146 rfnew.repr = NULL_TREE;
3147 VEC_safe_push (repeat_factor, heap, repeat_factor_vec, &rfnew);
3152 /* Sort the repeated factor vector by (a) increasing occurrence count,
3153 and (b) decreasing rank. */
3154 VEC_qsort (repeat_factor, repeat_factor_vec, compare_repeat_factors);
3156 /* It is generally best to combine as many base factors as possible
3157 into a product before applying __builtin_powi to the result.
3158 However, the sort order chosen for the repeated factor vector
3159 allows us to cache partial results for the product of the base
3160 factors for subsequent use. When we already have a cached partial
3161 result from a previous iteration, it is best to make use of it
3162 before looking for another __builtin_pow opportunity.
3164 As an example, consider x * x * y * y * y * z * z * z * z.
3165 We want to first compose the product x * y * z, raise it to the
3166 second power, then multiply this by y * z, and finally multiply
3167 by z. This can be done in 5 multiplies provided we cache y * z
3168 for use in both expressions:
3170 t1 = y * z
3171 t2 = t1 * x
3172 t3 = t2 * t2
3173 t4 = t1 * t3
3174 result = t4 * z
3176 If we instead ignored the cached y * z and first multiplied by
3177 the __builtin_pow opportunity z * z, we would get the inferior:
3179 t1 = y * z
3180 t2 = t1 * x
3181 t3 = t2 * t2
3182 t4 = z * z
3183 t5 = t3 * t4
3184 result = t5 * y */
3186 vec_len = VEC_length (repeat_factor, repeat_factor_vec);
3188 /* Repeatedly look for opportunities to create a builtin_powi call. */
3189 while (true)
3191 HOST_WIDE_INT power;
3193 /* First look for the largest cached product of factors from
3194 preceding iterations. If found, create a builtin_powi for
3195 it if the minimum occurrence count for its factors is at
3196 least 2, or just use this cached product as our next
3197 multiplicand if the minimum occurrence count is 1. */
3198 FOR_EACH_VEC_ELT (repeat_factor, repeat_factor_vec, j, rf1)
3200 if (rf1->repr && rf1->count > 0)
3201 break;
3204 if (j < vec_len)
3206 power = rf1->count;
3208 if (power == 1)
3210 iter_result = rf1->repr;
3212 if (dump_file && (dump_flags & TDF_DETAILS))
3214 unsigned elt;
3215 repeat_factor_t rf;
3216 fputs ("Multiplying by cached product ", dump_file);
3217 for (elt = j; elt < vec_len; elt++)
3219 rf = VEC_index (repeat_factor, repeat_factor_vec, elt);
3220 print_generic_expr (dump_file, rf->factor, 0);
3221 if (elt < vec_len - 1)
3222 fputs (" * ", dump_file);
3224 fputs ("\n", dump_file);
3227 else
3229 iter_result = get_reassoc_pow_ssa_name (target, type);
3230 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3231 build_int_cst (integer_type_node,
3232 power));
3233 gimple_call_set_lhs (pow_stmt, iter_result);
3234 gimple_set_location (pow_stmt, gimple_location (stmt));
3235 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3237 if (dump_file && (dump_flags & TDF_DETAILS))
3239 unsigned elt;
3240 repeat_factor_t rf;
3241 fputs ("Building __builtin_pow call for cached product (",
3242 dump_file);
3243 for (elt = j; elt < vec_len; elt++)
3245 rf = VEC_index (repeat_factor, repeat_factor_vec, elt);
3246 print_generic_expr (dump_file, rf->factor, 0);
3247 if (elt < vec_len - 1)
3248 fputs (" * ", dump_file);
3250 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
3251 power);
3255 else
3257 /* Otherwise, find the first factor in the repeated factor
3258 vector whose occurrence count is at least 2. If no such
3259 factor exists, there are no builtin_powi opportunities
3260 remaining. */
3261 FOR_EACH_VEC_ELT (repeat_factor, repeat_factor_vec, j, rf1)
3263 if (rf1->count >= 2)
3264 break;
3267 if (j >= vec_len)
3268 break;
3270 power = rf1->count;
3272 if (dump_file && (dump_flags & TDF_DETAILS))
3274 unsigned elt;
3275 repeat_factor_t rf;
3276 fputs ("Building __builtin_pow call for (", dump_file);
3277 for (elt = j; elt < vec_len; elt++)
3279 rf = VEC_index (repeat_factor, repeat_factor_vec, elt);
3280 print_generic_expr (dump_file, rf->factor, 0);
3281 if (elt < vec_len - 1)
3282 fputs (" * ", dump_file);
3284 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
3287 reassociate_stats.pows_created++;
3289 /* Visit each element of the vector in reverse order (so that
3290 high-occurrence elements are visited first, and within the
3291 same occurrence count, lower-ranked elements are visited
3292 first). Form a linear product of all elements in this order
3293 whose occurrence count is at least that of element J.
3294 Record the SSA name representing the product of each element
3295 with all subsequent elements in the vector. */
3296 if (j == vec_len - 1)
3297 rf1->repr = rf1->factor;
3298 else
3300 for (ii = vec_len - 2; ii >= (int)j; ii--)
3302 tree op1, op2;
3304 rf1 = VEC_index (repeat_factor, repeat_factor_vec, ii);
3305 rf2 = VEC_index (repeat_factor, repeat_factor_vec, ii + 1);
3307 /* Init the last factor's representative to be itself. */
3308 if (!rf2->repr)
3309 rf2->repr = rf2->factor;
3311 op1 = rf1->factor;
3312 op2 = rf2->repr;
3314 target_ssa = get_reassoc_pow_ssa_name (target, type);
3315 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
3316 target_ssa,
3317 op1, op2);
3318 gimple_set_location (mul_stmt, gimple_location (stmt));
3319 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3320 rf1->repr = target_ssa;
3322 /* Don't reprocess the multiply we just introduced. */
3323 gimple_set_visited (mul_stmt, true);
3327 /* Form a call to __builtin_powi for the maximum product
3328 just formed, raised to the power obtained earlier. */
3329 rf1 = VEC_index (repeat_factor, repeat_factor_vec, j);
3330 iter_result = get_reassoc_pow_ssa_name (target, type);
3331 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3332 build_int_cst (integer_type_node,
3333 power));
3334 gimple_call_set_lhs (pow_stmt, iter_result);
3335 gimple_set_location (pow_stmt, gimple_location (stmt));
3336 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3339 /* If we previously formed at least one other builtin_powi call,
3340 form the product of this one and those others. */
3341 if (result)
3343 tree new_result = get_reassoc_pow_ssa_name (target, type);
3344 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
3345 result, iter_result);
3346 gimple_set_location (mul_stmt, gimple_location (stmt));
3347 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3348 gimple_set_visited (mul_stmt, true);
3349 result = new_result;
3351 else
3352 result = iter_result;
3354 /* Decrement the occurrence count of each element in the product
3355 by the count found above, and remove this many copies of each
3356 factor from OPS. */
3357 for (i = j; i < vec_len; i++)
3359 unsigned k = power;
3360 unsigned n;
3362 rf1 = VEC_index (repeat_factor, repeat_factor_vec, i);
3363 rf1->count -= power;
3365 FOR_EACH_VEC_ELT_REVERSE (operand_entry_t, *ops, n, oe)
3367 if (oe->op == rf1->factor)
3369 if (oe->count <= k)
3371 VEC_ordered_remove (operand_entry_t, *ops, n);
3372 k -= oe->count;
3374 if (k == 0)
3375 break;
3377 else
3379 oe->count -= k;
3380 break;
3387 /* At this point all elements in the repeated factor vector have a
3388 remaining occurrence count of 0 or 1, and those with a count of 1
3389 don't have cached representatives. Re-sort the ops vector and
3390 clean up. */
3391 VEC_qsort (operand_entry_t, *ops, sort_by_operand_rank);
3392 VEC_free (repeat_factor, heap, repeat_factor_vec);
3394 /* Return the final product computed herein. Note that there may
3395 still be some elements with single occurrence count left in OPS;
3396 those will be handled by the normal reassociation logic. */
3397 return result;
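/* Illustrative, standalone sketch (not part of this pass), kept under
   #if 0: the worked example from the comment above.  The product
   x*x * y*y*y * z*z*z*z is computed with five multiplies by caching
   t1 = y * z, squaring x * y * z, and multiplying the leftovers back
   in.  The values are small integers, so the double comparison is
   exact.  */
#if 0
#include <assert.h>

int
main (void)
{
  double x = 2.0, y = 3.0, z = 5.0;

  double naive = x * x * y * y * y * z * z * z * z;	/* 8 multiplies  */

  double t1 = y * z;		/* cached partial product  */
  double t2 = t1 * x;		/* x * y * z  */
  double t3 = t2 * t2;		/* (x * y * z) ^ 2  */
  double t4 = t1 * t3;		/* times the cached y * z  */
  double result = t4 * z;	/* times the remaining z  */

  assert (result == naive);	/* 5 multiplies instead of 8  */
  return 0;
}
#endif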
3400 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
3402 static void
3403 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
3405 tree rhs1;
3407 if (dump_file && (dump_flags & TDF_DETAILS))
3409 fprintf (dump_file, "Transforming ");
3410 print_gimple_stmt (dump_file, stmt, 0, 0);
3413 rhs1 = gimple_assign_rhs1 (stmt);
3414 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
3415 update_stmt (stmt);
3416 remove_visited_stmt_chain (rhs1);
3418 if (dump_file && (dump_flags & TDF_DETAILS))
3420 fprintf (dump_file, " into ");
3421 print_gimple_stmt (dump_file, stmt, 0, 0);
3425 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
3427 static void
3428 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
3429 tree rhs1, tree rhs2)
3431 if (dump_file && (dump_flags & TDF_DETAILS))
3433 fprintf (dump_file, "Transforming ");
3434 print_gimple_stmt (dump_file, stmt, 0, 0);
3437 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
3438 update_stmt (gsi_stmt (*gsi));
3439 remove_visited_stmt_chain (rhs1);
3441 if (dump_file && (dump_flags & TDF_DETAILS))
3443 fprintf (dump_file, " into ");
3444 print_gimple_stmt (dump_file, stmt, 0, 0);
3448 /* Reassociate expressions in basic block BB and its post-dominator as
3449 children. */
3451 static void
3452 reassociate_bb (basic_block bb)
3454 gimple_stmt_iterator gsi;
3455 basic_block son;
3456 tree target = NULL_TREE;
3458 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
3460 gimple stmt = gsi_stmt (gsi);
3462 if (is_gimple_assign (stmt)
3463 && !stmt_could_throw_p (stmt))
3465 tree lhs, rhs1, rhs2;
3466 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
3468 /* If this is not a gimple binary expression, there is
3469 nothing for us to do with it. */
3470 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
3471 continue;
3473 /* If this was part of an already processed statement,
3474 we don't need to touch it again. */
3475 if (gimple_visited_p (stmt))
3477 /* This statement might have become dead because of previous
3478 reassociations. */
3479 if (has_zero_uses (gimple_get_lhs (stmt)))
3481 gsi_remove (&gsi, true);
3482 release_defs (stmt);
3483 /* We might end up removing the last stmt above which
3484 places the iterator to the end of the sequence.
3485 Reset it to the last stmt in this case which might
3486 be the end of the sequence as well if we removed
3487 the last statement of the sequence. In which case
3488 we need to bail out. */
3489 if (gsi_end_p (gsi))
3491 gsi = gsi_last_bb (bb);
3492 if (gsi_end_p (gsi))
3493 break;
3496 continue;
3499 lhs = gimple_assign_lhs (stmt);
3500 rhs1 = gimple_assign_rhs1 (stmt);
3501 rhs2 = gimple_assign_rhs2 (stmt);
3503 /* For non-bit or min/max operations we can't associate
3504 all types. Verify that here. */
3505 if (rhs_code != BIT_IOR_EXPR
3506 && rhs_code != BIT_AND_EXPR
3507 && rhs_code != BIT_XOR_EXPR
3508 && rhs_code != MIN_EXPR
3509 && rhs_code != MAX_EXPR
3510 && (!can_reassociate_p (lhs)
3511 || !can_reassociate_p (rhs1)
3512 || !can_reassociate_p (rhs2)))
3513 continue;
3515 if (associative_tree_code (rhs_code))
3517 VEC(operand_entry_t, heap) *ops = NULL;
3518 tree powi_result = NULL_TREE;
3520 /* There may be no immediate uses left by the time we
3521 get here because we may have eliminated them all. */
3522 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
3523 continue;
3525 gimple_set_visited (stmt, true);
3526 linearize_expr_tree (&ops, stmt, true, true);
3527 VEC_qsort (operand_entry_t, ops, sort_by_operand_rank);
3528 optimize_ops_list (rhs_code, &ops);
3529 if (undistribute_ops_list (rhs_code, &ops,
3530 loop_containing_stmt (stmt)))
3532 VEC_qsort (operand_entry_t, ops, sort_by_operand_rank);
3533 optimize_ops_list (rhs_code, &ops);
3536 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
3537 optimize_range_tests (rhs_code, &ops);
3539 if (first_pass_instance
3540 && rhs_code == MULT_EXPR
3541 && flag_unsafe_math_optimizations)
3542 powi_result = attempt_builtin_powi (stmt, &ops, &target);
3544 /* If the operand vector is now empty, all operands were
3545 consumed by the __builtin_powi optimization. */
3546 if (VEC_length (operand_entry_t, ops) == 0)
3547 transform_stmt_to_copy (&gsi, stmt, powi_result);
3548 else if (VEC_length (operand_entry_t, ops) == 1)
3550 tree last_op = VEC_last (operand_entry_t, ops)->op;
3552 if (powi_result)
3553 transform_stmt_to_multiply (&gsi, stmt, last_op,
3554 powi_result);
3555 else
3556 transform_stmt_to_copy (&gsi, stmt, last_op);
3558 else
3560 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
3561 int ops_num = VEC_length (operand_entry_t, ops);
3562 int width = get_reassociation_width (ops_num, rhs_code, mode);
3564 if (dump_file && (dump_flags & TDF_DETAILS))
3565 fprintf (dump_file,
3566 "Width = %d was chosen for reassociation\n", width);
3568 if (width > 1
3569 && VEC_length (operand_entry_t, ops) > 3)
3570 rewrite_expr_tree_parallel (stmt, width, ops);
3571 else
3572 rewrite_expr_tree (stmt, 0, ops, false);
3574 /* If we combined some repeated factors into a
3575 __builtin_powi call, multiply that result by the
3576 reassociated operands. */
3577 if (powi_result)
3579 gimple mul_stmt;
3580 tree type = TREE_TYPE (gimple_get_lhs (stmt));
3581 tree target_ssa = get_reassoc_pow_ssa_name (&target,
3582 type);
3583 gimple_set_lhs (stmt, target_ssa);
3584 update_stmt (stmt);
3585 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
3586 powi_result,
3587 target_ssa);
3588 gimple_set_location (mul_stmt, gimple_location (stmt));
3589 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
3593 VEC_free (operand_entry_t, heap, ops);
3597 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
3598 son;
3599 son = next_dom_son (CDI_POST_DOMINATORS, son))
3600 reassociate_bb (son);
3603 void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops);
3604 void debug_ops_vector (VEC (operand_entry_t, heap) *ops);
3606 /* Dump the operand entry vector OPS to FILE. */
3608 void
3609 dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops)
3611 operand_entry_t oe;
3612 unsigned int i;
3614 FOR_EACH_VEC_ELT (operand_entry_t, ops, i, oe)
3616 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
3617 print_generic_expr (file, oe->op, 0);
3621 /* Dump the operand entry vector OPS to STDERR. */
3623 DEBUG_FUNCTION void
3624 debug_ops_vector (VEC (operand_entry_t, heap) *ops)
3626 dump_ops_vector (stderr, ops);
3629 static void
3630 do_reassoc (void)
3632 break_up_subtract_bb (ENTRY_BLOCK_PTR);
3633 reassociate_bb (EXIT_BLOCK_PTR);
3636 /* Initialize the reassociation pass. */
3638 static void
3639 init_reassoc (void)
3641 int i;
3642 long rank = 2;
3643 int *bbs = XNEWVEC (int, last_basic_block + 1);
3645 /* Find the loops, so that we can prevent moving calculations in
3646 them. */
3647 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
3649 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
3651 operand_entry_pool = create_alloc_pool ("operand entry pool",
3652 sizeof (struct operand_entry), 30);
3653 next_operand_entry_id = 0;
3655 /* Reverse RPO (Reverse Post Order) will give us something where
3656 deeper loops come later. */
3657 pre_and_rev_post_order_compute (NULL, bbs, false);
3658 bb_rank = XCNEWVEC (long, last_basic_block + 1);
3659 operand_rank = pointer_map_create ();
3661 /* Give each default definition a distinct rank. This includes
3662 parameters and the static chain. Walk backwards over all
3663 SSA names so that we get proper rank ordering according
3664 to tree_swap_operands_p. */
3665 for (i = num_ssa_names - 1; i > 0; --i)
3667 tree name = ssa_name (i);
3668 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
3669 insert_operand_rank (name, ++rank);
3672 /* Set up rank for each BB */
3673 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
3674 bb_rank[bbs[i]] = ++rank << 16;
3676 free (bbs);
3677 calculate_dominance_info (CDI_POST_DOMINATORS);
3678 plus_negates = NULL;
3681 /* Cleanup after the reassociation pass, and print stats if
3682 requested. */
3684 static void
3685 fini_reassoc (void)
3687 statistics_counter_event (cfun, "Linearized",
3688 reassociate_stats.linearized);
3689 statistics_counter_event (cfun, "Constants eliminated",
3690 reassociate_stats.constants_eliminated);
3691 statistics_counter_event (cfun, "Ops eliminated",
3692 reassociate_stats.ops_eliminated);
3693 statistics_counter_event (cfun, "Statements rewritten",
3694 reassociate_stats.rewritten);
3695 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
3696 reassociate_stats.pows_encountered);
3697 statistics_counter_event (cfun, "Built-in powi calls created",
3698 reassociate_stats.pows_created);
3700 pointer_map_destroy (operand_rank);
3701 free_alloc_pool (operand_entry_pool);
3702 free (bb_rank);
3703 VEC_free (tree, heap, plus_negates);
3704 free_dominance_info (CDI_POST_DOMINATORS);
3705 loop_optimizer_finalize ();
3708 /* Gate and execute functions for Reassociation. */
3710 static unsigned int
3711 execute_reassoc (void)
3713 init_reassoc ();
3715 do_reassoc ();
3716 repropagate_negates ();
3718 fini_reassoc ();
3719 return 0;
3722 static bool
3723 gate_tree_ssa_reassoc (void)
3725 return flag_tree_reassoc != 0;
3728 struct gimple_opt_pass pass_reassoc =
3731 GIMPLE_PASS,
3732 "reassoc", /* name */
3733 gate_tree_ssa_reassoc, /* gate */
3734 execute_reassoc, /* execute */
3735 NULL, /* sub */
3736 NULL, /* next */
3737 0, /* static_pass_number */
3738 TV_TREE_REASSOC, /* tv_id */
3739 PROP_cfg | PROP_ssa, /* properties_required */
3740 0, /* properties_provided */
3741 0, /* properties_destroyed */
3742 0, /* todo_flags_start */
3743 TODO_verify_ssa
3744 | TODO_verify_flow
3745 | TODO_ggc_collect /* todo_flags_finish */