1 /* Reassociation for trees.
2 Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Daniel Berlin <dan@dberlin.org>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "tree.h"
27 #include "basic-block.h"
28 #include "tree-pretty-print.h"
29 #include "gimple-pretty-print.h"
30 #include "tree-inline.h"
31 #include "tree-flow.h"
32 #include "gimple.h"
33 #include "tree-dump.h"
34 #include "timevar.h"
35 #include "tree-iterator.h"
36 #include "tree-pass.h"
37 #include "alloc-pool.h"
38 #include "vec.h"
39 #include "langhooks.h"
40 #include "pointer-set.h"
41 #include "cfgloop.h"
42 #include "flags.h"
43 #include "target.h"
44 #include "params.h"
45 #include "diagnostic-core.h"
47 /* This is a simple global reassociation pass. It is, in part, based
48 on the LLVM pass of the same name (they do some things more/less
49 than we do, in different orders, etc.).
51 It consists of five steps:
53 1. Breaking up subtract operations into addition + negate, where
54 it would promote the reassociation of adds.
56 2. Left linearization of the expression trees, so that (A+B)+(C+D)
57 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
58 During linearization, we place the operands of the binary
59 expressions into a vector of operand_entry_t.
61 3. Optimization of the operand lists, eliminating things like a +
62 -a, a & a, etc.
64 3a. Combine repeated factors with the same occurrence counts
65 into a __builtin_powi call that will later be optimized into
66 an optimal number of multiplies.
68 4. Rewrite the expression trees we linearized and optimized so
69 they are in proper rank order.
71 5. Repropagate negates, as nothing else will clean them up ATM.
73 A bit of theory on #4, since nobody seems to write anything down
74 about why it makes sense to do it the way they do it:
76 We could do this in a much nicer way theoretically, but don't (for reasons
77 explained after the theoretically nice way is described :P).
79 In order to promote the most redundancy elimination, you want
80 binary expressions whose operands are the same rank (or
81 preferably, the same value) exposed to the redundancy eliminator,
82 for possible elimination.
84 So the way to do this if we really cared, is to build the new op
85 tree from the leaves to the roots, merging as you go, and putting the
86 new op on the end of the worklist, until you are left with one
87 thing on the worklist.
89 I.e. if you have to rewrite the following set of operands (listed with
90 rank in parentheses), with opcode PLUS_EXPR:
92 a (1), b (1), c (1), d (2), e (2)
95 We start with our merge worklist empty, and the ops list with all of
96 those on it.
98 You want to first merge all leaves of the same rank, as much as
99 possible.
101 So first build a binary op of
103 mergetmp = a + b, and put "mergetmp" on the merge worklist.
105 Because there is no three operand form of PLUS_EXPR, c is not going to
106 be exposed to redundancy elimination as a rank 1 operand.
108 So you might as well throw it on the merge worklist (you could also
109 consider it to now be a rank two operand, and merge it with d and e,
110 but in this case, you then have evicted e from a binary op. So at
111 least in this situation, you can't win.)
113 Then build a binary op of d + e
114 mergetmp2 = d + e
116 and put mergetmp2 on the merge worklist.
118 so merge worklist = {mergetmp, c, mergetmp2}
120 Continue building binary ops of these operations until you have only
121 one operation left on the worklist.
123 So we have
125 build binary op
126 mergetmp3 = mergetmp + c
128 worklist = {mergetmp2, mergetmp3}
130 mergetmp4 = mergetmp2 + mergetmp3
132 worklist = {mergetmp4}
134 because we have one operation left, we can now just set the original
135 statement equal to the result of that operation.
137 This will at least expose a + b and d + e to redundancy elimination
138 as binary operations.
140 For extra points, you can reuse the old statements to build the
141 mergetmps, since you shouldn't run out.
143 So why don't we do this?
145 Because it's expensive, and rarely will help. Most trees we are
146 reassociating have 3 or fewer ops. If they have 2 ops, they already
147 will be written into a nice single binary op. If you have 3 ops, a
148 single simple check suffices to tell you whether the first two are of the
149 same rank. If so, you know to order it
151 mergetmp = op1 + op2
152 newstmt = mergetmp + op3
154 instead of
155 mergetmp = op2 + op3
156 newstmt = mergetmp + op1
158 If all three are of the same rank, you can't expose them all in a
159 single binary operator anyway, so the above is *still* the best you
160 can do.
162 Thus, this is what we do. When we have three ops left, we check to see
163 what order to put them in, and call it a day. As a nod to vector sum
164 reduction, we check if any of the ops are really a phi node that is a
165 destructive update for the associating op, and keep the destructive
166 update together for vector sum reduction recognition. */
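/* A purely illustrative walk-through of the steps above (hypothetical
   source, not taken from any testcase): given

     e = a - b;
     f = e + c;
     g = f + d;

   step 1 rewrites the subtraction as e = a + (-b), step 2 linearizes
   the chain into the operand vector {a, -b, c, d}, step 3 would remove
   a cancelling pair such as x + (-x) if one appeared, and step 4
   re-emits the chain as left-linear binary additions in rank order.  */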
169 /* Statistics */
170 static struct
172 int linearized;
173 int constants_eliminated;
174 int ops_eliminated;
175 int rewritten;
176 int pows_encountered;
177 int pows_created;
178 } reassociate_stats;
180 /* Operator, rank pair. */
181 typedef struct operand_entry
183 unsigned int rank;
184 int id;
185 tree op;
186 unsigned int count;
187 } *operand_entry_t;
189 static alloc_pool operand_entry_pool;
191 /* This is used to assign a unique ID to each struct operand_entry
192 so that qsort results are identical on different hosts. */
193 static int next_operand_entry_id;
195 /* Starting rank number for a given basic block, so that we can rank
196 operations using unmovable instructions in that BB based on the bb
197 depth. */
198 static long *bb_rank;
200 /* Operand->rank hashtable. */
201 static struct pointer_map_t *operand_rank;
203 /* Forward decls. */
204 static long get_rank (tree);
207 /* Bias amount for loop-carried phis. We want this to be larger than
208 the depth of any reassociation tree we can see, but not larger than
209 the rank difference between two blocks. */
210 #define PHI_LOOP_BIAS (1 << 15)
212 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
213 an innermost loop, and the phi has only a single use which is inside
214 the loop, then the rank is the block rank of the loop latch plus an
215 extra bias for the loop-carried dependence. This causes expressions
216 calculated into an accumulator variable to be independent for each
217 iteration of the loop. If STMT is some other phi, the rank is the
218 block rank of its containing block. */
219 static long
220 phi_rank (gimple stmt)
222 basic_block bb = gimple_bb (stmt);
223 struct loop *father = bb->loop_father;
224 tree res;
225 unsigned i;
226 use_operand_p use;
227 gimple use_stmt;
229 /* We only care about real loops (those with a latch). */
230 if (!father->latch)
231 return bb_rank[bb->index];
233 /* Interesting phis must be in headers of innermost loops. */
234 if (bb != father->header
235 || father->inner)
236 return bb_rank[bb->index];
238 /* Ignore virtual SSA_NAMEs. */
239 res = gimple_phi_result (stmt);
240 if (!is_gimple_reg (SSA_NAME_VAR (res)))
241 return bb_rank[bb->index];
243 /* The phi definition must have a single use, and that use must be
244 within the loop. Otherwise this isn't an accumulator pattern. */
245 if (!single_imm_use (res, &use, &use_stmt)
246 || gimple_bb (use_stmt)->loop_father != father)
247 return bb_rank[bb->index];
249 /* Look for phi arguments from within the loop. If found, bias this phi. */
250 for (i = 0; i < gimple_phi_num_args (stmt); i++)
252 tree arg = gimple_phi_arg_def (stmt, i);
253 if (TREE_CODE (arg) == SSA_NAME
254 && !SSA_NAME_IS_DEFAULT_DEF (arg))
256 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
257 if (gimple_bb (def_stmt)->loop_father == father)
258 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
262 /* Must be an uninteresting phi. */
263 return bb_rank[bb->index];
266 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
267 loop-carried dependence of an innermost loop, return TRUE; else
268 return FALSE. */
269 static bool
270 loop_carried_phi (tree exp)
272 gimple phi_stmt;
273 long block_rank;
275 if (TREE_CODE (exp) != SSA_NAME
276 || SSA_NAME_IS_DEFAULT_DEF (exp))
277 return false;
279 phi_stmt = SSA_NAME_DEF_STMT (exp);
281 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
282 return false;
284 /* Non-loop-carried phis have block rank. Loop-carried phis have
285 an additional bias added in. If this phi doesn't have block rank,
286 it's biased and should not be propagated. */
287 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
289 if (phi_rank (phi_stmt) != block_rank)
290 return true;
292 return false;
295 /* Return the maximum of RANK and the rank that should be propagated
296 from expression OP. For most operands, this is just the rank of OP.
297 For loop-carried phis, the value is zero to avoid undoing the bias
298 in favor of the phi. */
299 static long
300 propagate_rank (long rank, tree op)
302 long op_rank;
304 if (loop_carried_phi (op))
305 return rank;
307 op_rank = get_rank (op);
309 return MAX (rank, op_rank);
312 /* Look up the operand rank structure for expression E. */
314 static inline long
315 find_operand_rank (tree e)
317 void **slot = pointer_map_contains (operand_rank, e);
318 return slot ? (long) (intptr_t) *slot : -1;
321 /* Insert {E,RANK} into the operand rank hashtable. */
323 static inline void
324 insert_operand_rank (tree e, long rank)
326 void **slot;
327 gcc_assert (rank > 0);
328 slot = pointer_map_insert (operand_rank, e);
329 gcc_assert (!*slot);
330 *slot = (void *) (intptr_t) rank;
333 /* Given an expression E, return the rank of the expression. */
335 static long
336 get_rank (tree e)
338 /* Constants have rank 0. */
339 if (is_gimple_min_invariant (e))
340 return 0;
342 /* SSA_NAME's have the rank of the expression they are the result of.
344 For globals and uninitialized values, the rank is 0.
345 For function arguments, use the pre-setup rank.
346 For PHI nodes, stores, asm statements, etc, we use the rank of
347 the BB.
348 For simple operations, the rank is the maximum rank of any of
349 its operands, or the bb_rank, whichever is less.
350 I make no claims that this is optimal, however, it gives good
351 results. */
353 /* We make an exception to the normal ranking system to break
354 dependences of accumulator variables in loops. Suppose we
355 have a simple one-block loop containing:
357 x_1 = phi(x_0, x_2)
358 b = a + x_1
359 c = b + d
360 x_2 = c + e
362 As shown, each iteration of the calculation into x is fully
363 dependent upon the iteration before it. We would prefer to
364 see this in the form:
366 x_1 = phi(x_0, x_2)
367 b = a + d
368 c = b + e
369 x_2 = c + x_1
371 If the loop is unrolled, the calculations of b and c from
372 different iterations can be interleaved.
374 To obtain this result during reassociation, we bias the rank
375 of the phi definition x_1 upward, when it is recognized as an
376 accumulator pattern. The artificial rank causes it to be
377 added last, providing the desired independence. */
379 if (TREE_CODE (e) == SSA_NAME)
381 gimple stmt;
382 long rank;
383 int i, n;
384 tree op;
386 if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL
387 && SSA_NAME_IS_DEFAULT_DEF (e))
388 return find_operand_rank (e);
390 stmt = SSA_NAME_DEF_STMT (e);
391 if (gimple_bb (stmt) == NULL)
392 return 0;
394 if (gimple_code (stmt) == GIMPLE_PHI)
395 return phi_rank (stmt);
397 if (!is_gimple_assign (stmt)
398 || gimple_vdef (stmt))
399 return bb_rank[gimple_bb (stmt)->index];
401 /* If we already have a rank for this expression, use that. */
402 rank = find_operand_rank (e);
403 if (rank != -1)
404 return rank;
406 /* Otherwise, find the maximum rank for the operands. As an
407 exception, remove the bias from loop-carried phis when propagating
408 the rank so that dependent operations are not also biased. */
409 rank = 0;
410 if (gimple_assign_single_p (stmt))
412 tree rhs = gimple_assign_rhs1 (stmt);
413 n = TREE_OPERAND_LENGTH (rhs);
414 if (n == 0)
415 rank = propagate_rank (rank, rhs);
416 else
418 for (i = 0; i < n; i++)
420 op = TREE_OPERAND (rhs, i);
422 if (op != NULL_TREE)
423 rank = propagate_rank (rank, op);
427 else
429 n = gimple_num_ops (stmt);
430 for (i = 1; i < n; i++)
432 op = gimple_op (stmt, i);
433 gcc_assert (op);
434 rank = propagate_rank (rank, op);
438 if (dump_file && (dump_flags & TDF_DETAILS))
440 fprintf (dump_file, "Rank for ");
441 print_generic_expr (dump_file, e, 0);
442 fprintf (dump_file, " is %ld\n", (rank + 1));
445 /* Note the rank in the hashtable so we don't recompute it. */
446 insert_operand_rank (e, (rank + 1));
447 return (rank + 1);
450 /* Globals, etc, are rank 0 */
451 return 0;
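/* An illustrative (made-up) example of the ranking scheme above,
   assuming the parameters a_1 and b_2 were pre-ranked during setup:

     t_3 = a_1 + b_2;
     t_4 = t_3 * c_5;

   t_3 gets rank MAX (rank (a_1), rank (b_2)) + 1, t_4 gets one more
   than the larger of t_3's and c_5's ranks, and a constant operand
   always contributes rank 0.  */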
454 DEF_VEC_P(operand_entry_t);
455 DEF_VEC_ALLOC_P(operand_entry_t, heap);
457 /* We want integer ones to end up last no matter what, since they are
458 the ones we can do the most with. */
459 #define INTEGER_CONST_TYPE 1 << 3
460 #define FLOAT_CONST_TYPE 1 << 2
461 #define OTHER_CONST_TYPE 1 << 1
463 /* Classify an invariant tree into integer, float, or other, so that
464 we can sort them to be near other constants of the same type. */
465 static inline int
466 constant_type (tree t)
468 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
469 return INTEGER_CONST_TYPE;
470 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
471 return FLOAT_CONST_TYPE;
472 else
473 return OTHER_CONST_TYPE;
476 /* qsort comparison function to sort operand entries PA and PB by rank
477 so that the sorted array is ordered by rank in decreasing order. */
478 static int
479 sort_by_operand_rank (const void *pa, const void *pb)
481 const operand_entry_t oea = *(const operand_entry_t *)pa;
482 const operand_entry_t oeb = *(const operand_entry_t *)pb;
484 /* It's nicer for optimize_expression if constants that are likely
485 to fold when added/multiplied/whatever are put next to each
486 other. Since all constants have rank 0, order them by type. */
487 if (oeb->rank == 0 && oea->rank == 0)
489 if (constant_type (oeb->op) != constant_type (oea->op))
490 return constant_type (oeb->op) - constant_type (oea->op);
491 else
492 /* To make sorting result stable, we use unique IDs to determine
493 order. */
494 return oeb->id - oea->id;
497 /* Lastly, make sure the versions that are the same go next to each
498 other. We use SSA_NAME_VERSION because it's stable. */
499 if ((oeb->rank - oea->rank == 0)
500 && TREE_CODE (oea->op) == SSA_NAME
501 && TREE_CODE (oeb->op) == SSA_NAME)
503 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
504 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
505 else
506 return oeb->id - oea->id;
509 if (oeb->rank != oea->rank)
510 return oeb->rank - oea->rank;
511 else
512 return oeb->id - oea->id;
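/* For illustration (hypothetical operands): sorting {7, x_7 (rank 2),
   y_3 (rank 1)} with this comparator yields {x_7, y_3, 7}: decreasing
   rank, with the rank-0 constant last, where optimize_ops_list can try
   to fold it with a neighbouring constant.  */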
515 /* Add an operand entry to *OPS for the tree operand OP. */
517 static void
518 add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op)
520 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
522 oe->op = op;
523 oe->rank = get_rank (op);
524 oe->id = next_operand_entry_id++;
525 oe->count = 1;
526 VEC_safe_push (operand_entry_t, heap, *ops, oe);
529 /* Add an operand entry to *OPS for the tree operand OP with repeat
530 count REPEAT. */
532 static void
533 add_repeat_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op,
534 HOST_WIDE_INT repeat)
536 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
538 oe->op = op;
539 oe->rank = get_rank (op);
540 oe->id = next_operand_entry_id++;
541 oe->count = repeat;
542 VEC_safe_push (operand_entry_t, heap, *ops, oe);
544 reassociate_stats.pows_encountered++;
547 /* Return true if STMT is a reassociable operation containing a binary
548 operation with tree code CODE, and is inside LOOP. */
550 static bool
551 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
553 basic_block bb = gimple_bb (stmt);
555 if (gimple_bb (stmt) == NULL)
556 return false;
558 if (!flow_bb_inside_loop_p (loop, bb))
559 return false;
561 if (is_gimple_assign (stmt)
562 && gimple_assign_rhs_code (stmt) == code
563 && has_single_use (gimple_assign_lhs (stmt)))
564 return true;
566 return false;
570 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
571 operand of that operation. Otherwise, return NULL_TREE. */
573 static tree
574 get_unary_op (tree name, enum tree_code opcode)
576 gimple stmt = SSA_NAME_DEF_STMT (name);
578 if (!is_gimple_assign (stmt))
579 return NULL_TREE;
581 if (gimple_assign_rhs_code (stmt) == opcode)
582 return gimple_assign_rhs1 (stmt);
583 return NULL_TREE;
586 /* If CURR and LAST are a pair of ops that OPCODE allows us to
587 eliminate through equivalences, do so, remove them from OPS, and
588 return true. Otherwise, return false. */
590 static bool
591 eliminate_duplicate_pair (enum tree_code opcode,
592 VEC (operand_entry_t, heap) **ops,
593 bool *all_done,
594 unsigned int i,
595 operand_entry_t curr,
596 operand_entry_t last)
599 /* If we have two of the same op, and the opcode is &, |, min, or max,
600 we can eliminate one of them.
601 If we have two of the same op, and the opcode is ^, we can
602 eliminate both of them. */
604 if (last && last->op == curr->op)
606 switch (opcode)
608 case MAX_EXPR:
609 case MIN_EXPR:
610 case BIT_IOR_EXPR:
611 case BIT_AND_EXPR:
612 if (dump_file && (dump_flags & TDF_DETAILS))
614 fprintf (dump_file, "Equivalence: ");
615 print_generic_expr (dump_file, curr->op, 0);
616 fprintf (dump_file, " [&|minmax] ");
617 print_generic_expr (dump_file, last->op, 0);
618 fprintf (dump_file, " -> ");
619 print_generic_stmt (dump_file, last->op, 0);
622 VEC_ordered_remove (operand_entry_t, *ops, i);
623 reassociate_stats.ops_eliminated ++;
625 return true;
627 case BIT_XOR_EXPR:
628 if (dump_file && (dump_flags & TDF_DETAILS))
630 fprintf (dump_file, "Equivalence: ");
631 print_generic_expr (dump_file, curr->op, 0);
632 fprintf (dump_file, " ^ ");
633 print_generic_expr (dump_file, last->op, 0);
634 fprintf (dump_file, " -> nothing\n");
637 reassociate_stats.ops_eliminated += 2;
639 if (VEC_length (operand_entry_t, *ops) == 2)
641 VEC_free (operand_entry_t, heap, *ops);
642 *ops = NULL;
643 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
644 *all_done = true;
646 else
648 VEC_ordered_remove (operand_entry_t, *ops, i-1);
649 VEC_ordered_remove (operand_entry_t, *ops, i-1);
652 return true;
654 default:
655 break;
658 return false;
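/* Illustrative effect of the elimination above (made-up operand lists):
   for BIT_AND_EXPR the list {a_1, a_1, b_2} becomes {a_1, b_2} since
   a & a == a, while for BIT_XOR_EXPR the pair cancels entirely and a
   two-element list {a_1, a_1} collapses to the single constant 0.  */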
661 static VEC(tree, heap) *plus_negates;
663 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
664 expression, look in OPS for a corresponding positive operation to cancel
665 it out. If we find one, remove the other from OPS, replace
666 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
667 return false. */
669 static bool
670 eliminate_plus_minus_pair (enum tree_code opcode,
671 VEC (operand_entry_t, heap) **ops,
672 unsigned int currindex,
673 operand_entry_t curr)
675 tree negateop;
676 tree notop;
677 unsigned int i;
678 operand_entry_t oe;
680 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
681 return false;
683 negateop = get_unary_op (curr->op, NEGATE_EXPR);
684 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
685 if (negateop == NULL_TREE && notop == NULL_TREE)
686 return false;
688 /* Any non-negated version will have a rank that is one less than
689 the current rank. So once we hit those ranks, if we don't find
690 one, we can stop. */
692 for (i = currindex + 1;
693 VEC_iterate (operand_entry_t, *ops, i, oe)
694 && oe->rank >= curr->rank - 1 ;
695 i++)
697 if (oe->op == negateop)
700 if (dump_file && (dump_flags & TDF_DETAILS))
702 fprintf (dump_file, "Equivalence: ");
703 print_generic_expr (dump_file, negateop, 0);
704 fprintf (dump_file, " + -");
705 print_generic_expr (dump_file, oe->op, 0);
706 fprintf (dump_file, " -> 0\n");
709 VEC_ordered_remove (operand_entry_t, *ops, i);
710 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
711 VEC_ordered_remove (operand_entry_t, *ops, currindex);
712 reassociate_stats.ops_eliminated ++;
714 return true;
716 else if (oe->op == notop)
718 tree op_type = TREE_TYPE (oe->op);
720 if (dump_file && (dump_flags & TDF_DETAILS))
722 fprintf (dump_file, "Equivalence: ");
723 print_generic_expr (dump_file, notop, 0);
724 fprintf (dump_file, " + ~");
725 print_generic_expr (dump_file, oe->op, 0);
726 fprintf (dump_file, " -> -1\n");
729 VEC_ordered_remove (operand_entry_t, *ops, i);
730 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
731 VEC_ordered_remove (operand_entry_t, *ops, currindex);
732 reassociate_stats.ops_eliminated ++;
734 return true;
738 /* CURR->OP is a negate expr in a plus expr: save it for later
739 inspection in repropagate_negates(). */
740 if (negateop != NULL_TREE)
741 VEC_safe_push (tree, heap, plus_negates, curr->op);
743 return false;
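/* Illustrative effect (made-up operands): in a PLUS_EXPR chain, the
   list {t_3, a_1, b_2} with t_3 = -a_1 becomes {b_2, 0}, and with
   t_3 = ~a_1 it becomes {b_2, -1}; the surviving constant is then
   handled by eliminate_using_constants / optimize_ops_list.  */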
746 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
747 bitwise not expression, look in OPS for a corresponding operand to
748 cancel it out. If we find one, remove the other from OPS, replace
749 OPS[CURRINDEX] with 0, and return true. Otherwise, return
750 false. */
752 static bool
753 eliminate_not_pairs (enum tree_code opcode,
754 VEC (operand_entry_t, heap) **ops,
755 unsigned int currindex,
756 operand_entry_t curr)
758 tree notop;
759 unsigned int i;
760 operand_entry_t oe;
762 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
763 || TREE_CODE (curr->op) != SSA_NAME)
764 return false;
766 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
767 if (notop == NULL_TREE)
768 return false;
770 /* Any non-not version will have a rank that is one less than
771 the current rank. So once we hit those ranks, if we don't find
772 one, we can stop. */
774 for (i = currindex + 1;
775 VEC_iterate (operand_entry_t, *ops, i, oe)
776 && oe->rank >= curr->rank - 1;
777 i++)
779 if (oe->op == notop)
781 if (dump_file && (dump_flags & TDF_DETAILS))
783 fprintf (dump_file, "Equivalence: ");
784 print_generic_expr (dump_file, notop, 0);
785 if (opcode == BIT_AND_EXPR)
786 fprintf (dump_file, " & ~");
787 else if (opcode == BIT_IOR_EXPR)
788 fprintf (dump_file, " | ~");
789 print_generic_expr (dump_file, oe->op, 0);
790 if (opcode == BIT_AND_EXPR)
791 fprintf (dump_file, " -> 0\n");
792 else if (opcode == BIT_IOR_EXPR)
793 fprintf (dump_file, " -> -1\n");
796 if (opcode == BIT_AND_EXPR)
797 oe->op = build_zero_cst (TREE_TYPE (oe->op));
798 else if (opcode == BIT_IOR_EXPR)
799 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
800 TYPE_PRECISION (TREE_TYPE (oe->op)));
802 reassociate_stats.ops_eliminated
803 += VEC_length (operand_entry_t, *ops) - 1;
804 VEC_free (operand_entry_t, heap, *ops);
805 *ops = NULL;
806 VEC_safe_push (operand_entry_t, heap, *ops, oe);
807 return true;
811 return false;
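/* Illustrative effect (made-up operands): if both a_1 and t_3 = ~a_1
   appear in a BIT_AND_EXPR chain, the whole operand list collapses to
   {0}; in a BIT_IOR_EXPR chain it collapses to the all-ones constant.  */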
814 /* Use a constant value that may be present in OPS to try to eliminate
815 operands. Note that this function is only really used when we've
816 eliminated ops for other reasons, or merged constants. Across
817 single statements, fold already does all of this, plus more. There
818 is little point in duplicating logic, so I've only included the
819 identities that I could ever construct testcases to trigger. */
821 static void
822 eliminate_using_constants (enum tree_code opcode,
823 VEC(operand_entry_t, heap) **ops)
825 operand_entry_t oelast = VEC_last (operand_entry_t, *ops);
826 tree type = TREE_TYPE (oelast->op);
828 if (oelast->rank == 0
829 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
831 switch (opcode)
833 case BIT_AND_EXPR:
834 if (integer_zerop (oelast->op))
836 if (VEC_length (operand_entry_t, *ops) != 1)
838 if (dump_file && (dump_flags & TDF_DETAILS))
839 fprintf (dump_file, "Found & 0, removing all other ops\n");
841 reassociate_stats.ops_eliminated
842 += VEC_length (operand_entry_t, *ops) - 1;
844 VEC_free (operand_entry_t, heap, *ops);
845 *ops = NULL;
846 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
847 return;
850 else if (integer_all_onesp (oelast->op))
852 if (VEC_length (operand_entry_t, *ops) != 1)
854 if (dump_file && (dump_flags & TDF_DETAILS))
855 fprintf (dump_file, "Found & -1, removing\n");
856 VEC_pop (operand_entry_t, *ops);
857 reassociate_stats.ops_eliminated++;
860 break;
861 case BIT_IOR_EXPR:
862 if (integer_all_onesp (oelast->op))
864 if (VEC_length (operand_entry_t, *ops) != 1)
866 if (dump_file && (dump_flags & TDF_DETAILS))
867 fprintf (dump_file, "Found | -1, removing all other ops\n");
869 reassociate_stats.ops_eliminated
870 += VEC_length (operand_entry_t, *ops) - 1;
872 VEC_free (operand_entry_t, heap, *ops);
873 *ops = NULL;
874 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
875 return;
878 else if (integer_zerop (oelast->op))
880 if (VEC_length (operand_entry_t, *ops) != 1)
882 if (dump_file && (dump_flags & TDF_DETAILS))
883 fprintf (dump_file, "Found | 0, removing\n");
884 VEC_pop (operand_entry_t, *ops);
885 reassociate_stats.ops_eliminated++;
888 break;
889 case MULT_EXPR:
890 if (integer_zerop (oelast->op)
891 || (FLOAT_TYPE_P (type)
892 && !HONOR_NANS (TYPE_MODE (type))
893 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
894 && real_zerop (oelast->op)))
896 if (VEC_length (operand_entry_t, *ops) != 1)
898 if (dump_file && (dump_flags & TDF_DETAILS))
899 fprintf (dump_file, "Found * 0, removing all other ops\n");
901 reassociate_stats.ops_eliminated
902 += VEC_length (operand_entry_t, *ops) - 1;
903 VEC_free (operand_entry_t, heap, *ops);
904 *ops = NULL;
905 VEC_safe_push (operand_entry_t, heap, *ops, oelast);
906 return;
909 else if (integer_onep (oelast->op)
910 || (FLOAT_TYPE_P (type)
911 && !HONOR_SNANS (TYPE_MODE (type))
912 && real_onep (oelast->op)))
914 if (VEC_length (operand_entry_t, *ops) != 1)
916 if (dump_file && (dump_flags & TDF_DETAILS))
917 fprintf (dump_file, "Found * 1, removing\n");
918 VEC_pop (operand_entry_t, *ops);
919 reassociate_stats.ops_eliminated++;
920 return;
923 break;
924 case BIT_XOR_EXPR:
925 case PLUS_EXPR:
926 case MINUS_EXPR:
927 if (integer_zerop (oelast->op)
928 || (FLOAT_TYPE_P (type)
929 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
930 && fold_real_zero_addition_p (type, oelast->op,
931 opcode == MINUS_EXPR)))
933 if (VEC_length (operand_entry_t, *ops) != 1)
935 if (dump_file && (dump_flags & TDF_DETAILS))
936 fprintf (dump_file, "Found [|^+] 0, removing\n");
937 VEC_pop (operand_entry_t, *ops);
938 reassociate_stats.ops_eliminated++;
939 return;
942 break;
943 default:
944 break;
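/* Illustrative effect (made-up operand lists): {a_1, b_2, 0} under
   BIT_AND_EXPR or MULT_EXPR collapses to {0} (for float multiplies only
   when NaNs and signed zeros need not be honored), while under
   PLUS_EXPR the 0 is simply dropped, leaving {a_1, b_2}.  */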
950 static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple,
951 bool, bool);
953 /* Structure for tracking and counting operands. */
954 typedef struct oecount_s {
955 int cnt;
956 int id;
957 enum tree_code oecode;
958 tree op;
959 } oecount;
961 DEF_VEC_O(oecount);
962 DEF_VEC_ALLOC_O(oecount,heap);
964 /* The heap for the oecount hashtable and the sorted list of operands. */
965 static VEC (oecount, heap) *cvec;
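/* Note on the encoding used below: entries stored in the counting
   hashtable are not pointers to oecount structs but the element's
   position in CVEC plus 42, cast to a pointer (hence the "+ 41" when
   inserting right after the push and the "- 42" when reading),
   presumably so that valid entries can never collide with the
   hashtable's empty and deleted slot markers.  */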
967 /* Hash function for oecount. */
969 static hashval_t
970 oecount_hash (const void *p)
972 const oecount *c = VEC_index (oecount, cvec, (size_t)p - 42);
973 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
976 /* Comparison function for oecount. */
978 static int
979 oecount_eq (const void *p1, const void *p2)
981 const oecount *c1 = VEC_index (oecount, cvec, (size_t)p1 - 42);
982 const oecount *c2 = VEC_index (oecount, cvec, (size_t)p2 - 42);
983 return (c1->oecode == c2->oecode
984 && c1->op == c2->op);
987 /* Comparison function for qsort sorting oecount elements by count. */
989 static int
990 oecount_cmp (const void *p1, const void *p2)
992 const oecount *c1 = (const oecount *)p1;
993 const oecount *c2 = (const oecount *)p2;
994 if (c1->cnt != c2->cnt)
995 return c1->cnt - c2->cnt;
996 else
997 /* If counts are identical, use unique IDs to stabilize qsort. */
998 return c1->id - c2->id;
1001 /* Return TRUE iff STMT represents a builtin call that raises OP
1002 to some exponent. */
1004 static bool
1005 stmt_is_power_of_op (gimple stmt, tree op)
1007 tree fndecl;
1009 if (!is_gimple_call (stmt))
1010 return false;
1012 fndecl = gimple_call_fndecl (stmt);
1014 if (!fndecl
1015 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1016 return false;
1018 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1020 CASE_FLT_FN (BUILT_IN_POW):
1021 CASE_FLT_FN (BUILT_IN_POWI):
1022 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1024 default:
1025 return false;
1029 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1030 in place and return the result. Assumes that stmt_is_power_of_op
1031 was previously called for STMT and returned TRUE. */
1033 static HOST_WIDE_INT
1034 decrement_power (gimple stmt)
1036 REAL_VALUE_TYPE c, cint;
1037 HOST_WIDE_INT power;
1038 tree arg1;
1040 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1042 CASE_FLT_FN (BUILT_IN_POW):
1043 arg1 = gimple_call_arg (stmt, 1);
1044 c = TREE_REAL_CST (arg1);
1045 power = real_to_integer (&c) - 1;
1046 real_from_integer (&cint, VOIDmode, power, 0, 0);
1047 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1048 return power;
1050 CASE_FLT_FN (BUILT_IN_POWI):
1051 arg1 = gimple_call_arg (stmt, 1);
1052 power = TREE_INT_CST_LOW (arg1) - 1;
1053 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1054 return power;
1056 default:
1057 gcc_unreachable ();
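/* For example (illustrative only): given t_1 = __builtin_powi (x_2, 3),
   decrement_power rewrites the call in place to __builtin_powi (x_2, 2)
   and returns 2; zero_one_operation below uses a return value of 1 to
   detect that the call has degenerated to the first power and can be
   replaced by the operand itself.  */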
1061 /* Find the single immediate use of STMT's LHS, and replace it
1062 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1063 replace *DEF with OP as well. */
1065 static void
1066 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1068 tree lhs;
1069 gimple use_stmt;
1070 use_operand_p use;
1071 gimple_stmt_iterator gsi;
1073 if (is_gimple_call (stmt))
1074 lhs = gimple_call_lhs (stmt);
1075 else
1076 lhs = gimple_assign_lhs (stmt);
1078 gcc_assert (has_single_use (lhs));
1079 single_imm_use (lhs, &use, &use_stmt);
1080 if (lhs == *def)
1081 *def = op;
1082 SET_USE (use, op);
1083 if (TREE_CODE (op) != SSA_NAME)
1084 update_stmt (use_stmt);
1085 gsi = gsi_for_stmt (stmt);
1086 gsi_remove (&gsi, true);
1087 release_defs (stmt);
1089 if (is_gimple_call (stmt))
1090 unlink_stmt_vdef (stmt);
1093 /* Walks the linear chain with result *DEF searching for an operation
1094 with operand OP and code OPCODE removing that from the chain. *DEF
1095 is updated if there is only one operand but no operation left. */
1097 static void
1098 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1100 gimple stmt = SSA_NAME_DEF_STMT (*def);
1104 tree name;
1106 if (opcode == MULT_EXPR
1107 && stmt_is_power_of_op (stmt, op))
1109 if (decrement_power (stmt) == 1)
1110 propagate_op_to_single_use (op, stmt, def);
1111 return;
1114 name = gimple_assign_rhs1 (stmt);
1116 /* If this is the operation we look for and one of the operands
1117 is ours, simply propagate the other operand into the stmt's
1118 single use. */
1119 if (gimple_assign_rhs_code (stmt) == opcode
1120 && (name == op
1121 || gimple_assign_rhs2 (stmt) == op))
1123 if (name == op)
1124 name = gimple_assign_rhs2 (stmt);
1125 propagate_op_to_single_use (name, stmt, def);
1126 return;
1129 /* We might have a multiply of two __builtin_pow* calls, and
1130 the operand might be hiding in the rightmost one. */
1131 if (opcode == MULT_EXPR
1132 && gimple_assign_rhs_code (stmt) == opcode
1133 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
1135 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1136 if (stmt_is_power_of_op (stmt2, op))
1138 if (decrement_power (stmt2) == 1)
1139 propagate_op_to_single_use (op, stmt2, def);
1140 return;
1144 /* Continue walking the chain. */
1145 gcc_assert (name != op
1146 && TREE_CODE (name) == SSA_NAME);
1147 stmt = SSA_NAME_DEF_STMT (name);
1149 while (1);
1152 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1153 the result. Places the statement after the definition of either
1154 OP1 or OP2. Returns the new statement. */
1156 static gimple
1157 build_and_add_sum (tree tmpvar, tree op1, tree op2, enum tree_code opcode)
1159 gimple op1def = NULL, op2def = NULL;
1160 gimple_stmt_iterator gsi;
1161 tree op;
1162 gimple sum;
1164 /* Create the addition statement. */
1165 sum = gimple_build_assign_with_ops (opcode, tmpvar, op1, op2);
1166 op = make_ssa_name (tmpvar, sum);
1167 gimple_assign_set_lhs (sum, op);
1169 /* Find an insertion place and insert. */
1170 if (TREE_CODE (op1) == SSA_NAME)
1171 op1def = SSA_NAME_DEF_STMT (op1);
1172 if (TREE_CODE (op2) == SSA_NAME)
1173 op2def = SSA_NAME_DEF_STMT (op2);
1174 if ((!op1def || gimple_nop_p (op1def))
1175 && (!op2def || gimple_nop_p (op2def)))
1177 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1178 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1180 else if ((!op1def || gimple_nop_p (op1def))
1181 || (op2def && !gimple_nop_p (op2def)
1182 && stmt_dominates_stmt_p (op1def, op2def)))
1184 if (gimple_code (op2def) == GIMPLE_PHI)
1186 gsi = gsi_after_labels (gimple_bb (op2def));
1187 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1189 else
1191 if (!stmt_ends_bb_p (op2def))
1193 gsi = gsi_for_stmt (op2def);
1194 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1196 else
1198 edge e;
1199 edge_iterator ei;
1201 FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1202 if (e->flags & EDGE_FALLTHRU)
1203 gsi_insert_on_edge_immediate (e, sum);
1207 else
1209 if (gimple_code (op1def) == GIMPLE_PHI)
1211 gsi = gsi_after_labels (gimple_bb (op1def));
1212 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1214 else
1216 if (!stmt_ends_bb_p (op1def))
1218 gsi = gsi_for_stmt (op1def);
1219 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1221 else
1223 edge e;
1224 edge_iterator ei;
1226 FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1227 if (e->flags & EDGE_FALLTHRU)
1228 gsi_insert_on_edge_immediate (e, sum);
1232 update_stmt (sum);
1234 return sum;
1237 /* Perform un-distribution of divisions and multiplications.
1238 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1239 into (A + B) / X for real X.
1241 The algorithm is organized as follows.
1243 - First we walk the addition chain *OPS looking for summands that
1244 are defined by a multiplication or a real division. This results
1245 in the candidates bitmap with relevant indices into *OPS.
1247 - Second we build the chains of multiplications or divisions for
1248 these candidates, counting the number of occurrences of (operand, code)
1249 pairs in all of the candidates chains.
1251 - Third we sort the (operand, code) pairs by number of occurrence and
1252 process them starting with the pair with the most uses.
1254 * For each such pair we walk the candidates again to build a
1255 second candidate bitmap noting all multiplication/division chains
1256 that have at least one occurrence of (operand, code).
1258 * We build an alternate addition chain only covering these
1259 candidates with one (operand, code) operation removed from their
1260 multiplication/division chain.
1262 * The first candidate gets replaced by the alternate addition chain
1263 multiplied/divided by the operand.
1265 * All candidate chains get disabled for further processing and
1266 processing of (operand, code) pairs continues.
1268 The alternate addition chains built are re-processed by the main
1269 reassociation algorithm which allows optimizing a * x * y + b * y * x
1270 to (a + b) * x * y in one invocation of the reassociation pass. */
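/* A small illustrative run of the algorithm above (hypothetical
   operands): for the addition chain a*x + b*x + c*y all three summands
   are candidates, the counting table records the pair (x, *) twice and
   (y, *) once, so (x, *) is processed first; one factor of x is removed
   from a*x and from b*x, an alternate chain tmp = a + b is built, the
   first candidate becomes tmp * x, the second becomes 0, and the
   resulting list {tmp * x, 0, c*y} is handed back to the normal
   reassociation machinery.  */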
1272 static bool
1273 undistribute_ops_list (enum tree_code opcode,
1274 VEC (operand_entry_t, heap) **ops, struct loop *loop)
1276 unsigned int length = VEC_length (operand_entry_t, *ops);
1277 operand_entry_t oe1;
1278 unsigned i, j;
1279 sbitmap candidates, candidates2;
1280 unsigned nr_candidates, nr_candidates2;
1281 sbitmap_iterator sbi0;
1282 VEC (operand_entry_t, heap) **subops;
1283 htab_t ctable;
1284 bool changed = false;
1285 int next_oecount_id = 0;
1287 if (length <= 1
1288 || opcode != PLUS_EXPR)
1289 return false;
1291 /* Build a list of candidates to process. */
1292 candidates = sbitmap_alloc (length);
1293 sbitmap_zero (candidates);
1294 nr_candidates = 0;
1295 FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe1)
1297 enum tree_code dcode;
1298 gimple oe1def;
1300 if (TREE_CODE (oe1->op) != SSA_NAME)
1301 continue;
1302 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1303 if (!is_gimple_assign (oe1def))
1304 continue;
1305 dcode = gimple_assign_rhs_code (oe1def);
1306 if ((dcode != MULT_EXPR
1307 && dcode != RDIV_EXPR)
1308 || !is_reassociable_op (oe1def, dcode, loop))
1309 continue;
1311 SET_BIT (candidates, i);
1312 nr_candidates++;
1315 if (nr_candidates < 2)
1317 sbitmap_free (candidates);
1318 return false;
1321 if (dump_file && (dump_flags & TDF_DETAILS))
1323 fprintf (dump_file, "searching for un-distribute opportunities ");
1324 print_generic_expr (dump_file,
1325 VEC_index (operand_entry_t, *ops,
1326 sbitmap_first_set_bit (candidates))->op, 0);
1327 fprintf (dump_file, " %d\n", nr_candidates);
1330 /* Build linearized sub-operand lists and the counting table. */
1331 cvec = NULL;
1332 ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1333 subops = XCNEWVEC (VEC (operand_entry_t, heap) *,
1334 VEC_length (operand_entry_t, *ops));
1335 EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1337 gimple oedef;
1338 enum tree_code oecode;
1339 unsigned j;
1341 oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op);
1342 oecode = gimple_assign_rhs_code (oedef);
1343 linearize_expr_tree (&subops[i], oedef,
1344 associative_tree_code (oecode), false);
1346 FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
1348 oecount c;
1349 void **slot;
1350 size_t idx;
1351 c.oecode = oecode;
1352 c.cnt = 1;
1353 c.id = next_oecount_id++;
1354 c.op = oe1->op;
1355 VEC_safe_push (oecount, heap, cvec, &c);
1356 idx = VEC_length (oecount, cvec) + 41;
1357 slot = htab_find_slot (ctable, (void *)idx, INSERT);
1358 if (!*slot)
1360 *slot = (void *)idx;
1362 else
1364 VEC_pop (oecount, cvec);
1365 VEC_index (oecount, cvec, (size_t)*slot - 42)->cnt++;
1369 htab_delete (ctable);
1371 /* Sort the counting table. */
1372 VEC_qsort (oecount, cvec, oecount_cmp);
1374 if (dump_file && (dump_flags & TDF_DETAILS))
1376 oecount *c;
1377 fprintf (dump_file, "Candidates:\n");
1378 FOR_EACH_VEC_ELT (oecount, cvec, j, c)
1380 fprintf (dump_file, " %u %s: ", c->cnt,
1381 c->oecode == MULT_EXPR
1382 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1383 print_generic_expr (dump_file, c->op, 0);
1384 fprintf (dump_file, "\n");
1388 /* Process the (operand, code) pairs in order of most frequent occurrence. */
1389 candidates2 = sbitmap_alloc (length);
1390 while (!VEC_empty (oecount, cvec))
1392 oecount *c = VEC_last (oecount, cvec);
1393 if (c->cnt < 2)
1394 break;
1396 /* Now collect the operands in the outer chain that contain
1397 the common operand in their inner chain. */
1398 sbitmap_zero (candidates2);
1399 nr_candidates2 = 0;
1400 EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
1402 gimple oedef;
1403 enum tree_code oecode;
1404 unsigned j;
1405 tree op = VEC_index (operand_entry_t, *ops, i)->op;
1407 /* If we undistributed in this chain already this may be
1408 a constant. */
1409 if (TREE_CODE (op) != SSA_NAME)
1410 continue;
1412 oedef = SSA_NAME_DEF_STMT (op);
1413 oecode = gimple_assign_rhs_code (oedef);
1414 if (oecode != c->oecode)
1415 continue;
1417 FOR_EACH_VEC_ELT (operand_entry_t, subops[i], j, oe1)
1419 if (oe1->op == c->op)
1421 SET_BIT (candidates2, i);
1422 ++nr_candidates2;
1423 break;
1428 if (nr_candidates2 >= 2)
1430 operand_entry_t oe1, oe2;
1431 tree tmpvar;
1432 gimple prod;
1433 int first = sbitmap_first_set_bit (candidates2);
1435 /* Build the new addition chain. */
1436 oe1 = VEC_index (operand_entry_t, *ops, first);
1437 if (dump_file && (dump_flags & TDF_DETAILS))
1439 fprintf (dump_file, "Building (");
1440 print_generic_expr (dump_file, oe1->op, 0);
1442 tmpvar = create_tmp_reg (TREE_TYPE (oe1->op), NULL);
1443 add_referenced_var (tmpvar);
1444 zero_one_operation (&oe1->op, c->oecode, c->op);
1445 EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
1447 gimple sum;
1448 oe2 = VEC_index (operand_entry_t, *ops, i);
1449 if (dump_file && (dump_flags & TDF_DETAILS))
1451 fprintf (dump_file, " + ");
1452 print_generic_expr (dump_file, oe2->op, 0);
1454 zero_one_operation (&oe2->op, c->oecode, c->op);
1455 sum = build_and_add_sum (tmpvar, oe1->op, oe2->op, opcode);
1456 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1457 oe2->rank = 0;
1458 oe1->op = gimple_get_lhs (sum);
1461 /* Apply the multiplication/division. */
1462 prod = build_and_add_sum (tmpvar, oe1->op, c->op, c->oecode);
1463 if (dump_file && (dump_flags & TDF_DETAILS))
1465 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1466 print_generic_expr (dump_file, c->op, 0);
1467 fprintf (dump_file, "\n");
1470 /* Record it in the addition chain and disable further
1471 undistribution with this op. */
1472 oe1->op = gimple_assign_lhs (prod);
1473 oe1->rank = get_rank (oe1->op);
1474 VEC_free (operand_entry_t, heap, subops[first]);
1476 changed = true;
1479 VEC_pop (oecount, cvec);
1482 for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i)
1483 VEC_free (operand_entry_t, heap, subops[i]);
1484 free (subops);
1485 VEC_free (oecount, heap, cvec);
1486 sbitmap_free (candidates);
1487 sbitmap_free (candidates2);
1489 return changed;
1492 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1493 expression, examine the other OPS to see if any of them are comparisons
1494 of the same values, which we may be able to combine or eliminate.
1495 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1497 static bool
1498 eliminate_redundant_comparison (enum tree_code opcode,
1499 VEC (operand_entry_t, heap) **ops,
1500 unsigned int currindex,
1501 operand_entry_t curr)
1503 tree op1, op2;
1504 enum tree_code lcode, rcode;
1505 gimple def1, def2;
1506 int i;
1507 operand_entry_t oe;
1509 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1510 return false;
1512 /* Check that CURR is a comparison. */
1513 if (TREE_CODE (curr->op) != SSA_NAME)
1514 return false;
1515 def1 = SSA_NAME_DEF_STMT (curr->op);
1516 if (!is_gimple_assign (def1))
1517 return false;
1518 lcode = gimple_assign_rhs_code (def1);
1519 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1520 return false;
1521 op1 = gimple_assign_rhs1 (def1);
1522 op2 = gimple_assign_rhs2 (def1);
1524 /* Now look for a similar comparison in the remaining OPS. */
1525 for (i = currindex + 1;
1526 VEC_iterate (operand_entry_t, *ops, i, oe);
1527 i++)
1529 tree t;
1531 if (TREE_CODE (oe->op) != SSA_NAME)
1532 continue;
1533 def2 = SSA_NAME_DEF_STMT (oe->op);
1534 if (!is_gimple_assign (def2))
1535 continue;
1536 rcode = gimple_assign_rhs_code (def2);
1537 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1538 continue;
1540 /* If we got here, we have a match. See if we can combine the
1541 two comparisons. */
1542 if (opcode == BIT_IOR_EXPR)
1543 t = maybe_fold_or_comparisons (lcode, op1, op2,
1544 rcode, gimple_assign_rhs1 (def2),
1545 gimple_assign_rhs2 (def2));
1546 else
1547 t = maybe_fold_and_comparisons (lcode, op1, op2,
1548 rcode, gimple_assign_rhs1 (def2),
1549 gimple_assign_rhs2 (def2));
1550 if (!t)
1551 continue;
1553 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1554 always give us a boolean_type_node value back. If the original
1555 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1556 we need to convert. */
1557 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1558 t = fold_convert (TREE_TYPE (curr->op), t);
1560 if (TREE_CODE (t) != INTEGER_CST
1561 && !operand_equal_p (t, curr->op, 0))
1563 enum tree_code subcode;
1564 tree newop1, newop2;
1565 if (!COMPARISON_CLASS_P (t))
1566 continue;
1567 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1568 STRIP_USELESS_TYPE_CONVERSION (newop1);
1569 STRIP_USELESS_TYPE_CONVERSION (newop2);
1570 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1571 continue;
1574 if (dump_file && (dump_flags & TDF_DETAILS))
1576 fprintf (dump_file, "Equivalence: ");
1577 print_generic_expr (dump_file, curr->op, 0);
1578 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1579 print_generic_expr (dump_file, oe->op, 0);
1580 fprintf (dump_file, " -> ");
1581 print_generic_expr (dump_file, t, 0);
1582 fprintf (dump_file, "\n");
1585 /* Now we can delete oe, as it has been subsumed by the new combined
1586 expression t. */
1587 VEC_ordered_remove (operand_entry_t, *ops, i);
1588 reassociate_stats.ops_eliminated ++;
1590 /* If t is the same as curr->op, we're done. Otherwise we must
1591 replace curr->op with t. Special case is if we got a constant
1592 back, in which case we add it to the end instead of in place of
1593 the current entry. */
1594 if (TREE_CODE (t) == INTEGER_CST)
1596 VEC_ordered_remove (operand_entry_t, *ops, currindex);
1597 add_to_ops_vec (ops, t);
1599 else if (!operand_equal_p (t, curr->op, 0))
1601 tree tmpvar;
1602 gimple sum;
1603 enum tree_code subcode;
1604 tree newop1;
1605 tree newop2;
1606 gcc_assert (COMPARISON_CLASS_P (t));
1607 tmpvar = create_tmp_var (TREE_TYPE (t), NULL);
1608 add_referenced_var (tmpvar);
1609 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1610 STRIP_USELESS_TYPE_CONVERSION (newop1);
1611 STRIP_USELESS_TYPE_CONVERSION (newop2);
1612 gcc_checking_assert (is_gimple_val (newop1)
1613 && is_gimple_val (newop2));
1614 sum = build_and_add_sum (tmpvar, newop1, newop2, subcode);
1615 curr->op = gimple_get_lhs (sum);
1617 return true;
1620 return false;
1623 /* Perform various identities and other optimizations on the list of
1624 operand entries, stored in OPS. The tree code for the binary
1625 operation between all the operands is OPCODE. */
1627 static void
1628 optimize_ops_list (enum tree_code opcode,
1629 VEC (operand_entry_t, heap) **ops)
1631 unsigned int length = VEC_length (operand_entry_t, *ops);
1632 unsigned int i;
1633 operand_entry_t oe;
1634 operand_entry_t oelast = NULL;
1635 bool iterate = false;
1637 if (length == 1)
1638 return;
1640 oelast = VEC_last (operand_entry_t, *ops);
1642 /* If the last two are constants, pop the constants off, merge them
1643 and try the next two. */
1644 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1646 operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2);
1648 if (oelm1->rank == 0
1649 && is_gimple_min_invariant (oelm1->op)
1650 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1651 TREE_TYPE (oelast->op)))
1653 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1654 oelm1->op, oelast->op);
1656 if (folded && is_gimple_min_invariant (folded))
1658 if (dump_file && (dump_flags & TDF_DETAILS))
1659 fprintf (dump_file, "Merging constants\n");
1661 VEC_pop (operand_entry_t, *ops);
1662 VEC_pop (operand_entry_t, *ops);
1664 add_to_ops_vec (ops, folded);
1665 reassociate_stats.constants_eliminated++;
1667 optimize_ops_list (opcode, ops);
1668 return;
1673 eliminate_using_constants (opcode, ops);
1674 oelast = NULL;
1676 for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);)
1678 bool done = false;
1680 if (eliminate_not_pairs (opcode, ops, i, oe))
1681 return;
1682 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1683 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1684 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1686 if (done)
1687 return;
1688 iterate = true;
1689 oelast = NULL;
1690 continue;
1692 oelast = oe;
1693 i++;
1696 length = VEC_length (operand_entry_t, *ops);
1697 oelast = VEC_last (operand_entry_t, *ops);
1699 if (iterate)
1700 optimize_ops_list (opcode, ops);
1703 /* The following functions are subroutines to optimize_range_tests and allow
1704 it to try to change a logical combination of comparisons into a range
1705 test.
1707 For example, both
1708 X == 2 || X == 5 || X == 3 || X == 4
1709 and
1710 X >= 2 && X <= 5
1711 are converted to
1712 (unsigned) (X - 2) <= 3
1714 For more information see the comments above fold_range_test in fold-const.c;
1715 this implementation is for GIMPLE. */
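/* As a rough sketch (hypothetical SSA names), the first example above
   reaches this code as something like

     _1 = x_5 == 2;
     _2 = x_5 == 5;
     _3 = _1 | _2;
     ...

   and init_range_entry below walks back from an operand such as _1
   through its defining statements, recording EXP = x_5 together with
   the [LOW, HIGH] bounds and the IN_P flag for that comparison.  */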
1717 struct range_entry
1719 tree exp;
1720 tree low;
1721 tree high;
1722 bool in_p;
1723 bool strict_overflow_p;
1724 unsigned int idx, next;
1727 /* This is similar to make_range in fold-const.c, but on top of
1728 GIMPLE instead of trees. */
1730 static void
1731 init_range_entry (struct range_entry *r, tree exp)
1733 int in_p;
1734 tree low, high;
1735 bool is_bool, strict_overflow_p;
1737 r->exp = NULL_TREE;
1738 r->in_p = false;
1739 r->strict_overflow_p = false;
1740 r->low = NULL_TREE;
1741 r->high = NULL_TREE;
1742 if (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))
1743 return;
1745 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1746 and see if we can refine the range. Some of the cases below may not
1747 happen, but it doesn't seem worth worrying about this. We "continue"
1748 the outer loop when we've changed something; otherwise we "break"
1749 the switch, which will "break" the while. */
1750 low = build_int_cst (TREE_TYPE (exp), 0);
1751 high = low;
1752 in_p = 0;
1753 strict_overflow_p = false;
1754 is_bool = false;
1755 if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1757 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1758 is_bool = true;
1759 else
1760 return;
1762 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1763 is_bool = true;
1765 while (1)
1767 gimple stmt;
1768 enum tree_code code;
1769 tree arg0, arg1, exp_type;
1770 tree nexp;
1771 location_t loc;
1773 if (TREE_CODE (exp) != SSA_NAME)
1774 break;
1776 stmt = SSA_NAME_DEF_STMT (exp);
1777 if (!is_gimple_assign (stmt))
1778 break;
1780 code = gimple_assign_rhs_code (stmt);
1781 arg0 = gimple_assign_rhs1 (stmt);
1782 if (TREE_CODE (arg0) != SSA_NAME)
1783 break;
1784 arg1 = gimple_assign_rhs2 (stmt);
1785 exp_type = TREE_TYPE (exp);
1786 loc = gimple_location (stmt);
1787 switch (code)
1789 case BIT_NOT_EXPR:
1790 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1792 in_p = !in_p;
1793 exp = arg0;
1794 continue;
1796 break;
1797 case SSA_NAME:
1798 exp = arg0;
1799 continue;
1800 CASE_CONVERT:
1801 if (is_bool)
1802 goto do_default;
1803 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1805 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1806 is_bool = true;
1807 else
1808 return;
1810 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1811 is_bool = true;
1812 goto do_default;
1813 case EQ_EXPR:
1814 case NE_EXPR:
1815 case LT_EXPR:
1816 case LE_EXPR:
1817 case GE_EXPR:
1818 case GT_EXPR:
1819 is_bool = true;
1820 /* FALLTHRU */
1821 default:
1822 if (!is_bool)
1823 return;
1824 do_default:
1825 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1826 &low, &high, &in_p,
1827 &strict_overflow_p);
1828 if (nexp != NULL_TREE)
1830 exp = nexp;
1831 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1832 continue;
1834 break;
1836 break;
1838 if (is_bool)
1840 r->exp = exp;
1841 r->in_p = in_p;
1842 r->low = low;
1843 r->high = high;
1844 r->strict_overflow_p = strict_overflow_p;
1848 /* Comparison function for qsort. Sort entries
1849 without SSA_NAME exp first, then with SSA_NAMEs sorted
1850 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1851 by increasing ->low and if ->low is the same, by increasing
1852 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1853 maximum. */
1855 static int
1856 range_entry_cmp (const void *a, const void *b)
1858 const struct range_entry *p = (const struct range_entry *) a;
1859 const struct range_entry *q = (const struct range_entry *) b;
1861 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1863 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1865 /* Group range_entries for the same SSA_NAME together. */
1866 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1867 return -1;
1868 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1869 return 1;
1870 /* If ->low is different, NULL low goes first, then by
1871 ascending low. */
1872 if (p->low != NULL_TREE)
1874 if (q->low != NULL_TREE)
1876 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1877 p->low, q->low);
1878 if (tem && integer_onep (tem))
1879 return -1;
1880 tem = fold_binary (GT_EXPR, boolean_type_node,
1881 p->low, q->low);
1882 if (tem && integer_onep (tem))
1883 return 1;
1885 else
1886 return 1;
1888 else if (q->low != NULL_TREE)
1889 return -1;
1890 /* If ->high is different, NULL high goes last, before that by
1891 ascending high. */
1892 if (p->high != NULL_TREE)
1894 if (q->high != NULL_TREE)
1896 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1897 p->high, q->high);
1898 if (tem && integer_onep (tem))
1899 return -1;
1900 tem = fold_binary (GT_EXPR, boolean_type_node,
1901 p->high, q->high);
1902 if (tem && integer_onep (tem))
1903 return 1;
1905 else
1906 return -1;
1908 else if (q->high != NULL_TREE)
1909 return 1;
1910 /* If both ranges are the same, sort below by ascending idx. */
1912 else
1913 return 1;
1915 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1916 return -1;
1918 if (p->idx < q->idx)
1919 return -1;
1920 else
1922 gcc_checking_assert (p->idx > q->idx);
1923 return 1;
1927 /* Helper routine of optimize_range_tests.
1928 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1929 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1930 OPCODE and OPS are arguments of optimize_range_tests. Return
1931 true if the range merge has been successful. */
1933 static bool
1934 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1935 unsigned int count, enum tree_code opcode,
1936 VEC (operand_entry_t, heap) **ops, tree exp, bool in_p,
1937 tree low, tree high, bool strict_overflow_p)
1939 tree op = VEC_index (operand_entry_t, *ops, range->idx)->op;
1940 location_t loc = gimple_location (SSA_NAME_DEF_STMT (op));
1941 tree tem = build_range_check (loc, TREE_TYPE (op), exp, in_p, low, high);
1942 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1943 gimple_stmt_iterator gsi;
1945 if (tem == NULL_TREE)
1946 return false;
1948 if (strict_overflow_p && issue_strict_overflow_warning (wc))
1949 warning_at (loc, OPT_Wstrict_overflow,
1950 "assuming signed overflow does not occur "
1951 "when simplifying range test");
1953 if (dump_file && (dump_flags & TDF_DETAILS))
1955 struct range_entry *r;
1956 fprintf (dump_file, "Optimizing range tests ");
1957 print_generic_expr (dump_file, range->exp, 0);
1958 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1959 print_generic_expr (dump_file, range->low, 0);
1960 fprintf (dump_file, ", ");
1961 print_generic_expr (dump_file, range->high, 0);
1962 fprintf (dump_file, "]");
1963 for (r = otherrange; r < otherrange + count; r++)
1965 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1966 print_generic_expr (dump_file, r->low, 0);
1967 fprintf (dump_file, ", ");
1968 print_generic_expr (dump_file, r->high, 0);
1969 fprintf (dump_file, "]");
1971 fprintf (dump_file, "\n into ");
1972 print_generic_expr (dump_file, tem, 0);
1973 fprintf (dump_file, "\n");
1976 if (opcode == BIT_IOR_EXPR)
1977 tem = invert_truthvalue_loc (loc, tem);
1979 tem = fold_convert_loc (loc, TREE_TYPE (op), tem);
1980 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (op));
1981 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
1982 GSI_SAME_STMT);
1984 VEC_index (operand_entry_t, *ops, range->idx)->op = tem;
1985 range->exp = exp;
1986 range->low = low;
1987 range->high = high;
1988 range->in_p = in_p;
1989 range->strict_overflow_p = false;
1991 for (range = otherrange; range < otherrange + count; range++)
1993 VEC_index (operand_entry_t, *ops, range->idx)->op = error_mark_node;
1994 range->exp = NULL_TREE;
1996 return true;
1999 /* Optimize range tests, similarly to how fold_range_test optimizes
2000 them on trees. The tree code for the binary
2001 operation between all the operands is OPCODE. */
2003 static void
2004 optimize_range_tests (enum tree_code opcode,
2005 VEC (operand_entry_t, heap) **ops)
2007 unsigned int length = VEC_length (operand_entry_t, *ops), i, j, first;
2008 operand_entry_t oe;
2009 struct range_entry *ranges;
2010 bool any_changes = false;
2012 if (length == 1)
2013 return;
2015 ranges = XNEWVEC (struct range_entry, length);
2016 for (i = 0; i < length; i++)
2018 ranges[i].idx = i;
2019 init_range_entry (ranges + i, VEC_index (operand_entry_t, *ops, i)->op);
2020 /* For |, invert it now; we will invert it again before emitting
2021 the optimized expression. */
2022 if (opcode == BIT_IOR_EXPR)
2023 ranges[i].in_p = !ranges[i].in_p;
2026 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2027 for (i = 0; i < length; i++)
2028 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2029 break;
2031 /* Try to merge ranges. */
2032 for (first = i; i < length; i++)
2034 tree low = ranges[i].low;
2035 tree high = ranges[i].high;
2036 int in_p = ranges[i].in_p;
2037 bool strict_overflow_p = ranges[i].strict_overflow_p;
2038 int update_fail_count = 0;
2040 for (j = i + 1; j < length; j++)
2042 if (ranges[i].exp != ranges[j].exp)
2043 break;
2044 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2045 ranges[j].in_p, ranges[j].low, ranges[j].high))
2046 break;
2047 strict_overflow_p |= ranges[j].strict_overflow_p;
2050 if (j == i + 1)
2051 continue;
2053 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2054 ops, ranges[i].exp, in_p, low, high,
2055 strict_overflow_p))
2057 i = j - 1;
2058 any_changes = true;
2060 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2061 while update_range_test would fail. */
2062 else if (update_fail_count == 64)
2063 i = j - 1;
2064 else
2065 ++update_fail_count;
2068 /* Optimize X == CST1 || X == CST2
2069 if popcount (CST1 ^ CST2) == 1 into
2070 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2071 Similarly for ranges. E.g.
2072 X != 2 && X != 3 && X != 10 && X != 11
2073 will be transformed by the above loop into
2074 (X - 2U) <= 1U && (X - 10U) <= 1U
2075 and this loop can transform that into
2076 ((X & ~8) - 2U) <= 1U. */
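/* For the example above the two surviving ranges are [2, 3] and
   [10, 11]: lowxor = 2 ^ 10 = 8 and highxor = 3 ^ 11 = 8, the two
   xors are equal and 8 has a single bit set, so the ranges differ
   only in that bit and collapse into (X & ~8) tested against [2, 3].  */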
2077 for (i = first; i < length; i++)
2079 tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
2081 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2082 continue;
2083 type = TREE_TYPE (ranges[i].exp);
2084 if (!INTEGRAL_TYPE_P (type))
2085 continue;
2086 lowi = ranges[i].low;
2087 if (lowi == NULL_TREE)
2088 lowi = TYPE_MIN_VALUE (type);
2089 highi = ranges[i].high;
2090 if (highi == NULL_TREE)
2091 continue;
2092 for (j = i + 1; j < length && j < i + 64; j++)
2094 if (ranges[j].exp == NULL_TREE)
2095 continue;
2096 if (ranges[i].exp != ranges[j].exp)
2097 break;
2098 if (ranges[j].in_p)
2099 continue;
2100 lowj = ranges[j].low;
2101 if (lowj == NULL_TREE)
2102 continue;
2103 highj = ranges[j].high;
2104 if (highj == NULL_TREE)
2105 highj = TYPE_MAX_VALUE (type);
2106 tem = fold_binary (GT_EXPR, boolean_type_node,
2107 lowj, highi);
2108 if (tem == NULL_TREE || !integer_onep (tem))
2109 continue;
2110 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2111 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2112 continue;
2113 gcc_checking_assert (!integer_zerop (lowxor));
2114 tem = fold_binary (MINUS_EXPR, type, lowxor,
2115 build_int_cst (type, 1));
2116 if (tem == NULL_TREE)
2117 continue;
2118 tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
2119 if (tem == NULL_TREE || !integer_zerop (tem))
2120 continue;
2121 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2122 if (!tree_int_cst_equal (lowxor, highxor))
2123 continue;
2124 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2125 exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
2126 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2127 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2128 if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
2129 ranges[i].in_p, lowj, highj,
2130 ranges[i].strict_overflow_p
2131 || ranges[j].strict_overflow_p))
2133 any_changes = true;
2134 break;
2139 if (any_changes)
2141 j = 0;
2142 FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe)
2144 if (oe->op == error_mark_node)
2145 continue;
2146 else if (i != j)
2147 VEC_replace (operand_entry_t, *ops, j, oe);
2148 j++;
2150 VEC_truncate (operand_entry_t, *ops, j);
2153 XDELETEVEC (ranges);
2156 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2157 of STMT in its operands. This is also known as a "destructive
2158 update" operation. */
2160 static bool
2161 is_phi_for_stmt (gimple stmt, tree operand)
2163 gimple def_stmt;
2164 tree lhs;
2165 use_operand_p arg_p;
2166 ssa_op_iter i;
2168 if (TREE_CODE (operand) != SSA_NAME)
2169 return false;
2171 lhs = gimple_assign_lhs (stmt);
2173 def_stmt = SSA_NAME_DEF_STMT (operand);
2174 if (gimple_code (def_stmt) != GIMPLE_PHI)
2175 return false;
2177 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2178 if (lhs == USE_FROM_PTR (arg_p))
2179 return true;
2180 return false;
2183 /* Remove def stmt of VAR if VAR has zero uses and recurse
2184 on rhs1 operand if so. */
2186 static void
2187 remove_visited_stmt_chain (tree var)
2189 gimple stmt;
2190 gimple_stmt_iterator gsi;
2192 while (1)
2194 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2195 return;
2196 stmt = SSA_NAME_DEF_STMT (var);
2197 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2199 var = gimple_assign_rhs1 (stmt);
2200 gsi = gsi_for_stmt (stmt);
2201 gsi_remove (&gsi, true);
2202 release_defs (stmt);
2204 else
2205 return;
2209 /* This function checks three consecutive operands in
2210 the passed operands vector OPS starting from OPINDEX and
2211 swaps two operands if it is profitable for the binary operation
2212 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
2214 We pair ops with the same rank if possible.
2216 The alternative we try is to see if STMT is a destructive
2217 update style statement, which is like:
2218 b = phi (a, ...)
2219 a = c + b;
2220 In that case, we want to use the destructive update form to
2221 expose the possible vectorizer sum reduction opportunity.
2222 In that case, the third operand will be the phi node. This
2223 check is not performed if STMT is null.
2225 We could, of course, try to be better as noted above, and do a
2226 lot of work to try to find these opportunities in >3 operand
2227 cases, but it is unlikely to be worth it. */
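/* For instance, given the operand entries a (rank 1), b (rank 1),
   c (rank 2) at OPINDEX, the first and third entries are swapped so
   that the two rank-1 operands a and b end up feeding the same
   binary operation.  */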
2229 static void
2230 swap_ops_for_binary_stmt (VEC(operand_entry_t, heap) * ops,
2231 unsigned int opindex, gimple stmt)
2233 operand_entry_t oe1, oe2, oe3;
2235 oe1 = VEC_index (operand_entry_t, ops, opindex);
2236 oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
2237 oe3 = VEC_index (operand_entry_t, ops, opindex + 2);
2239 if ((oe1->rank == oe2->rank
2240 && oe2->rank != oe3->rank)
2241 || (stmt && is_phi_for_stmt (stmt, oe3->op)
2242 && !is_phi_for_stmt (stmt, oe1->op)
2243 && !is_phi_for_stmt (stmt, oe2->op)))
2245 struct operand_entry temp = *oe3;
2246 oe3->op = oe1->op;
2247 oe3->rank = oe1->rank;
2248 oe1->op = temp.op;
2249 oe1->rank = temp.rank;
2251 else if ((oe1->rank == oe3->rank
2252 && oe2->rank != oe3->rank)
2253 || (stmt && is_phi_for_stmt (stmt, oe2->op)
2254 && !is_phi_for_stmt (stmt, oe1->op)
2255 && !is_phi_for_stmt (stmt, oe3->op)))
2257 struct operand_entry temp = *oe2;
2258 oe2->op = oe1->op;
2259 oe2->rank = oe1->rank;
2260 oe1->op = temp.op;
2261 oe1->rank = temp.rank;
2265 /* Recursively rewrite our linearized statements so that the operators
2266 match those in OPS[OPINDEX], putting the computation in rank
2267 order. */
2269 static void
2270 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2271 VEC(operand_entry_t, heap) * ops, bool moved)
2273 tree rhs1 = gimple_assign_rhs1 (stmt);
2274 tree rhs2 = gimple_assign_rhs2 (stmt);
2275 operand_entry_t oe;
2277 /* If we have three operands left, then we want to make sure the ones
2278 that get the double binary op are chosen wisely. */
2279 if (opindex + 3 == VEC_length (operand_entry_t, ops))
2280 swap_ops_for_binary_stmt (ops, opindex, stmt);
2282 /* The final recursion case for this function is that you have
2283 exactly two operations left.
2284 If we had exactly one op in the entire list to start with, we
2285 would have never called this function, and the tail recursion
2286 rewrites them one at a time. */
2287 if (opindex + 2 == VEC_length (operand_entry_t, ops))
2289 operand_entry_t oe1, oe2;
2291 oe1 = VEC_index (operand_entry_t, ops, opindex);
2292 oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
2294 if (rhs1 != oe1->op || rhs2 != oe2->op)
2296 if (dump_file && (dump_flags & TDF_DETAILS))
2298 fprintf (dump_file, "Transforming ");
2299 print_gimple_stmt (dump_file, stmt, 0, 0);
2302 gimple_assign_set_rhs1 (stmt, oe1->op);
2303 gimple_assign_set_rhs2 (stmt, oe2->op);
2304 update_stmt (stmt);
2305 if (rhs1 != oe1->op && rhs1 != oe2->op)
2306 remove_visited_stmt_chain (rhs1);
2308 if (dump_file && (dump_flags & TDF_DETAILS))
2310 fprintf (dump_file, " into ");
2311 print_gimple_stmt (dump_file, stmt, 0, 0);
2314 return;
2317 /* If we hit here, we should have 3 or more ops left. */
2318 gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops));
2320 /* Rewrite the next operator. */
2321 oe = VEC_index (operand_entry_t, ops, opindex);
2323 if (oe->op != rhs2)
2325 if (!moved)
2327 gimple_stmt_iterator gsinow, gsirhs1;
2328 gimple stmt1 = stmt, stmt2;
2329 unsigned int count;
2331 gsinow = gsi_for_stmt (stmt);
2332 count = VEC_length (operand_entry_t, ops) - opindex - 2;
2333 while (count-- != 0)
2335 stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2336 gsirhs1 = gsi_for_stmt (stmt2);
2337 gsi_move_before (&gsirhs1, &gsinow);
2338 gsi_prev (&gsinow);
2339 stmt1 = stmt2;
2341 moved = true;
2344 if (dump_file && (dump_flags & TDF_DETAILS))
2346 fprintf (dump_file, "Transforming ");
2347 print_gimple_stmt (dump_file, stmt, 0, 0);
2350 gimple_assign_set_rhs2 (stmt, oe->op);
2351 update_stmt (stmt);
2353 if (dump_file && (dump_flags & TDF_DETAILS))
2355 fprintf (dump_file, " into ");
2356 print_gimple_stmt (dump_file, stmt, 0, 0);
2359 /* Recurse on the LHS of the binary operator, which is guaranteed to
2360 be the non-leaf side. */
2361 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2364 /* Find out how many cycles we need to compute a chain of statements.
2365 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
2366 maximum number of independent statements we may execute per cycle. */
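/* E.g. for OPS_NUM == 10 and CPU_WIDTH == 2 we spend 10 / 4 = 2 cycles
   reducing the operand count by the cpu width (10 -> 8 -> 6), and the
   remaining 6 operands need ceil (log2 (6)) = 3 more cycles
   (6 -> 3 -> 2 -> 1), giving 5 cycles in total.  */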
2368 static int
2369 get_required_cycles (int ops_num, int cpu_width)
2371 int res;
2372 int elog;
2373 unsigned int rest;
2375 /* While we have more than 2 * cpu_width operands
2376 we may reduce the number of operands by cpu_width
2377 per cycle. */
2378 res = ops_num / (2 * cpu_width);
2380 /* The remaining operand count can be halved each cycle
2381 until we have only one operand. */
2382 rest = (unsigned)(ops_num - res * cpu_width);
2383 elog = exact_log2 (rest);
2384 if (elog >= 0)
2385 res += elog;
2386 else
2387 res += floor_log2 (rest) + 1;
2389 return res;
2392 /* Returns an optimal number of registers to use for computation of
2393 the given statements. */
2395 static int
2396 get_reassociation_width (int ops_num, enum tree_code opc,
2397 enum machine_mode mode)
2399 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2400 int width;
2401 int width_min;
2402 int cycles_best;
2404 if (param_width > 0)
2405 width = param_width;
2406 else
2407 width = targetm.sched.reassociation_width (opc, mode);
2409 if (width == 1)
2410 return width;
2412 /* Get the minimal time required for sequence computation. */
2413 cycles_best = get_required_cycles (ops_num, width);
2415 /* Check if we may use a smaller width and still compute the sequence in
2416 the same number of cycles. That would allow us to reduce register usage.
2417 get_required_cycles is monotonically increasing as the width decreases,
2418 so we can perform a binary search for the minimal width that still
2419 results in the optimal cycle count. */
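/* For instance, 4 operands take 2 cycles at width 4 as well as at
   width 2 (two independent additions, then one more), so the search
   below settles on width 2, which reduces register pressure.  */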
2420 width_min = 1;
2421 while (width > width_min)
2423 int width_mid = (width + width_min) / 2;
2425 if (get_required_cycles (ops_num, width_mid) == cycles_best)
2426 width = width_mid;
2427 else if (width_min < width_mid)
2428 width_min = width_mid;
2429 else
2430 break;
2433 return width;
2436 /* Recursively rewrite our linearized statements so that the operators
2437 match those in OPS[OPINDEX], putting the computation in rank
2438 order and trying to allow operations to be executed in
2439 parallel. */
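/* Roughly speaking (ignoring the operand-swapping heuristic), for
   five operands a, b, c, d, e and width 2 the chain is emitted as
     _1 = d + e;
     _2 = b + c;
     _3 = _1 + _2;
     lhs = _3 + a;
   so the first two statements are independent of each other.  */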
2441 static void
2442 rewrite_expr_tree_parallel (gimple stmt, int width,
2443 VEC(operand_entry_t, heap) * ops)
2445 enum tree_code opcode = gimple_assign_rhs_code (stmt);
2446 int op_num = VEC_length (operand_entry_t, ops);
2447 int stmt_num = op_num - 1;
2448 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
2449 int op_index = op_num - 1;
2450 int stmt_index = 0;
2451 int ready_stmts_end = 0;
2452 int i = 0;
2453 tree last_rhs1 = gimple_assign_rhs1 (stmt);
2454 tree lhs_var;
2456 /* We start expression rewriting from the top statements.
2457 So, in this loop we create a full list of statements
2458 we will work with. */
2459 stmts[stmt_num - 1] = stmt;
2460 for (i = stmt_num - 2; i >= 0; i--)
2461 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
2463 lhs_var = create_tmp_reg (TREE_TYPE (last_rhs1), NULL);
2464 add_referenced_var (lhs_var);
2466 for (i = 0; i < stmt_num; i++)
2468 tree op1, op2;
2470 /* Determine whether we should use results of
2471 already handled statements or not. */
2472 if (ready_stmts_end == 0
2473 && (i - stmt_index >= width || op_index < 1))
2474 ready_stmts_end = i;
2476 /* Now we choose operands for the next statement. A nonzero
2477 value in ready_stmts_end means that we should use
2478 the results of already generated statements as new operands. */
2479 if (ready_stmts_end > 0)
2481 op1 = gimple_assign_lhs (stmts[stmt_index++]);
2482 if (ready_stmts_end > stmt_index)
2483 op2 = gimple_assign_lhs (stmts[stmt_index++]);
2484 else if (op_index >= 0)
2485 op2 = VEC_index (operand_entry_t, ops, op_index--)->op;
2486 else
2488 gcc_assert (stmt_index < i);
2489 op2 = gimple_assign_lhs (stmts[stmt_index++]);
2492 if (stmt_index >= ready_stmts_end)
2493 ready_stmts_end = 0;
2495 else
2497 if (op_index > 1)
2498 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
2499 op2 = VEC_index (operand_entry_t, ops, op_index--)->op;
2500 op1 = VEC_index (operand_entry_t, ops, op_index--)->op;
2503 /* If we emit the last statement then we should put
2504 operands into the last statement. It will also
2505 break the loop. */
2506 if (op_index < 0 && stmt_index == i)
2507 i = stmt_num - 1;
2509 if (dump_file && (dump_flags & TDF_DETAILS))
2511 fprintf (dump_file, "Transforming ");
2512 print_gimple_stmt (dump_file, stmts[i], 0, 0);
2515 /* We keep original statement only for the last one. All
2516 others are recreated. */
2517 if (i == stmt_num - 1)
2519 gimple_assign_set_rhs1 (stmts[i], op1);
2520 gimple_assign_set_rhs2 (stmts[i], op2);
2521 update_stmt (stmts[i]);
2523 else
2524 stmts[i] = build_and_add_sum (lhs_var, op1, op2, opcode);
2526 if (dump_file && (dump_flags & TDF_DETAILS))
2528 fprintf (dump_file, " into ");
2529 print_gimple_stmt (dump_file, stmts[i], 0, 0);
2533 remove_visited_stmt_chain (last_rhs1);
2536 /* Transform STMT, which is really (A + B) + (C + D), into the left
2537 linear form ((A + B) + C) + D.
2538 Recurse on D if necessary. */
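/* E.g. if STMT is x = s1 + s2 with s1 = a + b and s2 = c + d, the
   definition of s2 is moved up before STMT and the two statements are
   rewired into s2 = s1 + d and x = s2 + c, i.e. the left-linear chain
   ((a + b) + d) + c; the later rank sort decides the final operand
   order.  */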
2540 static void
2541 linearize_expr (gimple stmt)
2543 gimple_stmt_iterator gsinow, gsirhs;
2544 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2545 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
2546 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
2547 gimple newbinrhs = NULL;
2548 struct loop *loop = loop_containing_stmt (stmt);
2550 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
2551 && is_reassociable_op (binrhs, rhscode, loop));
2553 gsinow = gsi_for_stmt (stmt);
2554 gsirhs = gsi_for_stmt (binrhs);
2555 gsi_move_before (&gsirhs, &gsinow);
2557 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
2558 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
2559 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
2561 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
2562 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
2564 if (dump_file && (dump_flags & TDF_DETAILS))
2566 fprintf (dump_file, "Linearized: ");
2567 print_gimple_stmt (dump_file, stmt, 0, 0);
2570 reassociate_stats.linearized++;
2571 update_stmt (binrhs);
2572 update_stmt (binlhs);
2573 update_stmt (stmt);
2575 gimple_set_visited (stmt, true);
2576 gimple_set_visited (binlhs, true);
2577 gimple_set_visited (binrhs, true);
2579 /* Tail recurse on the new rhs if it still needs reassociation. */
2580 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
2581 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
2582 want to change the algorithm while converting to tuples. */
2583 linearize_expr (stmt);
2586 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
2587 it. Otherwise, return NULL. */
2589 static gimple
2590 get_single_immediate_use (tree lhs)
2592 use_operand_p immuse;
2593 gimple immusestmt;
2595 if (TREE_CODE (lhs) == SSA_NAME
2596 && single_imm_use (lhs, &immuse, &immusestmt)
2597 && is_gimple_assign (immusestmt))
2598 return immusestmt;
2600 return NULL;
2603 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
2604 representing the negated value. Insertions of any necessary
2605 instructions go before GSI.
2606 This function is recursive in that, if you hand it "a_5" as the
2607 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
2608 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
2610 static tree
2611 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
2613 gimple negatedefstmt = NULL;
2614 tree resultofnegate;
2616 /* If we are trying to negate a name, defined by an add, negate the
2617 add operands instead. */
2618 if (TREE_CODE (tonegate) == SSA_NAME)
2619 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
2620 if (TREE_CODE (tonegate) == SSA_NAME
2621 && is_gimple_assign (negatedefstmt)
2622 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
2623 && has_single_use (gimple_assign_lhs (negatedefstmt))
2624 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
2626 gimple_stmt_iterator gsi;
2627 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
2628 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
2630 gsi = gsi_for_stmt (negatedefstmt);
2631 rhs1 = negate_value (rhs1, &gsi);
2632 gimple_assign_set_rhs1 (negatedefstmt, rhs1);
2634 gsi = gsi_for_stmt (negatedefstmt);
2635 rhs2 = negate_value (rhs2, &gsi);
2636 gimple_assign_set_rhs2 (negatedefstmt, rhs2);
2638 update_stmt (negatedefstmt);
2639 return gimple_assign_lhs (negatedefstmt);
2642 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
2643 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
2644 NULL_TREE, true, GSI_SAME_STMT);
2645 return resultofnegate;
2648 /* Return true if we should break up the subtract in STMT into an add
2649 with negate. This is true when the subtract operands are really
2650 adds, or the subtract itself is used in an add expression. In
2651 either case, breaking up the subtract into an add with negate
2652 exposes the adds to reassociation. */
2654 static bool
2655 should_break_up_subtract (gimple stmt)
2657 tree lhs = gimple_assign_lhs (stmt);
2658 tree binlhs = gimple_assign_rhs1 (stmt);
2659 tree binrhs = gimple_assign_rhs2 (stmt);
2660 gimple immusestmt;
2661 struct loop *loop = loop_containing_stmt (stmt);
2663 if (TREE_CODE (binlhs) == SSA_NAME
2664 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
2665 return true;
2667 if (TREE_CODE (binrhs) == SSA_NAME
2668 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
2669 return true;
2671 if (TREE_CODE (lhs) == SSA_NAME
2672 && (immusestmt = get_single_immediate_use (lhs))
2673 && is_gimple_assign (immusestmt)
2674 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
2675 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
2676 return true;
2677 return false;
2680 /* Transform STMT from A - B into A + -B. */
2682 static void
2683 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
2685 tree rhs1 = gimple_assign_rhs1 (stmt);
2686 tree rhs2 = gimple_assign_rhs2 (stmt);
2688 if (dump_file && (dump_flags & TDF_DETAILS))
2690 fprintf (dump_file, "Breaking up subtract ");
2691 print_gimple_stmt (dump_file, stmt, 0, 0);
2694 rhs2 = negate_value (rhs2, gsip);
2695 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
2696 update_stmt (stmt);
2699 /* Determine whether STMT is a builtin call that raises an SSA name
2700 to an integer power and has only one use. If so, and this is early
2701 reassociation and unsafe math optimizations are permitted, place
2702 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
2703 If any of these conditions does not hold, return FALSE. */
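/* For example, a single-use call t = __builtin_pow (x, 3.0) yields
   *BASE = x and *EXPONENT = 3, whereas pow (x, 2.5) is rejected
   because the exponent is not an exact integer, and pow (x, 1.0) is
   left to ordinary expression folding.  */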
2705 static bool
2706 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
2708 tree fndecl, arg1;
2709 REAL_VALUE_TYPE c, cint;
2711 if (!first_pass_instance
2712 || !flag_unsafe_math_optimizations
2713 || !is_gimple_call (stmt)
2714 || !has_single_use (gimple_call_lhs (stmt)))
2715 return false;
2717 fndecl = gimple_call_fndecl (stmt);
2719 if (!fndecl
2720 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
2721 return false;
2723 switch (DECL_FUNCTION_CODE (fndecl))
2725 CASE_FLT_FN (BUILT_IN_POW):
2726 *base = gimple_call_arg (stmt, 0);
2727 arg1 = gimple_call_arg (stmt, 1);
2729 if (TREE_CODE (arg1) != REAL_CST)
2730 return false;
2732 c = TREE_REAL_CST (arg1);
2734 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
2735 return false;
2737 *exponent = real_to_integer (&c);
2738 real_from_integer (&cint, VOIDmode, *exponent,
2739 *exponent < 0 ? -1 : 0, 0);
2740 if (!real_identical (&c, &cint))
2741 return false;
2743 break;
2745 CASE_FLT_FN (BUILT_IN_POWI):
2746 *base = gimple_call_arg (stmt, 0);
2747 arg1 = gimple_call_arg (stmt, 1);
2749 if (!host_integerp (arg1, 0))
2750 return false;
2752 *exponent = TREE_INT_CST_LOW (arg1);
2753 break;
2755 default:
2756 return false;
2759 /* Expanding negative exponents is generally unproductive, so we don't
2760 complicate matters with those. Exponents of zero and one should
2761 have been handled by expression folding. */
2762 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
2763 return false;
2765 return true;
2768 /* Recursively linearize a binary expression that is the RHS of STMT.
2769 Place the operands of the expression tree in the vector named OPS. */
2771 static void
2772 linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt,
2773 bool is_associative, bool set_visited)
2775 tree binlhs = gimple_assign_rhs1 (stmt);
2776 tree binrhs = gimple_assign_rhs2 (stmt);
2777 gimple binlhsdef = NULL, binrhsdef = NULL;
2778 bool binlhsisreassoc = false;
2779 bool binrhsisreassoc = false;
2780 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
2781 struct loop *loop = loop_containing_stmt (stmt);
2782 tree base = NULL_TREE;
2783 HOST_WIDE_INT exponent = 0;
2785 if (set_visited)
2786 gimple_set_visited (stmt, true);
2788 if (TREE_CODE (binlhs) == SSA_NAME)
2790 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
2791 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
2792 && !stmt_could_throw_p (binlhsdef));
2795 if (TREE_CODE (binrhs) == SSA_NAME)
2797 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
2798 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
2799 && !stmt_could_throw_p (binrhsdef));
2802 /* If the LHS is not reassociable, but the RHS is, we need to swap
2803 them. If neither is reassociable, there is nothing we can do, so
2804 just put them in the ops vector. If the LHS is reassociable,
2805 linearize it. If both are reassociable, then linearize the RHS
2806 and the LHS. */
2808 if (!binlhsisreassoc)
2810 tree temp;
2812 /* If this is not an associative operation, like division, give up. */
2813 if (!is_associative)
2815 add_to_ops_vec (ops, binrhs);
2816 return;
2819 if (!binrhsisreassoc)
2821 if (rhscode == MULT_EXPR
2822 && TREE_CODE (binrhs) == SSA_NAME
2823 && acceptable_pow_call (binrhsdef, &base, &exponent))
2825 add_repeat_to_ops_vec (ops, base, exponent);
2826 gimple_set_visited (binrhsdef, true);
2828 else
2829 add_to_ops_vec (ops, binrhs);
2831 if (rhscode == MULT_EXPR
2832 && TREE_CODE (binlhs) == SSA_NAME
2833 && acceptable_pow_call (binlhsdef, &base, &exponent))
2835 add_repeat_to_ops_vec (ops, base, exponent);
2836 gimple_set_visited (binlhsdef, true);
2838 else
2839 add_to_ops_vec (ops, binlhs);
2841 return;
2844 if (dump_file && (dump_flags & TDF_DETAILS))
2846 fprintf (dump_file, "swapping operands of ");
2847 print_gimple_stmt (dump_file, stmt, 0, 0);
2850 swap_tree_operands (stmt,
2851 gimple_assign_rhs1_ptr (stmt),
2852 gimple_assign_rhs2_ptr (stmt));
2853 update_stmt (stmt);
2855 if (dump_file && (dump_flags & TDF_DETAILS))
2857 fprintf (dump_file, " is now ");
2858 print_gimple_stmt (dump_file, stmt, 0, 0);
2861 /* We want to make it so the lhs is always the reassociative op,
2862 so swap. */
2863 temp = binlhs;
2864 binlhs = binrhs;
2865 binrhs = temp;
2867 else if (binrhsisreassoc)
2869 linearize_expr (stmt);
2870 binlhs = gimple_assign_rhs1 (stmt);
2871 binrhs = gimple_assign_rhs2 (stmt);
2874 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
2875 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
2876 rhscode, loop));
2877 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
2878 is_associative, set_visited);
2880 if (rhscode == MULT_EXPR
2881 && TREE_CODE (binrhs) == SSA_NAME
2882 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
2884 add_repeat_to_ops_vec (ops, base, exponent);
2885 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
2887 else
2888 add_to_ops_vec (ops, binrhs);
2891 /* Repropagate the negates back into subtracts, since no other pass
2892 currently does it. */
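/* E.g. x = -a; y = b + x becomes y = b - a; the cases where the
   negate feeds a subtract are handled below.  */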
2894 static void
2895 repropagate_negates (void)
2897 unsigned int i = 0;
2898 tree negate;
2900 FOR_EACH_VEC_ELT (tree, plus_negates, i, negate)
2902 gimple user = get_single_immediate_use (negate);
2904 if (!user || !is_gimple_assign (user))
2905 continue;
2907 /* The negate operand can be either operand of a PLUS_EXPR
2908 (it can be the LHS if the RHS is a constant for example).
2910 Force the negate operand to the RHS of the PLUS_EXPR, then
2911 transform the PLUS_EXPR into a MINUS_EXPR. */
2912 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
2914 /* If the negated operand appears on the LHS of the
2915 PLUS_EXPR, exchange the operands of the PLUS_EXPR
2916 to force the negated operand to the RHS of the PLUS_EXPR. */
2917 if (gimple_assign_rhs1 (user) == negate)
2919 swap_tree_operands (user,
2920 gimple_assign_rhs1_ptr (user),
2921 gimple_assign_rhs2_ptr (user));
2924 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
2925 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
2926 if (gimple_assign_rhs2 (user) == negate)
2928 tree rhs1 = gimple_assign_rhs1 (user);
2929 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
2930 gimple_stmt_iterator gsi = gsi_for_stmt (user);
2931 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
2932 update_stmt (user);
2935 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
2937 if (gimple_assign_rhs1 (user) == negate)
2939 /* We have
2940 x = -a
2941 y = x - b
2942 which we transform into
2943 x = a + b
2944 y = -x .
2945 This pushes down the negate which we possibly can merge
2946 into some other operation, hence insert it into the
2947 plus_negates vector. */
2948 gimple feed = SSA_NAME_DEF_STMT (negate);
2949 tree a = gimple_assign_rhs1 (feed);
2950 tree rhs2 = gimple_assign_rhs2 (user);
2951 gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
2952 gimple_replace_lhs (feed, negate);
2953 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
2954 update_stmt (gsi_stmt (gsi));
2955 gsi2 = gsi_for_stmt (user);
2956 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
2957 update_stmt (gsi_stmt (gsi2));
2958 gsi_move_before (&gsi, &gsi2);
2959 VEC_safe_push (tree, heap, plus_negates,
2960 gimple_assign_lhs (gsi_stmt (gsi2)));
2962 else
2964 /* Transform "x = -a; y = b - x" into "y = b + a", getting
2965 rid of one operation. */
2966 gimple feed = SSA_NAME_DEF_STMT (negate);
2967 tree a = gimple_assign_rhs1 (feed);
2968 tree rhs1 = gimple_assign_rhs1 (user);
2969 gimple_stmt_iterator gsi = gsi_for_stmt (user);
2970 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
2971 update_stmt (gsi_stmt (gsi));
2977 /* Returns true if OP is of a type for which we can do reassociation.
2978 That is for integral types with wrapping overflow, for non-saturating
2979 fixed-point types, and for floating-point types when -fassociative-math is enabled. */
2981 static bool
2982 can_reassociate_p (tree op)
2984 tree type = TREE_TYPE (op);
2985 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
2986 || NON_SAT_FIXED_POINT_TYPE_P (type)
2987 || (flag_associative_math && FLOAT_TYPE_P (type)))
2988 return true;
2989 return false;
2992 /* Break up subtract operations in block BB.
2994 We do this top down because we don't know whether the subtract is
2995 part of a possible chain of reassociation except at the top.
2997 IE given
2998 d = f + g
2999 c = a + e
3000 b = c - d
3001 q = b - r
3002 k = t - q
3004 we want to break up k = t - q, but we won't until we've transformed q
3005 = b - r, which won't be broken up until we transform b = c - d.
3007 En passant, clear the GIMPLE visited flag on every statement. */
3009 static void
3010 break_up_subtract_bb (basic_block bb)
3012 gimple_stmt_iterator gsi;
3013 basic_block son;
3015 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3017 gimple stmt = gsi_stmt (gsi);
3018 gimple_set_visited (stmt, false);
3020 if (!is_gimple_assign (stmt)
3021 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3022 continue;
3024 /* Look for simple gimple subtract operations. */
3025 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3027 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3028 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3029 continue;
3031 /* Check for a subtract used only in an addition. If this
3032 is the case, transform it into an add of a negate for better
3033 reassociation. IE transform C = A - B into C = A + -B if C
3034 is only used in an addition. */
3035 if (should_break_up_subtract (stmt))
3036 break_up_subtract (stmt, &gsi);
3038 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3039 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3040 VEC_safe_push (tree, heap, plus_negates, gimple_assign_lhs (stmt));
3042 for (son = first_dom_son (CDI_DOMINATORS, bb);
3043 son;
3044 son = next_dom_son (CDI_DOMINATORS, son))
3045 break_up_subtract_bb (son);
3048 /* Used for repeated factor analysis. */
3049 struct repeat_factor_d
3051 /* An SSA name that occurs in a multiply chain. */
3052 tree factor;
3054 /* Cached rank of the factor. */
3055 unsigned rank;
3057 /* Number of occurrences of the factor in the chain. */
3058 HOST_WIDE_INT count;
3060 /* An SSA name representing the product of this factor and
3061 all factors appearing later in the repeated factor vector. */
3062 tree repr;
3065 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3066 typedef const struct repeat_factor_d *const_repeat_factor_t;
3068 DEF_VEC_O (repeat_factor);
3069 DEF_VEC_ALLOC_O (repeat_factor, heap);
3071 static VEC (repeat_factor, heap) *repeat_factor_vec;
3073 /* Used for sorting the repeat factor vector. Sort primarily by
3074 ascending occurrence count, secondarily by descending rank. */
3076 static int
3077 compare_repeat_factors (const void *x1, const void *x2)
3079 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3080 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3082 if (rf1->count != rf2->count)
3083 return rf1->count - rf2->count;
3085 return rf2->rank - rf1->rank;
3088 /* Get a new SSA name for register variable *TARGET of type TYPE.
3089 If *TARGET is null or incompatible with TYPE, create the variable
3090 first. */
3092 static tree
3093 get_reassoc_pow_ssa_name (tree *target, tree type)
3095 if (!*target || !types_compatible_p (type, TREE_TYPE (*target)))
3097 *target = create_tmp_reg (type, "reassocpow");
3098 add_referenced_var (*target);
3101 return make_ssa_name (*target, NULL);
3104 /* Look for repeated operands in OPS in the multiply tree rooted at
3105 STMT. Replace them with an optimal sequence of multiplies and powi
3106 builtin calls, and remove the used operands from OPS. Return an
3107 SSA name representing the value of the replacement sequence. */
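/* As a rough sketch of the effect, for the multiply chain
   x * x * x * x * y * y the pass emits something along the lines of
     t1 = y * x;
     t2 = __builtin_powi (t1, 2);
     t3 = __builtin_powi (x, 2);
     t4 = t2 * t3;
   removes the consumed copies of x and y from OPS, and returns t4.  */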
3109 static tree
3110 attempt_builtin_powi (gimple stmt, VEC(operand_entry_t, heap) **ops,
3111 tree *target)
3113 unsigned i, j, vec_len;
3114 int ii;
3115 operand_entry_t oe;
3116 repeat_factor_t rf1, rf2;
3117 repeat_factor rfnew;
3118 tree result = NULL_TREE;
3119 tree target_ssa, iter_result;
3120 tree type = TREE_TYPE (gimple_get_lhs (stmt));
3121 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3122 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3123 gimple mul_stmt, pow_stmt;
3125 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3126 target. */
3127 if (!powi_fndecl)
3128 return NULL_TREE;
3130 /* Allocate the repeated factor vector. */
3131 repeat_factor_vec = VEC_alloc (repeat_factor, heap, 10);
3133 /* Scan the OPS vector for all SSA names in the product and build
3134 up a vector of occurrence counts for each factor. */
3135 FOR_EACH_VEC_ELT (operand_entry_t, *ops, i, oe)
3137 if (TREE_CODE (oe->op) == SSA_NAME)
3139 FOR_EACH_VEC_ELT (repeat_factor, repeat_factor_vec, j, rf1)
3141 if (rf1->factor == oe->op)
3143 rf1->count += oe->count;
3144 break;
3148 if (j >= VEC_length (repeat_factor, repeat_factor_vec))
3150 rfnew.factor = oe->op;
3151 rfnew.rank = oe->rank;
3152 rfnew.count = oe->count;
3153 rfnew.repr = NULL_TREE;
3154 VEC_safe_push (repeat_factor, heap, repeat_factor_vec, &rfnew);
3159 /* Sort the repeated factor vector by (a) increasing occurrence count,
3160 and (b) decreasing rank. */
3161 VEC_qsort (repeat_factor, repeat_factor_vec, compare_repeat_factors);
3163 /* It is generally best to combine as many base factors as possible
3164 into a product before applying __builtin_powi to the result.
3165 However, the sort order chosen for the repeated factor vector
3166 allows us to cache partial results for the product of the base
3167 factors for subsequent use. When we already have a cached partial
3168 result from a previous iteration, it is best to make use of it
3169 before looking for another __builtin_pow opportunity.
3171 As an example, consider x * x * y * y * y * z * z * z * z.
3172 We want to first compose the product x * y * z, raise it to the
3173 second power, then multiply this by y * z, and finally multiply
3174 by z. This can be done in 5 multiplies provided we cache y * z
3175 for use in both expressions:
3177 t1 = y * z
3178 t2 = t1 * x
3179 t3 = t2 * t2
3180 t4 = t1 * t3
3181 result = t4 * z
3183 If we instead ignored the cached y * z and first multiplied by
3184 the __builtin_pow opportunity z * z, we would get the inferior:
3186 t1 = y * z
3187 t2 = t1 * x
3188 t3 = t2 * t2
3189 t4 = z * z
3190 t5 = t3 * t4
3191 result = t5 * y */
3193 vec_len = VEC_length (repeat_factor, repeat_factor_vec);
3195 /* Repeatedly look for opportunities to create a builtin_powi call. */
3196 while (true)
3198 HOST_WIDE_INT power;
3200 /* First look for the largest cached product of factors from
3201 preceding iterations. If found, create a builtin_powi for
3202 it if the minimum occurrence count for its factors is at
3203 least 2, or just use this cached product as our next
3204 multiplicand if the minimum occurrence count is 1. */
3205 FOR_EACH_VEC_ELT (repeat_factor, repeat_factor_vec, j, rf1)
3207 if (rf1->repr && rf1->count > 0)
3208 break;
3211 if (j < vec_len)
3213 power = rf1->count;
3215 if (power == 1)
3217 iter_result = rf1->repr;
3219 if (dump_file && (dump_flags & TDF_DETAILS))
3221 unsigned elt;
3222 repeat_factor_t rf;
3223 fputs ("Multiplying by cached product ", dump_file);
3224 for (elt = j; elt < vec_len; elt++)
3226 rf = VEC_index (repeat_factor, repeat_factor_vec, elt);
3227 print_generic_expr (dump_file, rf->factor, 0);
3228 if (elt < vec_len - 1)
3229 fputs (" * ", dump_file);
3231 fputs ("\n", dump_file);
3234 else
3236 iter_result = get_reassoc_pow_ssa_name (target, type);
3237 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3238 build_int_cst (integer_type_node,
3239 power));
3240 gimple_call_set_lhs (pow_stmt, iter_result);
3241 gimple_set_location (pow_stmt, gimple_location (stmt));
3242 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3244 if (dump_file && (dump_flags & TDF_DETAILS))
3246 unsigned elt;
3247 repeat_factor_t rf;
3248 fputs ("Building __builtin_pow call for cached product (",
3249 dump_file);
3250 for (elt = j; elt < vec_len; elt++)
3252 rf = VEC_index (repeat_factor, repeat_factor_vec, elt);
3253 print_generic_expr (dump_file, rf->factor, 0);
3254 if (elt < vec_len - 1)
3255 fputs (" * ", dump_file);
3257 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
3258 power);
3262 else
3264 /* Otherwise, find the first factor in the repeated factor
3265 vector whose occurrence count is at least 2. If no such
3266 factor exists, there are no builtin_powi opportunities
3267 remaining. */
3268 FOR_EACH_VEC_ELT (repeat_factor, repeat_factor_vec, j, rf1)
3270 if (rf1->count >= 2)
3271 break;
3274 if (j >= vec_len)
3275 break;
3277 power = rf1->count;
3279 if (dump_file && (dump_flags & TDF_DETAILS))
3281 unsigned elt;
3282 repeat_factor_t rf;
3283 fputs ("Building __builtin_pow call for (", dump_file);
3284 for (elt = j; elt < vec_len; elt++)
3286 rf = VEC_index (repeat_factor, repeat_factor_vec, elt);
3287 print_generic_expr (dump_file, rf->factor, 0);
3288 if (elt < vec_len - 1)
3289 fputs (" * ", dump_file);
3291 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
3294 reassociate_stats.pows_created++;
3296 /* Visit each element of the vector in reverse order (so that
3297 high-occurrence elements are visited first, and within the
3298 same occurrence count, lower-ranked elements are visited
3299 first). Form a linear product of all elements in this order
3300 whose occurrence count is at least that of element J.
3301 Record the SSA name representing the product of each element
3302 with all subsequent elements in the vector. */
3303 if (j == vec_len - 1)
3304 rf1->repr = rf1->factor;
3305 else
3307 for (ii = vec_len - 2; ii >= (int)j; ii--)
3309 tree op1, op2;
3311 rf1 = VEC_index (repeat_factor, repeat_factor_vec, ii);
3312 rf2 = VEC_index (repeat_factor, repeat_factor_vec, ii + 1);
3314 /* Init the last factor's representative to be itself. */
3315 if (!rf2->repr)
3316 rf2->repr = rf2->factor;
3318 op1 = rf1->factor;
3319 op2 = rf2->repr;
3321 target_ssa = get_reassoc_pow_ssa_name (target, type);
3322 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
3323 target_ssa,
3324 op1, op2);
3325 gimple_set_location (mul_stmt, gimple_location (stmt));
3326 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3327 rf1->repr = target_ssa;
3329 /* Don't reprocess the multiply we just introduced. */
3330 gimple_set_visited (mul_stmt, true);
3334 /* Form a call to __builtin_powi for the maximum product
3335 just formed, raised to the power obtained earlier. */
3336 rf1 = VEC_index (repeat_factor, repeat_factor_vec, j);
3337 iter_result = get_reassoc_pow_ssa_name (target, type);
3338 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3339 build_int_cst (integer_type_node,
3340 power));
3341 gimple_call_set_lhs (pow_stmt, iter_result);
3342 gimple_set_location (pow_stmt, gimple_location (stmt));
3343 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3346 /* If we previously formed at least one other builtin_powi call,
3347 form the product of this one and those others. */
3348 if (result)
3350 tree new_result = get_reassoc_pow_ssa_name (target, type);
3351 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
3352 result, iter_result);
3353 gimple_set_location (mul_stmt, gimple_location (stmt));
3354 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3355 gimple_set_visited (mul_stmt, true);
3356 result = new_result;
3358 else
3359 result = iter_result;
3361 /* Decrement the occurrence count of each element in the product
3362 by the count found above, and remove this many copies of each
3363 factor from OPS. */
3364 for (i = j; i < vec_len; i++)
3366 unsigned k = power;
3367 unsigned n;
3369 rf1 = VEC_index (repeat_factor, repeat_factor_vec, i);
3370 rf1->count -= power;
3372 FOR_EACH_VEC_ELT_REVERSE (operand_entry_t, *ops, n, oe)
3374 if (oe->op == rf1->factor)
3376 if (oe->count <= k)
3378 VEC_ordered_remove (operand_entry_t, *ops, n);
3379 k -= oe->count;
3381 if (k == 0)
3382 break;
3384 else
3386 oe->count -= k;
3387 break;
3394 /* At this point all elements in the repeated factor vector have a
3395 remaining occurrence count of 0 or 1, and those with a count of 1
3396 don't have cached representatives. Re-sort the ops vector and
3397 clean up. */
3398 VEC_qsort (operand_entry_t, *ops, sort_by_operand_rank);
3399 VEC_free (repeat_factor, heap, repeat_factor_vec);
3401 /* Return the final product computed herein. Note that there may
3402 still be some elements with single occurrence count left in OPS;
3403 those will be handled by the normal reassociation logic. */
3404 return result;
3407 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
3409 static void
3410 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
3412 tree rhs1;
3414 if (dump_file && (dump_flags & TDF_DETAILS))
3416 fprintf (dump_file, "Transforming ");
3417 print_gimple_stmt (dump_file, stmt, 0, 0);
3420 rhs1 = gimple_assign_rhs1 (stmt);
3421 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
3422 update_stmt (stmt);
3423 remove_visited_stmt_chain (rhs1);
3425 if (dump_file && (dump_flags & TDF_DETAILS))
3427 fprintf (dump_file, " into ");
3428 print_gimple_stmt (dump_file, stmt, 0, 0);
3432 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
3434 static void
3435 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
3436 tree rhs1, tree rhs2)
3438 if (dump_file && (dump_flags & TDF_DETAILS))
3440 fprintf (dump_file, "Transforming ");
3441 print_gimple_stmt (dump_file, stmt, 0, 0);
3444 gimple_assign_set_rhs_with_ops_1 (gsi, MULT_EXPR, rhs1, rhs2, NULL_TREE);
3445 update_stmt (gsi_stmt (*gsi));
3446 remove_visited_stmt_chain (rhs1);
3448 if (dump_file && (dump_flags & TDF_DETAILS))
3450 fprintf (dump_file, " into ");
3451 print_gimple_stmt (dump_file, stmt, 0, 0);
3455 /* Reassociate expressions in basic block BB and its children in the
3456 post-dominator tree. */
3458 static void
3459 reassociate_bb (basic_block bb)
3461 gimple_stmt_iterator gsi;
3462 basic_block son;
3463 tree target = NULL_TREE;
3465 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
3467 gimple stmt = gsi_stmt (gsi);
3469 if (is_gimple_assign (stmt)
3470 && !stmt_could_throw_p (stmt))
3472 tree lhs, rhs1, rhs2;
3473 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
3475 /* If this is not a gimple binary expression, there is
3476 nothing for us to do with it. */
3477 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
3478 continue;
3480 /* If this was part of an already processed statement,
3481 we don't need to touch it again. */
3482 if (gimple_visited_p (stmt))
3484 /* This statement might have become dead because of previous
3485 reassociations. */
3486 if (has_zero_uses (gimple_get_lhs (stmt)))
3488 gsi_remove (&gsi, true);
3489 release_defs (stmt);
3490 /* We might end up removing the last stmt above, which
3491 places the iterator at the end of the sequence.
3492 Reset it to the last stmt in this case, which might
3493 also be the end of the sequence if we removed the
3494 last statement of the sequence. In that case
3495 we need to bail out. */
3496 if (gsi_end_p (gsi))
3498 gsi = gsi_last_bb (bb);
3499 if (gsi_end_p (gsi))
3500 break;
3503 continue;
3506 lhs = gimple_assign_lhs (stmt);
3507 rhs1 = gimple_assign_rhs1 (stmt);
3508 rhs2 = gimple_assign_rhs2 (stmt);
3510 /* For non-bit or min/max operations we can't associate
3511 all types. Verify that here. */
3512 if (rhs_code != BIT_IOR_EXPR
3513 && rhs_code != BIT_AND_EXPR
3514 && rhs_code != BIT_XOR_EXPR
3515 && rhs_code != MIN_EXPR
3516 && rhs_code != MAX_EXPR
3517 && (!can_reassociate_p (lhs)
3518 || !can_reassociate_p (rhs1)
3519 || !can_reassociate_p (rhs2)))
3520 continue;
3522 if (associative_tree_code (rhs_code))
3524 VEC(operand_entry_t, heap) *ops = NULL;
3525 tree powi_result = NULL_TREE;
3527 /* There may be no immediate uses left by the time we
3528 get here because we may have eliminated them all. */
3529 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
3530 continue;
3532 gimple_set_visited (stmt, true);
3533 linearize_expr_tree (&ops, stmt, true, true);
3534 VEC_qsort (operand_entry_t, ops, sort_by_operand_rank);
3535 optimize_ops_list (rhs_code, &ops);
3536 if (undistribute_ops_list (rhs_code, &ops,
3537 loop_containing_stmt (stmt)))
3539 VEC_qsort (operand_entry_t, ops, sort_by_operand_rank);
3540 optimize_ops_list (rhs_code, &ops);
3543 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
3544 optimize_range_tests (rhs_code, &ops);
3546 if (first_pass_instance
3547 && rhs_code == MULT_EXPR
3548 && flag_unsafe_math_optimizations)
3549 powi_result = attempt_builtin_powi (stmt, &ops, &target);
3551 /* If the operand vector is now empty, all operands were
3552 consumed by the __builtin_powi optimization. */
3553 if (VEC_length (operand_entry_t, ops) == 0)
3554 transform_stmt_to_copy (&gsi, stmt, powi_result);
3555 else if (VEC_length (operand_entry_t, ops) == 1)
3557 tree last_op = VEC_last (operand_entry_t, ops)->op;
3559 if (powi_result)
3560 transform_stmt_to_multiply (&gsi, stmt, last_op,
3561 powi_result);
3562 else
3563 transform_stmt_to_copy (&gsi, stmt, last_op);
3565 else
3567 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
3568 int ops_num = VEC_length (operand_entry_t, ops);
3569 int width = get_reassociation_width (ops_num, rhs_code, mode);
3571 if (dump_file && (dump_flags & TDF_DETAILS))
3572 fprintf (dump_file,
3573 "Width = %d was chosen for reassociation\n", width);
3575 if (width > 1
3576 && VEC_length (operand_entry_t, ops) > 3)
3577 rewrite_expr_tree_parallel (stmt, width, ops);
3578 else
3579 rewrite_expr_tree (stmt, 0, ops, false);
3581 /* If we combined some repeated factors into a
3582 __builtin_powi call, multiply that result by the
3583 reassociated operands. */
3584 if (powi_result)
3586 gimple mul_stmt;
3587 tree type = TREE_TYPE (gimple_get_lhs (stmt));
3588 tree target_ssa = get_reassoc_pow_ssa_name (&target,
3589 type);
3590 gimple_set_lhs (stmt, target_ssa);
3591 update_stmt (stmt);
3592 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
3593 powi_result,
3594 target_ssa);
3595 gimple_set_location (mul_stmt, gimple_location (stmt));
3596 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
3600 VEC_free (operand_entry_t, heap, ops);
3604 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
3605 son;
3606 son = next_dom_son (CDI_POST_DOMINATORS, son))
3607 reassociate_bb (son);
3610 void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops);
3611 void debug_ops_vector (VEC (operand_entry_t, heap) *ops);
3613 /* Dump the operand entry vector OPS to FILE. */
3615 void
3616 dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops)
3618 operand_entry_t oe;
3619 unsigned int i;
3621 FOR_EACH_VEC_ELT (operand_entry_t, ops, i, oe)
3623 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
3624 print_generic_expr (file, oe->op, 0);
3628 /* Dump the operand entry vector OPS to STDERR. */
3630 DEBUG_FUNCTION void
3631 debug_ops_vector (VEC (operand_entry_t, heap) *ops)
3633 dump_ops_vector (stderr, ops);
3636 static void
3637 do_reassoc (void)
3639 break_up_subtract_bb (ENTRY_BLOCK_PTR);
3640 reassociate_bb (EXIT_BLOCK_PTR);
3643 /* Initialize the reassociation pass. */
3645 static void
3646 init_reassoc (void)
3648 int i;
3649 long rank = 2;
3650 tree param;
3651 int *bbs = XNEWVEC (int, last_basic_block + 1);
3653 /* Find the loops, so that we can prevent moving calculations in
3654 them. */
3655 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
3657 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
3659 operand_entry_pool = create_alloc_pool ("operand entry pool",
3660 sizeof (struct operand_entry), 30);
3661 next_operand_entry_id = 0;
3663 /* Reverse Post Order (RPO) will give us an ordering where
3664 deeper loops come later. */
3665 pre_and_rev_post_order_compute (NULL, bbs, false);
3666 bb_rank = XCNEWVEC (long, last_basic_block + 1);
3667 operand_rank = pointer_map_create ();
3669 /* Give each argument a distinct rank. */
3670 for (param = DECL_ARGUMENTS (current_function_decl);
3671 param;
3672 param = DECL_CHAIN (param))
3674 if (gimple_default_def (cfun, param) != NULL)
3676 tree def = gimple_default_def (cfun, param);
3677 insert_operand_rank (def, ++rank);
3681 /* Give the chain decl a distinct rank. */
3682 if (cfun->static_chain_decl != NULL)
3684 tree def = gimple_default_def (cfun, cfun->static_chain_decl);
3685 if (def != NULL)
3686 insert_operand_rank (def, ++rank);
3689 /* Set up rank for each BB */
3690 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
3691 bb_rank[bbs[i]] = ++rank << 16;
3693 free (bbs);
3694 calculate_dominance_info (CDI_POST_DOMINATORS);
3695 plus_negates = NULL;
3698 /* Cleanup after the reassociation pass, and print stats if
3699 requested. */
3701 static void
3702 fini_reassoc (void)
3704 statistics_counter_event (cfun, "Linearized",
3705 reassociate_stats.linearized);
3706 statistics_counter_event (cfun, "Constants eliminated",
3707 reassociate_stats.constants_eliminated);
3708 statistics_counter_event (cfun, "Ops eliminated",
3709 reassociate_stats.ops_eliminated);
3710 statistics_counter_event (cfun, "Statements rewritten",
3711 reassociate_stats.rewritten);
3712 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
3713 reassociate_stats.pows_encountered);
3714 statistics_counter_event (cfun, "Built-in powi calls created",
3715 reassociate_stats.pows_created);
3717 pointer_map_destroy (operand_rank);
3718 free_alloc_pool (operand_entry_pool);
3719 free (bb_rank);
3720 VEC_free (tree, heap, plus_negates);
3721 free_dominance_info (CDI_POST_DOMINATORS);
3722 loop_optimizer_finalize ();
3725 /* Gate and execute functions for Reassociation. */
3727 static unsigned int
3728 execute_reassoc (void)
3730 init_reassoc ();
3732 do_reassoc ();
3733 repropagate_negates ();
3735 fini_reassoc ();
3736 return 0;
3739 static bool
3740 gate_tree_ssa_reassoc (void)
3742 return flag_tree_reassoc != 0;
3745 struct gimple_opt_pass pass_reassoc =
3748 GIMPLE_PASS,
3749 "reassoc", /* name */
3750 gate_tree_ssa_reassoc, /* gate */
3751 execute_reassoc, /* execute */
3752 NULL, /* sub */
3753 NULL, /* next */
3754 0, /* static_pass_number */
3755 TV_TREE_REASSOC, /* tv_id */
3756 PROP_cfg | PROP_ssa, /* properties_required */
3757 0, /* properties_provided */
3758 0, /* properties_destroyed */
3759 0, /* todo_flags_start */
3760 TODO_verify_ssa
3761 | TODO_verify_flow
3762 | TODO_ggc_collect /* todo_flags_finish */