PR sanitizer/59009
gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "tree.h"
29 #include "basic-block.h"
30 #include "gimple-pretty-print.h"
31 #include "tree-inline.h"
32 #include "gimplify.h"
33 #include "gimple-ssa.h"
34 #include "tree-cfg.h"
35 #include "tree-phinodes.h"
36 #include "ssa-iterators.h"
37 #include "tree-ssanames.h"
38 #include "tree-ssa-loop-niter.h"
39 #include "tree-ssa-loop.h"
40 #include "tree-dfa.h"
41 #include "tree-ssa.h"
42 #include "tree-iterator.h"
43 #include "tree-pass.h"
44 #include "alloc-pool.h"
45 #include "vec.h"
46 #include "langhooks.h"
47 #include "pointer-set.h"
48 #include "cfgloop.h"
49 #include "flags.h"
50 #include "target.h"
51 #include "params.h"
52 #include "diagnostic-core.h"
54 /* This is a simple global reassociation pass. It is, in part, based
55 on the LLVM pass of the same name (They do some things more/less
56 than we do, in different orders, etc).
58 It consists of five steps:
60 1. Breaking up subtract operations into addition + negate, where
61 it would promote the reassociation of adds.
63 2. Left linearization of the expression trees, so that (A+B)+(C+D)
64 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
65 During linearization, we place the operands of the binary
66 expressions into a vector of operand_entry_t
68 3. Optimization of the operand lists, eliminating things like a +
69 -a, a & a, etc.
71 3a. Combine repeated factors with the same occurrence counts
72 into a __builtin_powi call that will later be optimized into
73 an optimal number of multiplies.
75 4. Rewrite the expression trees we linearized and optimized so
76 they are in proper rank order.
78 5. Repropagate negates, as nothing else will clean it up ATM.
80 A bit of theory on #4, since nobody seems to write anything down
81 about why it makes sense to do it the way they do it:
83 We could do this much nicer theoretically, but don't (for reasons
84 explained after how to do it theoretically nice :P).
86 In order to promote the most redundancy elimination, you want
87 binary expressions whose operands are the same rank (or
88 preferably, the same value) exposed to the redundancy eliminator,
89 for possible elimination.
91 So the way to do this if we really cared, is to build the new op
92 tree from the leaves to the roots, merging as you go, and putting the
93 new op on the end of the worklist, until you are left with one
94 thing on the worklist.
96 IE if you have to rewrite the following set of operands (listed with
97 rank in parentheses), with opcode PLUS_EXPR:
99 a (1), b (1), c (1), d (2), e (2)
102 We start with our merge worklist empty, and the ops list with all of
103 those on it.
105 You want to first merge all leaves of the same rank, as much as
106 possible.
108 So first build a binary op of
110 mergetmp = a + b, and put "mergetmp" on the merge worklist.
112 Because there is no three operand form of PLUS_EXPR, c is not going to
113 be exposed to redundancy elimination as a rank 1 operand.
115 So you might as well throw it on the merge worklist (you could also
116 consider it to now be a rank two operand, and merge it with d and e,
117 but in this case, you then have evicted e from a binary op. So at
118 least in this situation, you can't win.)
120 Then build a binary op of d + e
121 mergetmp2 = d + e
123 and put mergetmp2 on the merge worklist.
125 so merge worklist = {mergetmp, c, mergetmp2}
127 Continue building binary ops of these operations until you have only
128 one operation left on the worklist.
130 So we have
132 build binary op
133 mergetmp3 = mergetmp + c
135 worklist = {mergetmp2, mergetmp3}
137 mergetmp4 = mergetmp2 + mergetmp3
139 worklist = {mergetmp4}
141 because we have one operation left, we can now just set the original
142 statement equal to the result of that operation.
144 This will at least expose a + b and d + e to redundancy elimination
145 as binary operations.
147 For extra points, you can reuse the old statements to build the
148 mergetmps, since you shouldn't run out.
150 So why don't we do this?
152 Because it's expensive, and rarely will help. Most trees we are
153 reassociating have 3 or fewer ops. If they have 2 ops, they already
154 will be written into a nice single binary op. If you have 3 ops, a
155 single simple check suffices to tell you whether the first two are of the
156 same rank. If so, you know to order it
158 mergetmp = op1 + op2
159 newstmt = mergetmp + op3
161 instead of
162 mergetmp = op2 + op3
163 newstmt = mergetmp + op1
165 If all three are of the same rank, you can't expose them all in a
166 single binary operator anyway, so the above is *still* the best you
167 can do.
169 Thus, this is what we do. When we have three ops left, we check to see
170 what order to put them in, and call it a day. As a nod to vector sum
171 reduction, we check if any of the ops are really a phi node that is a
172 destructive update for the associating op, and keep the destructive
173 update together for vector sum reduction recognition. */
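/* Illustrative walk-through (added for exposition; not part of the
   original sources): how the five steps above act on a small fragment,
   assuming a, b and c are incoming values of equal rank.

     x = a - b;          step 1:  x = a + (-b);
     y = c - a;                   y = c + (-a);
     r = x + y;          step 2:  linearize into ops = { a, -b, c, -a }
                         step 3:  a and -a cancel  ->  ops = { -b, c }
                         step 4:  rewrite in rank order:  r = -b + c
                         step 5:  repropagate the negate: r = c - b  */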
176 /* Statistics */
177 static struct
178 {
179 int linearized;
180 int constants_eliminated;
181 int ops_eliminated;
182 int rewritten;
183 int pows_encountered;
184 int pows_created;
185 } reassociate_stats;
187 /* Operator, rank pair. */
188 typedef struct operand_entry
189 {
190 unsigned int rank;
191 int id;
192 tree op;
193 unsigned int count;
194 } *operand_entry_t;
196 static alloc_pool operand_entry_pool;
198 /* This is used to assign a unique ID to each struct operand_entry
199 so that qsort results are identical on different hosts. */
200 static int next_operand_entry_id;
202 /* Starting rank number for a given basic block, so that we can rank
203 operations using unmovable instructions in that BB based on the bb
204 depth. */
205 static long *bb_rank;
207 /* Operand->rank hashtable. */
208 static struct pointer_map_t *operand_rank;
210 /* Forward decls. */
211 static long get_rank (tree);
214 /* Bias amount for loop-carried phis. We want this to be larger than
215 the depth of any reassociation tree we can see, but not larger than
216 the rank difference between two blocks. */
217 #define PHI_LOOP_BIAS (1 << 15)
219 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
220 an innermost loop, and the phi has only a single use which is inside
221 the loop, then the rank is the block rank of the loop latch plus an
222 extra bias for the loop-carried dependence. This causes expressions
223 calculated into an accumulator variable to be independent for each
224 iteration of the loop. If STMT is some other phi, the rank is the
225 block rank of its containing block. */
226 static long
227 phi_rank (gimple stmt)
229 basic_block bb = gimple_bb (stmt);
230 struct loop *father = bb->loop_father;
231 tree res;
232 unsigned i;
233 use_operand_p use;
234 gimple use_stmt;
236 /* We only care about real loops (those with a latch). */
237 if (!father->latch)
238 return bb_rank[bb->index];
240 /* Interesting phis must be in headers of innermost loops. */
241 if (bb != father->header
242 || father->inner)
243 return bb_rank[bb->index];
245 /* Ignore virtual SSA_NAMEs. */
246 res = gimple_phi_result (stmt);
247 if (virtual_operand_p (res))
248 return bb_rank[bb->index];
250 /* The phi definition must have a single use, and that use must be
251 within the loop. Otherwise this isn't an accumulator pattern. */
252 if (!single_imm_use (res, &use, &use_stmt)
253 || gimple_bb (use_stmt)->loop_father != father)
254 return bb_rank[bb->index];
256 /* Look for phi arguments from within the loop. If found, bias this phi. */
257 for (i = 0; i < gimple_phi_num_args (stmt); i++)
259 tree arg = gimple_phi_arg_def (stmt, i);
260 if (TREE_CODE (arg) == SSA_NAME
261 && !SSA_NAME_IS_DEFAULT_DEF (arg))
263 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
264 if (gimple_bb (def_stmt)->loop_father == father)
265 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
269 /* Must be an uninteresting phi. */
270 return bb_rank[bb->index];
273 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
274 loop-carried dependence of an innermost loop, return TRUE; else
275 return FALSE. */
276 static bool
277 loop_carried_phi (tree exp)
279 gimple phi_stmt;
280 long block_rank;
282 if (TREE_CODE (exp) != SSA_NAME
283 || SSA_NAME_IS_DEFAULT_DEF (exp))
284 return false;
286 phi_stmt = SSA_NAME_DEF_STMT (exp);
288 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
289 return false;
291 /* Non-loop-carried phis have block rank. Loop-carried phis have
292 an additional bias added in. If this phi doesn't have block rank,
293 it's biased and should not be propagated. */
294 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
296 if (phi_rank (phi_stmt) != block_rank)
297 return true;
299 return false;
302 /* Return the maximum of RANK and the rank that should be propagated
303 from expression OP. For most operands, this is just the rank of OP.
304 For loop-carried phis, the value is zero to avoid undoing the bias
305 in favor of the phi. */
306 static long
307 propagate_rank (long rank, tree op)
309 long op_rank;
311 if (loop_carried_phi (op))
312 return rank;
314 op_rank = get_rank (op);
316 return MAX (rank, op_rank);
319 /* Look up the operand rank structure for expression E. */
321 static inline long
322 find_operand_rank (tree e)
324 void **slot = pointer_map_contains (operand_rank, e);
325 return slot ? (long) (intptr_t) *slot : -1;
328 /* Insert {E,RANK} into the operand rank hashtable. */
330 static inline void
331 insert_operand_rank (tree e, long rank)
333 void **slot;
334 gcc_assert (rank > 0);
335 slot = pointer_map_insert (operand_rank, e);
336 gcc_assert (!*slot);
337 *slot = (void *) (intptr_t) rank;
340 /* Given an expression E, return the rank of the expression. */
342 static long
343 get_rank (tree e)
345 /* Constants have rank 0. */
346 if (is_gimple_min_invariant (e))
347 return 0;
349 /* SSA_NAME's have the rank of the expression they are the result of.
351 For globals and uninitialized values, the rank is 0.
352 For function arguments, use the pre-setup rank.
353 For PHI nodes, stores, asm statements, etc, we use the rank of
354 the BB.
355 For simple operations, the rank is the maximum rank of any of
356 its operands, or the bb_rank, whichever is less.
357 I make no claims that this is optimal; however, it gives good
358 results. */
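/* A small worked example of the rules above (illustrative only; it
   assumes the incoming arguments a and b were pre-assigned rank 1
   during pass setup):

     t1 = a + b;    rank = MAX (1, 1) + 1 = 2
     t2 = t1 + 5;   rank = MAX (2, 0) + 1 = 3   (constants have rank 0)

   so t2 sorts ahead of t1 when operands are ordered by decreasing
   rank.  */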
360 /* We make an exception to the normal ranking system to break
361 dependences of accumulator variables in loops. Suppose we
362 have a simple one-block loop containing:
364 x_1 = phi(x_0, x_2)
365 b = a + x_1
366 c = b + d
367 x_2 = c + e
369 As shown, each iteration of the calculation into x is fully
370 dependent upon the iteration before it. We would prefer to
371 see this in the form:
373 x_1 = phi(x_0, x_2)
374 b = a + d
375 c = b + e
376 x_2 = c + x_1
378 If the loop is unrolled, the calculations of b and c from
379 different iterations can be interleaved.
381 To obtain this result during reassociation, we bias the rank
382 of the phi definition x_1 upward, when it is recognized as an
383 accumulator pattern. The artificial rank causes it to be
384 added last, providing the desired independence. */
386 if (TREE_CODE (e) == SSA_NAME)
388 gimple stmt;
389 long rank;
390 int i, n;
391 tree op;
393 if (SSA_NAME_IS_DEFAULT_DEF (e))
394 return find_operand_rank (e);
396 stmt = SSA_NAME_DEF_STMT (e);
397 if (gimple_code (stmt) == GIMPLE_PHI)
398 return phi_rank (stmt);
400 if (!is_gimple_assign (stmt)
401 || gimple_vdef (stmt))
402 return bb_rank[gimple_bb (stmt)->index];
404 /* If we already have a rank for this expression, use that. */
405 rank = find_operand_rank (e);
406 if (rank != -1)
407 return rank;
409 /* Otherwise, find the maximum rank for the operands. As an
410 exception, remove the bias from loop-carried phis when propagating
411 the rank so that dependent operations are not also biased. */
412 rank = 0;
413 if (gimple_assign_single_p (stmt))
415 tree rhs = gimple_assign_rhs1 (stmt);
416 n = TREE_OPERAND_LENGTH (rhs);
417 if (n == 0)
418 rank = propagate_rank (rank, rhs);
419 else
421 for (i = 0; i < n; i++)
423 op = TREE_OPERAND (rhs, i);
425 if (op != NULL_TREE)
426 rank = propagate_rank (rank, op);
430 else
432 n = gimple_num_ops (stmt);
433 for (i = 1; i < n; i++)
435 op = gimple_op (stmt, i);
436 gcc_assert (op);
437 rank = propagate_rank (rank, op);
441 if (dump_file && (dump_flags & TDF_DETAILS))
443 fprintf (dump_file, "Rank for ");
444 print_generic_expr (dump_file, e, 0);
445 fprintf (dump_file, " is %ld\n", (rank + 1));
448 /* Note the rank in the hashtable so we don't recompute it. */
449 insert_operand_rank (e, (rank + 1));
450 return (rank + 1);
453 /* Globals, etc, are rank 0 */
454 return 0;
458 /* We want integer ones to end up last no matter what, since they are
459 the ones we can do the most with. */
460 #define INTEGER_CONST_TYPE 1 << 3
461 #define FLOAT_CONST_TYPE 1 << 2
462 #define OTHER_CONST_TYPE 1 << 1
464 /* Classify an invariant tree into integer, float, or other, so that
465 we can sort them to be near other constants of the same type. */
466 static inline int
467 constant_type (tree t)
469 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
470 return INTEGER_CONST_TYPE;
471 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
472 return FLOAT_CONST_TYPE;
473 else
474 return OTHER_CONST_TYPE;
477 /* qsort comparison function to sort operand entries PA and PB by rank
478 so that the sorted array is ordered by rank in decreasing order. */
479 static int
480 sort_by_operand_rank (const void *pa, const void *pb)
482 const operand_entry_t oea = *(const operand_entry_t *)pa;
483 const operand_entry_t oeb = *(const operand_entry_t *)pb;
485 /* It's nicer for optimize_expression if constants that are likely
486 to fold when added/multiplied/whatever are put next to each
487 other. Since all constants have rank 0, order them by type. */
488 if (oeb->rank == 0 && oea->rank == 0)
490 if (constant_type (oeb->op) != constant_type (oea->op))
491 return constant_type (oeb->op) - constant_type (oea->op);
492 else
493 /* To make sorting result stable, we use unique IDs to determine
494 order. */
495 return oeb->id - oea->id;
498 /* Lastly, make sure the versions that are the same go next to each
499 other. We use SSA_NAME_VERSION because it's stable. */
500 if ((oeb->rank - oea->rank == 0)
501 && TREE_CODE (oea->op) == SSA_NAME
502 && TREE_CODE (oeb->op) == SSA_NAME)
504 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
505 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
506 else
507 return oeb->id - oea->id;
510 if (oeb->rank != oea->rank)
511 return oeb->rank - oea->rank;
512 else
513 return oeb->id - oea->id;
516 /* Add an operand entry to *OPS for the tree operand OP. */
518 static void
519 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
521 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
523 oe->op = op;
524 oe->rank = get_rank (op);
525 oe->id = next_operand_entry_id++;
526 oe->count = 1;
527 ops->safe_push (oe);
530 /* Add an operand entry to *OPS for the tree operand OP with repeat
531 count REPEAT. */
533 static void
534 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
535 HOST_WIDE_INT repeat)
537 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
539 oe->op = op;
540 oe->rank = get_rank (op);
541 oe->id = next_operand_entry_id++;
542 oe->count = repeat;
543 ops->safe_push (oe);
545 reassociate_stats.pows_encountered++;
548 /* Return true if STMT is a reassociable operation containing a binary
549 operation with tree code CODE, and is inside LOOP. */
551 static bool
552 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
554 basic_block bb = gimple_bb (stmt);
556 if (gimple_bb (stmt) == NULL)
557 return false;
559 if (!flow_bb_inside_loop_p (loop, bb))
560 return false;
562 if (is_gimple_assign (stmt)
563 && gimple_assign_rhs_code (stmt) == code
564 && has_single_use (gimple_assign_lhs (stmt)))
565 return true;
567 return false;
571 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
572 operand of that operation. Otherwise, return NULL_TREE. */
574 static tree
575 get_unary_op (tree name, enum tree_code opcode)
577 gimple stmt = SSA_NAME_DEF_STMT (name);
579 if (!is_gimple_assign (stmt))
580 return NULL_TREE;
582 if (gimple_assign_rhs_code (stmt) == opcode)
583 return gimple_assign_rhs1 (stmt);
584 return NULL_TREE;
587 /* If CURR and LAST are a pair of ops that OPCODE allows us to
588 eliminate through equivalences, do so, remove them from OPS, and
589 return true. Otherwise, return false. */
591 static bool
592 eliminate_duplicate_pair (enum tree_code opcode,
593 vec<operand_entry_t> *ops,
594 bool *all_done,
595 unsigned int i,
596 operand_entry_t curr,
597 operand_entry_t last)
600 /* If we have two of the same op, and the opcode is &, |, min, or max,
601 we can eliminate one of them.
602 If we have two of the same op, and the opcode is ^, we can
603 eliminate both of them. */
605 if (last && last->op == curr->op)
607 switch (opcode)
609 case MAX_EXPR:
610 case MIN_EXPR:
611 case BIT_IOR_EXPR:
612 case BIT_AND_EXPR:
613 if (dump_file && (dump_flags & TDF_DETAILS))
615 fprintf (dump_file, "Equivalence: ");
616 print_generic_expr (dump_file, curr->op, 0);
617 fprintf (dump_file, " [&|minmax] ");
618 print_generic_expr (dump_file, last->op, 0);
619 fprintf (dump_file, " -> ");
620 print_generic_stmt (dump_file, last->op, 0);
623 ops->ordered_remove (i);
624 reassociate_stats.ops_eliminated ++;
626 return true;
628 case BIT_XOR_EXPR:
629 if (dump_file && (dump_flags & TDF_DETAILS))
631 fprintf (dump_file, "Equivalence: ");
632 print_generic_expr (dump_file, curr->op, 0);
633 fprintf (dump_file, " ^ ");
634 print_generic_expr (dump_file, last->op, 0);
635 fprintf (dump_file, " -> nothing\n");
638 reassociate_stats.ops_eliminated += 2;
640 if (ops->length () == 2)
642 ops->create (0);
643 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
644 *all_done = true;
646 else
648 ops->ordered_remove (i-1);
649 ops->ordered_remove (i-1);
652 return true;
654 default:
655 break;
658 return false;
661 static vec<tree> plus_negates;
663 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
664 expression, look in OPS for a corresponding positive operation to cancel
665 it out. If we find one, remove the other from OPS, replace
666 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
667 return false. */
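/* For example (illustrative): with OPS = { t, x, y } where t is defined
   as t = -x, the pair t/x cancels and OPS ends up as { y, 0 }; likewise
   if t = ~x, then t + x folds to -1, since ~x is -x - 1 in two's
   complement arithmetic.  */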
669 static bool
670 eliminate_plus_minus_pair (enum tree_code opcode,
671 vec<operand_entry_t> *ops,
672 unsigned int currindex,
673 operand_entry_t curr)
675 tree negateop;
676 tree notop;
677 unsigned int i;
678 operand_entry_t oe;
680 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
681 return false;
683 negateop = get_unary_op (curr->op, NEGATE_EXPR);
684 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
685 if (negateop == NULL_TREE && notop == NULL_TREE)
686 return false;
688 /* Any non-negated version will have a rank that is one less than
689 the current rank. So once we hit those ranks, if we don't find
690 one, we can stop. */
692 for (i = currindex + 1;
693 ops->iterate (i, &oe)
694 && oe->rank >= curr->rank - 1 ;
695 i++)
697 if (oe->op == negateop)
700 if (dump_file && (dump_flags & TDF_DETAILS))
702 fprintf (dump_file, "Equivalence: ");
703 print_generic_expr (dump_file, negateop, 0);
704 fprintf (dump_file, " + -");
705 print_generic_expr (dump_file, oe->op, 0);
706 fprintf (dump_file, " -> 0\n");
709 ops->ordered_remove (i);
710 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
711 ops->ordered_remove (currindex);
712 reassociate_stats.ops_eliminated ++;
714 return true;
716 else if (oe->op == notop)
718 tree op_type = TREE_TYPE (oe->op);
720 if (dump_file && (dump_flags & TDF_DETAILS))
722 fprintf (dump_file, "Equivalence: ");
723 print_generic_expr (dump_file, notop, 0);
724 fprintf (dump_file, " + ~");
725 print_generic_expr (dump_file, oe->op, 0);
726 fprintf (dump_file, " -> -1\n");
729 ops->ordered_remove (i);
730 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
731 ops->ordered_remove (currindex);
732 reassociate_stats.ops_eliminated ++;
734 return true;
738 /* CURR->OP is a negate expr in a plus expr: save it for later
739 inspection in repropagate_negates(). */
740 if (negateop != NULL_TREE)
741 plus_negates.safe_push (curr->op);
743 return false;
746 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
747 bitwise not expression, look in OPS for a corresponding operand to
748 cancel it out. If we find one, remove the other from OPS, replace
749 OPS[CURRINDEX] with 0, and return true. Otherwise, return
750 false. */
752 static bool
753 eliminate_not_pairs (enum tree_code opcode,
754 vec<operand_entry_t> *ops,
755 unsigned int currindex,
756 operand_entry_t curr)
758 tree notop;
759 unsigned int i;
760 operand_entry_t oe;
762 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
763 || TREE_CODE (curr->op) != SSA_NAME)
764 return false;
766 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
767 if (notop == NULL_TREE)
768 return false;
770 /* Any non-not version will have a rank that is one less than
771 the current rank. So once we hit those ranks, if we don't find
772 one, we can stop. */
774 for (i = currindex + 1;
775 ops->iterate (i, &oe)
776 && oe->rank >= curr->rank - 1;
777 i++)
779 if (oe->op == notop)
781 if (dump_file && (dump_flags & TDF_DETAILS))
783 fprintf (dump_file, "Equivalence: ");
784 print_generic_expr (dump_file, notop, 0);
785 if (opcode == BIT_AND_EXPR)
786 fprintf (dump_file, " & ~");
787 else if (opcode == BIT_IOR_EXPR)
788 fprintf (dump_file, " | ~");
789 print_generic_expr (dump_file, oe->op, 0);
790 if (opcode == BIT_AND_EXPR)
791 fprintf (dump_file, " -> 0\n");
792 else if (opcode == BIT_IOR_EXPR)
793 fprintf (dump_file, " -> -1\n");
796 if (opcode == BIT_AND_EXPR)
797 oe->op = build_zero_cst (TREE_TYPE (oe->op));
798 else if (opcode == BIT_IOR_EXPR)
799 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
800 TYPE_PRECISION (TREE_TYPE (oe->op)));
802 reassociate_stats.ops_eliminated += ops->length () - 1;
803 ops->truncate (0);
804 ops->quick_push (oe);
805 return true;
809 return false;
812 /* Use constant value that may be present in OPS to try to eliminate
813 operands. Note that this function is only really used when we've
814 eliminated ops for other reasons, or merged constants. Across
815 single statements, fold already does all of this, plus more. There
816 is little point in duplicating logic, so I've only included the
817 identities that I could ever construct testcases to trigger. */
819 static void
820 eliminate_using_constants (enum tree_code opcode,
821 vec<operand_entry_t> *ops)
823 operand_entry_t oelast = ops->last ();
824 tree type = TREE_TYPE (oelast->op);
826 if (oelast->rank == 0
827 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
829 switch (opcode)
831 case BIT_AND_EXPR:
832 if (integer_zerop (oelast->op))
834 if (ops->length () != 1)
836 if (dump_file && (dump_flags & TDF_DETAILS))
837 fprintf (dump_file, "Found & 0, removing all other ops\n");
839 reassociate_stats.ops_eliminated += ops->length () - 1;
841 ops->truncate (0);
842 ops->quick_push (oelast);
843 return;
846 else if (integer_all_onesp (oelast->op))
848 if (ops->length () != 1)
850 if (dump_file && (dump_flags & TDF_DETAILS))
851 fprintf (dump_file, "Found & -1, removing\n");
852 ops->pop ();
853 reassociate_stats.ops_eliminated++;
856 break;
857 case BIT_IOR_EXPR:
858 if (integer_all_onesp (oelast->op))
860 if (ops->length () != 1)
862 if (dump_file && (dump_flags & TDF_DETAILS))
863 fprintf (dump_file, "Found | -1, removing all other ops\n");
865 reassociate_stats.ops_eliminated += ops->length () - 1;
867 ops->truncate (0);
868 ops->quick_push (oelast);
869 return;
872 else if (integer_zerop (oelast->op))
874 if (ops->length () != 1)
876 if (dump_file && (dump_flags & TDF_DETAILS))
877 fprintf (dump_file, "Found | 0, removing\n");
878 ops->pop ();
879 reassociate_stats.ops_eliminated++;
882 break;
883 case MULT_EXPR:
884 if (integer_zerop (oelast->op)
885 || (FLOAT_TYPE_P (type)
886 && !HONOR_NANS (TYPE_MODE (type))
887 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
888 && real_zerop (oelast->op)))
890 if (ops->length () != 1)
892 if (dump_file && (dump_flags & TDF_DETAILS))
893 fprintf (dump_file, "Found * 0, removing all other ops\n");
895 reassociate_stats.ops_eliminated += ops->length () - 1;
896 ops->truncate (1);
897 ops->quick_push (oelast);
898 return;
901 else if (integer_onep (oelast->op)
902 || (FLOAT_TYPE_P (type)
903 && !HONOR_SNANS (TYPE_MODE (type))
904 && real_onep (oelast->op)))
906 if (ops->length () != 1)
908 if (dump_file && (dump_flags & TDF_DETAILS))
909 fprintf (dump_file, "Found * 1, removing\n");
910 ops->pop ();
911 reassociate_stats.ops_eliminated++;
912 return;
915 break;
916 case BIT_XOR_EXPR:
917 case PLUS_EXPR:
918 case MINUS_EXPR:
919 if (integer_zerop (oelast->op)
920 || (FLOAT_TYPE_P (type)
921 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
922 && fold_real_zero_addition_p (type, oelast->op,
923 opcode == MINUS_EXPR)))
925 if (ops->length () != 1)
927 if (dump_file && (dump_flags & TDF_DETAILS))
928 fprintf (dump_file, "Found [|^+] 0, removing\n");
929 ops->pop ();
930 reassociate_stats.ops_eliminated++;
931 return;
934 break;
935 default:
936 break;
942 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
943 bool, bool);
945 /* Structure for tracking and counting operands. */
946 typedef struct oecount_s {
947 int cnt;
948 int id;
949 enum tree_code oecode;
950 tree op;
951 } oecount;
954 /* The heap for the oecount hashtable and the sorted list of operands. */
955 static vec<oecount> cvec;
958 /* Oecount hashtable helpers. */
960 struct oecount_hasher : typed_noop_remove <void>
961 {
962 /* Note that this hash table stores integers, not pointers.
963 So, observe the casting in the member functions. */
964 typedef void value_type;
965 typedef void compare_type;
966 static inline hashval_t hash (const value_type *);
967 static inline bool equal (const value_type *, const compare_type *);
968 };
970 /* Hash function for oecount. */
972 inline hashval_t
973 oecount_hasher::hash (const value_type *p)
975 const oecount *c = &cvec[(size_t)p - 42];
976 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
979 /* Comparison function for oecount. */
981 inline bool
982 oecount_hasher::equal (const value_type *p1, const compare_type *p2)
984 const oecount *c1 = &cvec[(size_t)p1 - 42];
985 const oecount *c2 = &cvec[(size_t)p2 - 42];
986 return (c1->oecode == c2->oecode
987 && c1->op == c2->op);
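/* Note on the encoding used by the two members above (comment added
   for exposition): the table stores small integers disguised as
   pointers rather than real addresses.  The entry for cvec index I is
   stored as the value I + 42, and hash () and equal () subtract 42
   again; the bias keeps stored values clear of the small values the
   hash table reserves for empty and deleted slots.  */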
990 /* Comparison function for qsort sorting oecount elements by count. */
992 static int
993 oecount_cmp (const void *p1, const void *p2)
995 const oecount *c1 = (const oecount *)p1;
996 const oecount *c2 = (const oecount *)p2;
997 if (c1->cnt != c2->cnt)
998 return c1->cnt - c2->cnt;
999 else
1000 /* If counts are identical, use unique IDs to stabilize qsort. */
1001 return c1->id - c2->id;
1004 /* Return TRUE iff STMT represents a builtin call that raises OP
1005 to some exponent. */
1007 static bool
1008 stmt_is_power_of_op (gimple stmt, tree op)
1010 tree fndecl;
1012 if (!is_gimple_call (stmt))
1013 return false;
1015 fndecl = gimple_call_fndecl (stmt);
1017 if (!fndecl
1018 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1019 return false;
1021 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1023 CASE_FLT_FN (BUILT_IN_POW):
1024 CASE_FLT_FN (BUILT_IN_POWI):
1025 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1027 default:
1028 return false;
1032 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1033 in place and return the result. Assumes that stmt_is_power_of_op
1034 was previously called for STMT and returned TRUE. */
1036 static HOST_WIDE_INT
1037 decrement_power (gimple stmt)
1039 REAL_VALUE_TYPE c, cint;
1040 HOST_WIDE_INT power;
1041 tree arg1;
1043 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1045 CASE_FLT_FN (BUILT_IN_POW):
1046 arg1 = gimple_call_arg (stmt, 1);
1047 c = TREE_REAL_CST (arg1);
1048 power = real_to_integer (&c) - 1;
1049 real_from_integer (&cint, VOIDmode, power, 0, 0);
1050 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1051 return power;
1053 CASE_FLT_FN (BUILT_IN_POWI):
1054 arg1 = gimple_call_arg (stmt, 1);
1055 power = TREE_INT_CST_LOW (arg1) - 1;
1056 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1057 return power;
1059 default:
1060 gcc_unreachable ();
1064 /* Find the single immediate use of STMT's LHS, and replace it
1065 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1066 replace *DEF with OP as well. */
1068 static void
1069 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1071 tree lhs;
1072 gimple use_stmt;
1073 use_operand_p use;
1074 gimple_stmt_iterator gsi;
1076 if (is_gimple_call (stmt))
1077 lhs = gimple_call_lhs (stmt);
1078 else
1079 lhs = gimple_assign_lhs (stmt);
1081 gcc_assert (has_single_use (lhs));
1082 single_imm_use (lhs, &use, &use_stmt);
1083 if (lhs == *def)
1084 *def = op;
1085 SET_USE (use, op);
1086 if (TREE_CODE (op) != SSA_NAME)
1087 update_stmt (use_stmt);
1088 gsi = gsi_for_stmt (stmt);
1089 unlink_stmt_vdef (stmt);
1090 gsi_remove (&gsi, true);
1091 release_defs (stmt);
1094 /* Walks the linear chain with result *DEF searching for an operation
1095 with operand OP and code OPCODE, removing that operation from the chain. *DEF
1096 is updated if there is only one operand but no operation left. */
1098 static void
1099 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1101 gimple stmt = SSA_NAME_DEF_STMT (*def);
1105 tree name;
1107 if (opcode == MULT_EXPR
1108 && stmt_is_power_of_op (stmt, op))
1110 if (decrement_power (stmt) == 1)
1111 propagate_op_to_single_use (op, stmt, def);
1112 return;
1115 name = gimple_assign_rhs1 (stmt);
1117 /* If this is the operation we look for and one of the operands
1118 is ours, simply propagate the other operand into the stmt's
1119 single use. */
1120 if (gimple_assign_rhs_code (stmt) == opcode
1121 && (name == op
1122 || gimple_assign_rhs2 (stmt) == op))
1124 if (name == op)
1125 name = gimple_assign_rhs2 (stmt);
1126 propagate_op_to_single_use (name, stmt, def);
1127 return;
1130 /* We might have a multiply of two __builtin_pow* calls, and
1131 the operand might be hiding in the rightmost one. */
1132 if (opcode == MULT_EXPR
1133 && gimple_assign_rhs_code (stmt) == opcode
1134 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1135 && has_single_use (gimple_assign_rhs2 (stmt)))
1137 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1138 if (stmt_is_power_of_op (stmt2, op))
1140 if (decrement_power (stmt2) == 1)
1141 propagate_op_to_single_use (op, stmt2, def);
1142 return;
1146 /* Continue walking the chain. */
1147 gcc_assert (name != op
1148 && TREE_CODE (name) == SSA_NAME);
1149 stmt = SSA_NAME_DEF_STMT (name);
1151 while (1);
1154 /* Returns true if statement S1 dominates statement S2. Like
1155 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1157 static bool
1158 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1160 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1162 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1163 SSA_NAME. Assume it lives at the beginning of the function and
1164 thus dominates everything. */
1165 if (!bb1 || s1 == s2)
1166 return true;
1168 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1169 if (!bb2)
1170 return false;
1172 if (bb1 == bb2)
1174 /* PHIs in the same basic block are assumed to be
1175 executed all in parallel; if only one stmt is a PHI,
1176 it dominates the other stmt in the same basic block. */
1177 if (gimple_code (s1) == GIMPLE_PHI)
1178 return true;
1180 if (gimple_code (s2) == GIMPLE_PHI)
1181 return false;
1183 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1185 if (gimple_uid (s1) < gimple_uid (s2))
1186 return true;
1188 if (gimple_uid (s1) > gimple_uid (s2))
1189 return false;
1191 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1192 unsigned int uid = gimple_uid (s1);
1193 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1195 gimple s = gsi_stmt (gsi);
1196 if (gimple_uid (s) != uid)
1197 break;
1198 if (s == s2)
1199 return true;
1202 return false;
1205 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1208 /* Insert STMT after INSERT_POINT. */
1210 static void
1211 insert_stmt_after (gimple stmt, gimple insert_point)
1213 gimple_stmt_iterator gsi;
1214 basic_block bb;
1216 if (gimple_code (insert_point) == GIMPLE_PHI)
1217 bb = gimple_bb (insert_point);
1218 else if (!stmt_ends_bb_p (insert_point))
1220 gsi = gsi_for_stmt (insert_point);
1221 gimple_set_uid (stmt, gimple_uid (insert_point));
1222 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1223 return;
1225 else
1226 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1227 thus if it must end a basic block, it should be a call that can
1228 throw, or some assignment that can throw. If it throws, the LHS
1229 of it will not be initialized though, so only valid places using
1230 the SSA_NAME should be dominated by the fallthru edge. */
1231 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1232 gsi = gsi_after_labels (bb);
1233 if (gsi_end_p (gsi))
1235 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1236 gimple_set_uid (stmt,
1237 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1239 else
1240 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1241 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1244 /* Builds one statement performing OP1 OPCODE OP2, creating a new SSA
1245 name of type TYPE for the result. Places the statement after the definition of either
1246 OP1 or OP2. Returns the new statement. */
1248 static gimple
1249 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1251 gimple op1def = NULL, op2def = NULL;
1252 gimple_stmt_iterator gsi;
1253 tree op;
1254 gimple sum;
1256 /* Create the addition statement. */
1257 op = make_ssa_name (type, NULL);
1258 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1260 /* Find an insertion place and insert. */
1261 if (TREE_CODE (op1) == SSA_NAME)
1262 op1def = SSA_NAME_DEF_STMT (op1);
1263 if (TREE_CODE (op2) == SSA_NAME)
1264 op2def = SSA_NAME_DEF_STMT (op2);
1265 if ((!op1def || gimple_nop_p (op1def))
1266 && (!op2def || gimple_nop_p (op2def)))
1268 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1269 if (gsi_end_p (gsi))
1271 gimple_stmt_iterator gsi2
1272 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR));
1273 gimple_set_uid (sum,
1274 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1276 else
1277 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1278 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1280 else
1282 gimple insert_point;
1283 if ((!op1def || gimple_nop_p (op1def))
1284 || (op2def && !gimple_nop_p (op2def)
1285 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1286 insert_point = op2def;
1287 else
1288 insert_point = op1def;
1289 insert_stmt_after (sum, insert_point);
1291 update_stmt (sum);
1293 return sum;
1296 /* Perform un-distribution of divisions and multiplications.
1297 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1298 to (A + B) / X for real X.
1300 The algorithm is organized as follows.
1302 - First we walk the addition chain *OPS looking for summands that
1303 are defined by a multiplication or a real division. This results
1304 in the candidates bitmap with relevant indices into *OPS.
1306 - Second we build the chains of multiplications or divisions for
1307 these candidates, counting the number of occurrences of (operand, code)
1308 pairs in all of the candidates chains.
1310 - Third we sort the (operand, code) pairs by number of occurrence and
1311 process them starting with the pair with the most uses.
1313 * For each such pair we walk the candidates again to build a
1314 second candidate bitmap noting all multiplication/division chains
1315 that have at least one occurrence of (operand, code).
1317 * We build an alternate addition chain only covering these
1318 candidates with one (operand, code) operation removed from their
1319 multiplication/division chain.
1321 * The first candidate gets replaced by the alternate addition chain
1322 multiplied/divided by the operand.
1324 * All candidate chains get disabled for further processing and
1325 processing of (operand, code) pairs continues.
1327 The alternate addition chains built are re-processed by the main
1328 reassociation algorithm which allows optimizing a * x * y + b * y * x
1329 to (a + b) * x * y in one invocation of the reassociation pass. */
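/* Concrete illustration (added for exposition): for the addition chain

     t1 = a * x;  t2 = b * x;  r = t1 + t2;

   the pair (x, MULT_EXPR) occurs in both summands, so both chains become
   candidates.  One occurrence of x is stripped from each, the stripped
   summands are added together, and the sum is multiplied by x again:

     s = a + b;  r = s * x;

   which the main reassociation machinery then processes further.  */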
1331 static bool
1332 undistribute_ops_list (enum tree_code opcode,
1333 vec<operand_entry_t> *ops, struct loop *loop)
1335 unsigned int length = ops->length ();
1336 operand_entry_t oe1;
1337 unsigned i, j;
1338 sbitmap candidates, candidates2;
1339 unsigned nr_candidates, nr_candidates2;
1340 sbitmap_iterator sbi0;
1341 vec<operand_entry_t> *subops;
1342 hash_table <oecount_hasher> ctable;
1343 bool changed = false;
1344 int next_oecount_id = 0;
1346 if (length <= 1
1347 || opcode != PLUS_EXPR)
1348 return false;
1350 /* Build a list of candidates to process. */
1351 candidates = sbitmap_alloc (length);
1352 bitmap_clear (candidates);
1353 nr_candidates = 0;
1354 FOR_EACH_VEC_ELT (*ops, i, oe1)
1356 enum tree_code dcode;
1357 gimple oe1def;
1359 if (TREE_CODE (oe1->op) != SSA_NAME)
1360 continue;
1361 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1362 if (!is_gimple_assign (oe1def))
1363 continue;
1364 dcode = gimple_assign_rhs_code (oe1def);
1365 if ((dcode != MULT_EXPR
1366 && dcode != RDIV_EXPR)
1367 || !is_reassociable_op (oe1def, dcode, loop))
1368 continue;
1370 bitmap_set_bit (candidates, i);
1371 nr_candidates++;
1374 if (nr_candidates < 2)
1376 sbitmap_free (candidates);
1377 return false;
1380 if (dump_file && (dump_flags & TDF_DETAILS))
1382 fprintf (dump_file, "searching for un-distribute opportunities ");
1383 print_generic_expr (dump_file,
1384 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1385 fprintf (dump_file, " %d\n", nr_candidates);
1388 /* Build linearized sub-operand lists and the counting table. */
1389 cvec.create (0);
1390 ctable.create (15);
1391 /* ??? Macro arguments cannot have multi-argument template types in
1392 them. This typedef is needed to work around that limitation. */
1393 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1394 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1395 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1397 gimple oedef;
1398 enum tree_code oecode;
1399 unsigned j;
1401 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1402 oecode = gimple_assign_rhs_code (oedef);
1403 linearize_expr_tree (&subops[i], oedef,
1404 associative_tree_code (oecode), false);
1406 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1408 oecount c;
1409 void **slot;
1410 size_t idx;
1411 c.oecode = oecode;
1412 c.cnt = 1;
1413 c.id = next_oecount_id++;
1414 c.op = oe1->op;
1415 cvec.safe_push (c);
1416 idx = cvec.length () + 41;
1417 slot = ctable.find_slot ((void *)idx, INSERT);
1418 if (!*slot)
1420 *slot = (void *)idx;
1422 else
1424 cvec.pop ();
1425 cvec[(size_t)*slot - 42].cnt++;
1429 ctable.dispose ();
1431 /* Sort the counting table. */
1432 cvec.qsort (oecount_cmp);
1434 if (dump_file && (dump_flags & TDF_DETAILS))
1436 oecount *c;
1437 fprintf (dump_file, "Candidates:\n");
1438 FOR_EACH_VEC_ELT (cvec, j, c)
1440 fprintf (dump_file, " %u %s: ", c->cnt,
1441 c->oecode == MULT_EXPR
1442 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1443 print_generic_expr (dump_file, c->op, 0);
1444 fprintf (dump_file, "\n");
1448 /* Process the (operand, code) pairs in order of most occurrence. */
1449 candidates2 = sbitmap_alloc (length);
1450 while (!cvec.is_empty ())
1452 oecount *c = &cvec.last ();
1453 if (c->cnt < 2)
1454 break;
1456 /* Now collect the operands in the outer chain that contain
1457 the common operand in their inner chain. */
1458 bitmap_clear (candidates2);
1459 nr_candidates2 = 0;
1460 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1462 gimple oedef;
1463 enum tree_code oecode;
1464 unsigned j;
1465 tree op = (*ops)[i]->op;
1467 /* If we undistributed in this chain already this may be
1468 a constant. */
1469 if (TREE_CODE (op) != SSA_NAME)
1470 continue;
1472 oedef = SSA_NAME_DEF_STMT (op);
1473 oecode = gimple_assign_rhs_code (oedef);
1474 if (oecode != c->oecode)
1475 continue;
1477 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1479 if (oe1->op == c->op)
1481 bitmap_set_bit (candidates2, i);
1482 ++nr_candidates2;
1483 break;
1488 if (nr_candidates2 >= 2)
1490 operand_entry_t oe1, oe2;
1491 gimple prod;
1492 int first = bitmap_first_set_bit (candidates2);
1494 /* Build the new addition chain. */
1495 oe1 = (*ops)[first];
1496 if (dump_file && (dump_flags & TDF_DETAILS))
1498 fprintf (dump_file, "Building (");
1499 print_generic_expr (dump_file, oe1->op, 0);
1501 zero_one_operation (&oe1->op, c->oecode, c->op);
1502 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1504 gimple sum;
1505 oe2 = (*ops)[i];
1506 if (dump_file && (dump_flags & TDF_DETAILS))
1508 fprintf (dump_file, " + ");
1509 print_generic_expr (dump_file, oe2->op, 0);
1511 zero_one_operation (&oe2->op, c->oecode, c->op);
1512 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1513 oe1->op, oe2->op, opcode);
1514 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1515 oe2->rank = 0;
1516 oe1->op = gimple_get_lhs (sum);
1519 /* Apply the multiplication/division. */
1520 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1521 oe1->op, c->op, c->oecode);
1522 if (dump_file && (dump_flags & TDF_DETAILS))
1524 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1525 print_generic_expr (dump_file, c->op, 0);
1526 fprintf (dump_file, "\n");
1529 /* Record it in the addition chain and disable further
1530 undistribution with this op. */
1531 oe1->op = gimple_assign_lhs (prod);
1532 oe1->rank = get_rank (oe1->op);
1533 subops[first].release ();
1535 changed = true;
1538 cvec.pop ();
1541 for (i = 0; i < ops->length (); ++i)
1542 subops[i].release ();
1543 free (subops);
1544 cvec.release ();
1545 sbitmap_free (candidates);
1546 sbitmap_free (candidates2);
1548 return changed;
1551 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1552 expression, examine the other OPS to see if any of them are comparisons
1553 of the same values, which we may be able to combine or eliminate.
1554 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1556 static bool
1557 eliminate_redundant_comparison (enum tree_code opcode,
1558 vec<operand_entry_t> *ops,
1559 unsigned int currindex,
1560 operand_entry_t curr)
1562 tree op1, op2;
1563 enum tree_code lcode, rcode;
1564 gimple def1, def2;
1565 int i;
1566 operand_entry_t oe;
1568 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1569 return false;
1571 /* Check that CURR is a comparison. */
1572 if (TREE_CODE (curr->op) != SSA_NAME)
1573 return false;
1574 def1 = SSA_NAME_DEF_STMT (curr->op);
1575 if (!is_gimple_assign (def1))
1576 return false;
1577 lcode = gimple_assign_rhs_code (def1);
1578 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1579 return false;
1580 op1 = gimple_assign_rhs1 (def1);
1581 op2 = gimple_assign_rhs2 (def1);
1583 /* Now look for a similar comparison in the remaining OPS. */
1584 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1586 tree t;
1588 if (TREE_CODE (oe->op) != SSA_NAME)
1589 continue;
1590 def2 = SSA_NAME_DEF_STMT (oe->op);
1591 if (!is_gimple_assign (def2))
1592 continue;
1593 rcode = gimple_assign_rhs_code (def2);
1594 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1595 continue;
1597 /* If we got here, we have a match. See if we can combine the
1598 two comparisons. */
1599 if (opcode == BIT_IOR_EXPR)
1600 t = maybe_fold_or_comparisons (lcode, op1, op2,
1601 rcode, gimple_assign_rhs1 (def2),
1602 gimple_assign_rhs2 (def2));
1603 else
1604 t = maybe_fold_and_comparisons (lcode, op1, op2,
1605 rcode, gimple_assign_rhs1 (def2),
1606 gimple_assign_rhs2 (def2));
1607 if (!t)
1608 continue;
1610 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1611 always give us a boolean_type_node value back. If the original
1612 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1613 we need to convert. */
1614 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1615 t = fold_convert (TREE_TYPE (curr->op), t);
1617 if (TREE_CODE (t) != INTEGER_CST
1618 && !operand_equal_p (t, curr->op, 0))
1620 enum tree_code subcode;
1621 tree newop1, newop2;
1622 if (!COMPARISON_CLASS_P (t))
1623 continue;
1624 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1625 STRIP_USELESS_TYPE_CONVERSION (newop1);
1626 STRIP_USELESS_TYPE_CONVERSION (newop2);
1627 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1628 continue;
1631 if (dump_file && (dump_flags & TDF_DETAILS))
1633 fprintf (dump_file, "Equivalence: ");
1634 print_generic_expr (dump_file, curr->op, 0);
1635 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1636 print_generic_expr (dump_file, oe->op, 0);
1637 fprintf (dump_file, " -> ");
1638 print_generic_expr (dump_file, t, 0);
1639 fprintf (dump_file, "\n");
1642 /* Now we can delete oe, as it has been subsumed by the new combined
1643 expression t. */
1644 ops->ordered_remove (i);
1645 reassociate_stats.ops_eliminated ++;
1647 /* If t is the same as curr->op, we're done. Otherwise we must
1648 replace curr->op with t. Special case is if we got a constant
1649 back, in which case we add it to the end instead of in place of
1650 the current entry. */
1651 if (TREE_CODE (t) == INTEGER_CST)
1653 ops->ordered_remove (currindex);
1654 add_to_ops_vec (ops, t);
1656 else if (!operand_equal_p (t, curr->op, 0))
1658 gimple sum;
1659 enum tree_code subcode;
1660 tree newop1;
1661 tree newop2;
1662 gcc_assert (COMPARISON_CLASS_P (t));
1663 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1664 STRIP_USELESS_TYPE_CONVERSION (newop1);
1665 STRIP_USELESS_TYPE_CONVERSION (newop2);
1666 gcc_checking_assert (is_gimple_val (newop1)
1667 && is_gimple_val (newop2));
1668 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1669 curr->op = gimple_get_lhs (sum);
1671 return true;
1674 return false;
1677 /* Perform various identities and other optimizations on the list of
1678 operand entries, stored in OPS. The tree code for the binary
1679 operation between all the operands is OPCODE. */
1681 static void
1682 optimize_ops_list (enum tree_code opcode,
1683 vec<operand_entry_t> *ops)
1685 unsigned int length = ops->length ();
1686 unsigned int i;
1687 operand_entry_t oe;
1688 operand_entry_t oelast = NULL;
1689 bool iterate = false;
1691 if (length == 1)
1692 return;
1694 oelast = ops->last ();
1696 /* If the last two are constants, pop the constants off, merge them
1697 and try the next two. */
1698 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1700 operand_entry_t oelm1 = (*ops)[length - 2];
1702 if (oelm1->rank == 0
1703 && is_gimple_min_invariant (oelm1->op)
1704 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1705 TREE_TYPE (oelast->op)))
1707 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1708 oelm1->op, oelast->op);
1710 if (folded && is_gimple_min_invariant (folded))
1712 if (dump_file && (dump_flags & TDF_DETAILS))
1713 fprintf (dump_file, "Merging constants\n");
1715 ops->pop ();
1716 ops->pop ();
1718 add_to_ops_vec (ops, folded);
1719 reassociate_stats.constants_eliminated++;
1721 optimize_ops_list (opcode, ops);
1722 return;
1727 eliminate_using_constants (opcode, ops);
1728 oelast = NULL;
1730 for (i = 0; ops->iterate (i, &oe);)
1732 bool done = false;
1734 if (eliminate_not_pairs (opcode, ops, i, oe))
1735 return;
1736 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1737 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1738 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1740 if (done)
1741 return;
1742 iterate = true;
1743 oelast = NULL;
1744 continue;
1746 oelast = oe;
1747 i++;
1750 length = ops->length ();
1751 oelast = ops->last ();
1753 if (iterate)
1754 optimize_ops_list (opcode, ops);
1757 /* The following functions are subroutines to optimize_range_tests and allow
1758 it to try to change a logical combination of comparisons into a range
1759 test.
1761 For example, both
1762 X == 2 || X == 5 || X == 3 || X == 4
1763 and
1764 X >= 2 && X <= 5
1765 are converted to
1766 (unsigned) (X - 2) <= 3
1768 For more information see comments above fold_test_range in fold-const.c,
1769 this implementation is for GIMPLE. */
1771 struct range_entry
1772 {
1773 tree exp;
1774 tree low;
1775 tree high;
1776 bool in_p;
1777 bool strict_overflow_p;
1778 unsigned int idx, next;
1779 };
1781 /* This is similar to make_range in fold-const.c, but on top of
1782 GIMPLE instead of trees. If EXP is non-NULL, it should be
1783 an SSA_NAME and STMT argument is ignored, otherwise STMT
1784 argument should be a GIMPLE_COND. */
1786 static void
1787 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1789 int in_p;
1790 tree low, high;
1791 bool is_bool, strict_overflow_p;
1793 r->exp = NULL_TREE;
1794 r->in_p = false;
1795 r->strict_overflow_p = false;
1796 r->low = NULL_TREE;
1797 r->high = NULL_TREE;
1798 if (exp != NULL_TREE
1799 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1800 return;
1802 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1803 and see if we can refine the range. Some of the cases below may not
1804 happen, but it doesn't seem worth worrying about this. We "continue"
1805 the outer loop when we've changed something; otherwise we "break"
1806 the switch, which will "break" the while. */
1807 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1808 high = low;
1809 in_p = 0;
1810 strict_overflow_p = false;
1811 is_bool = false;
1812 if (exp == NULL_TREE)
1813 is_bool = true;
1814 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1816 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1817 is_bool = true;
1818 else
1819 return;
1821 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1822 is_bool = true;
1824 while (1)
1826 enum tree_code code;
1827 tree arg0, arg1, exp_type;
1828 tree nexp;
1829 location_t loc;
1831 if (exp != NULL_TREE)
1833 if (TREE_CODE (exp) != SSA_NAME)
1834 break;
1836 stmt = SSA_NAME_DEF_STMT (exp);
1837 if (!is_gimple_assign (stmt))
1838 break;
1840 code = gimple_assign_rhs_code (stmt);
1841 arg0 = gimple_assign_rhs1 (stmt);
1842 arg1 = gimple_assign_rhs2 (stmt);
1843 exp_type = TREE_TYPE (exp);
1845 else
1847 code = gimple_cond_code (stmt);
1848 arg0 = gimple_cond_lhs (stmt);
1849 arg1 = gimple_cond_rhs (stmt);
1850 exp_type = boolean_type_node;
1853 if (TREE_CODE (arg0) != SSA_NAME)
1854 break;
1855 loc = gimple_location (stmt);
1856 switch (code)
1858 case BIT_NOT_EXPR:
1859 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1860 /* Ensure the range is either +[-,0], +[0,0],
1861 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1862 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1863 or similar expression of unconditional true or
1864 false, it should not be negated. */
1865 && ((high && integer_zerop (high))
1866 || (low && integer_onep (low))))
1868 in_p = !in_p;
1869 exp = arg0;
1870 continue;
1872 break;
1873 case SSA_NAME:
1874 exp = arg0;
1875 continue;
1876 CASE_CONVERT:
1877 if (is_bool)
1878 goto do_default;
1879 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1881 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1882 is_bool = true;
1883 else
1884 return;
1886 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1887 is_bool = true;
1888 goto do_default;
1889 case EQ_EXPR:
1890 case NE_EXPR:
1891 case LT_EXPR:
1892 case LE_EXPR:
1893 case GE_EXPR:
1894 case GT_EXPR:
1895 is_bool = true;
1896 /* FALLTHRU */
1897 default:
1898 if (!is_bool)
1899 return;
1900 do_default:
1901 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1902 &low, &high, &in_p,
1903 &strict_overflow_p);
1904 if (nexp != NULL_TREE)
1906 exp = nexp;
1907 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1908 continue;
1910 break;
1912 break;
1914 if (is_bool)
1916 r->exp = exp;
1917 r->in_p = in_p;
1918 r->low = low;
1919 r->high = high;
1920 r->strict_overflow_p = strict_overflow_p;
1924 /* Comparison function for qsort. Sort entries
1925 without SSA_NAME exp first, then with SSA_NAMEs sorted
1926 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1927 by increasing ->low and if ->low is the same, by increasing
1928 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1929 maximum. */
1931 static int
1932 range_entry_cmp (const void *a, const void *b)
1934 const struct range_entry *p = (const struct range_entry *) a;
1935 const struct range_entry *q = (const struct range_entry *) b;
1937 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1939 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1941 /* Group range_entries for the same SSA_NAME together. */
1942 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1943 return -1;
1944 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1945 return 1;
1946 /* If ->low is different, NULL low goes first, then by
1947 ascending low. */
1948 if (p->low != NULL_TREE)
1950 if (q->low != NULL_TREE)
1952 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1953 p->low, q->low);
1954 if (tem && integer_onep (tem))
1955 return -1;
1956 tem = fold_binary (GT_EXPR, boolean_type_node,
1957 p->low, q->low);
1958 if (tem && integer_onep (tem))
1959 return 1;
1961 else
1962 return 1;
1964 else if (q->low != NULL_TREE)
1965 return -1;
1966 /* If ->high is different, NULL high goes last, before that by
1967 ascending high. */
1968 if (p->high != NULL_TREE)
1970 if (q->high != NULL_TREE)
1972 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1973 p->high, q->high);
1974 if (tem && integer_onep (tem))
1975 return -1;
1976 tem = fold_binary (GT_EXPR, boolean_type_node,
1977 p->high, q->high);
1978 if (tem && integer_onep (tem))
1979 return 1;
1981 else
1982 return -1;
1984 else if (p->high != NULL_TREE)
1985 return 1;
1986 /* If both ranges are the same, sort below by ascending idx. */
1988 else
1989 return 1;
1991 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1992 return -1;
1994 if (p->idx < q->idx)
1995 return -1;
1996 else
1998 gcc_checking_assert (p->idx > q->idx);
1999 return 1;
2003 /* Helper routine of optimize_range_tests.
2004 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2005 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2006 OPCODE and OPS are arguments of optimize_range_tests. Return
2007 true if the range merge has been successful.
2008 If OPCODE is ERROR_MARK, this is called from within
2009 maybe_optimize_range_tests and is performing inter-bb range optimization.
2010 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2011 oe->rank. */
2013 static bool
2014 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2015 unsigned int count, enum tree_code opcode,
2016 vec<operand_entry_t> *ops, tree exp, bool in_p,
2017 tree low, tree high, bool strict_overflow_p)
2019 operand_entry_t oe = (*ops)[range->idx];
2020 tree op = oe->op;
2021 gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
2022 location_t loc = gimple_location (stmt);
2023 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2024 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
2025 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2026 gimple_stmt_iterator gsi;
2028 if (tem == NULL_TREE)
2029 return false;
2031 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2032 warning_at (loc, OPT_Wstrict_overflow,
2033 "assuming signed overflow does not occur "
2034 "when simplifying range test");
2036 if (dump_file && (dump_flags & TDF_DETAILS))
2038 struct range_entry *r;
2039 fprintf (dump_file, "Optimizing range tests ");
2040 print_generic_expr (dump_file, range->exp, 0);
2041 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2042 print_generic_expr (dump_file, range->low, 0);
2043 fprintf (dump_file, ", ");
2044 print_generic_expr (dump_file, range->high, 0);
2045 fprintf (dump_file, "]");
2046 for (r = otherrange; r < otherrange + count; r++)
2048 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2049 print_generic_expr (dump_file, r->low, 0);
2050 fprintf (dump_file, ", ");
2051 print_generic_expr (dump_file, r->high, 0);
2052 fprintf (dump_file, "]");
2054 fprintf (dump_file, "\n into ");
2055 print_generic_expr (dump_file, tem, 0);
2056 fprintf (dump_file, "\n");
2059 if (opcode == BIT_IOR_EXPR
2060 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2061 tem = invert_truthvalue_loc (loc, tem);
2063 tem = fold_convert_loc (loc, optype, tem);
2064 gsi = gsi_for_stmt (stmt);
2065 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2066 GSI_SAME_STMT);
2067 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
2068 if (gimple_uid (gsi_stmt (gsi)))
2069 break;
2070 else
2071 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2073 oe->op = tem;
2074 range->exp = exp;
2075 range->low = low;
2076 range->high = high;
2077 range->in_p = in_p;
2078 range->strict_overflow_p = false;
2080 for (range = otherrange; range < otherrange + count; range++)
2082 oe = (*ops)[range->idx];
2083 /* Now change all the other range test immediate uses, so that
2084 those tests will be optimized away. */
2085 if (opcode == ERROR_MARK)
2087 if (oe->op)
2088 oe->op = build_int_cst (TREE_TYPE (oe->op),
2089 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2090 else
2091 oe->op = (oe->rank == BIT_IOR_EXPR
2092 ? boolean_false_node : boolean_true_node);
2094 else
2095 oe->op = error_mark_node;
2096 range->exp = NULL_TREE;
2098 return true;
2101 /* Optimize X == CST1 || X == CST2
2102 if popcount (CST1 ^ CST2) == 1 into
2103 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2104 Similarly for ranges. E.g.
2105 X != 2 && X != 3 && X != 10 && X != 11
2106 will be transformed by the previous optimization into
2107 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2108 and this loop can transform that into
2109 !(((X & ~8) - 2U) <= 1U). */
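/* Illustrative instance (hypothetical values): for X == 4 || X == 6 we have
   CST1 ^ CST2 == 2, a single bit, so the test becomes
   (X & ~2) == (4 & ~2), i.e. (X & ~2) == 4, which holds exactly for
   X in { 4, 6 }.  */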
2111 static bool
2112 optimize_range_tests_xor (enum tree_code opcode, tree type,
2113 tree lowi, tree lowj, tree highi, tree highj,
2114 vec<operand_entry_t> *ops,
2115 struct range_entry *rangei,
2116 struct range_entry *rangej)
2118 tree lowxor, highxor, tem, exp;
2119 /* Check highi ^ lowi == highj ^ lowj and
2120 popcount (highi ^ lowi) == 1. */
2121 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2122 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2123 return false;
2124 if (tree_log2 (lowxor) < 0)
2125 return false;
2126 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2127 if (!tree_int_cst_equal (lowxor, highxor))
2128 return false;
2130 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2131 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2132 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2133 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2134 if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2135 rangei->in_p, lowj, highj,
2136 rangei->strict_overflow_p
2137 || rangej->strict_overflow_p))
2138 return true;
2139 return false;
2142 /* Optimize X == CST1 || X == CST2
2143 if popcount (CST2 - CST1) == 1 into
2144 ((X - CST1) & ~(CST2 - CST1)) == 0.
2145 Similarly for ranges. E.g.
2146 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2147 || X == 75 || X == 45
2148 will be transformed by the previous optimization into
2149 (X - 43U) <= 3U || (X - 75U) <= 3U
2150 and this loop can transform that into
2151 ((X - 43U) & ~(75U - 43U)) <= 3U. */
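/* Illustrative instance (hypothetical values): for X == 43 || X == 75 we
   have CST2 - CST1 == 32, a single bit, so the test becomes
   ((X - 43U) & ~32U) == 0: X == 43 gives 0 & ~32 == 0, X == 75 gives
   32 & ~32 == 0, and any other value leaves some bit set.  */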
2152 static bool
2153 optimize_range_tests_diff (enum tree_code opcode, tree type,
2154 tree lowi, tree lowj, tree highi, tree highj,
2155 vec<operand_entry_t> *ops,
2156 struct range_entry *rangei,
2157 struct range_entry *rangej)
2159 tree tem1, tem2, mask;
2160 /* Check highi - lowi == highj - lowj. */
2161 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2162 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2163 return false;
2164 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2165 if (!tree_int_cst_equal (tem1, tem2))
2166 return false;
2167 /* Check popcount (lowj - lowi) == 1. */
2168 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2169 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2170 return false;
2171 if (tree_log2 (tem1) < 0)
2172 return false;
2174 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2175 tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
2176 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2177 lowj = build_int_cst (type, 0);
2178 if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2179 rangei->in_p, lowj, tem2,
2180 rangei->strict_overflow_p
2181 || rangej->strict_overflow_p))
2182 return true;
2183 return false;
2186 /* Helper of optimize_range_tests.  Performs the checks common to
2187    optimize_range_tests_xor and optimize_range_tests_diff.
2188    If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2189    Else it calls optimize_range_tests_diff.  */
2191 static bool
2192 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2193 bool optimize_xor, vec<operand_entry_t> *ops,
2194 struct range_entry *ranges)
2196 int i, j;
2197 bool any_changes = false;
2198 for (i = first; i < length; i++)
2200 tree lowi, highi, lowj, highj, type, tem;
2202 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2203 continue;
2204 type = TREE_TYPE (ranges[i].exp);
2205 if (!INTEGRAL_TYPE_P (type))
2206 continue;
2207 lowi = ranges[i].low;
2208 if (lowi == NULL_TREE)
2209 lowi = TYPE_MIN_VALUE (type);
2210 highi = ranges[i].high;
2211 if (highi == NULL_TREE)
2212 continue;
2213 for (j = i + 1; j < length && j < i + 64; j++)
2215 bool changes;
2216 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2217 continue;
2218 lowj = ranges[j].low;
2219 if (lowj == NULL_TREE)
2220 continue;
2221 highj = ranges[j].high;
2222 if (highj == NULL_TREE)
2223 highj = TYPE_MAX_VALUE (type);
2224 /* Check lowj > highi. */
2225 tem = fold_binary (GT_EXPR, boolean_type_node,
2226 lowj, highi);
2227 if (tem == NULL_TREE || !integer_onep (tem))
2228 continue;
2229 if (optimize_xor)
2230 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2231 highi, highj, ops,
2232 ranges + i, ranges + j);
2233 else
2234 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2235 highi, highj, ops,
2236 ranges + i, ranges + j);
2237 if (changes)
2239 any_changes = true;
2240 break;
2244 return any_changes;
2247 /* Optimize range tests, similarly to how fold_range_test optimizes
2248    them on trees.  The tree code for the binary
2249 operation between all the operands is OPCODE.
2250 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2251 maybe_optimize_range_tests for inter-bb range optimization.
2252 In that case if oe->op is NULL, oe->id is bb->index whose
2253 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2254 the actual opcode. */
2256 static bool
2257 optimize_range_tests (enum tree_code opcode,
2258 vec<operand_entry_t> *ops)
2260 unsigned int length = ops->length (), i, j, first;
2261 operand_entry_t oe;
2262 struct range_entry *ranges;
2263 bool any_changes = false;
2265 if (length == 1)
2266 return false;
2268 ranges = XNEWVEC (struct range_entry, length);
2269 for (i = 0; i < length; i++)
2271 oe = (*ops)[i];
2272 ranges[i].idx = i;
2273 init_range_entry (ranges + i, oe->op,
2274 oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2275       /* For |, invert it now; we will invert it again before emitting
2276 	 the optimized expression.  */
2277 if (opcode == BIT_IOR_EXPR
2278 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2279 ranges[i].in_p = !ranges[i].in_p;
2282 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2283 for (i = 0; i < length; i++)
2284 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2285 break;
2287 /* Try to merge ranges. */
2288 for (first = i; i < length; i++)
2290 tree low = ranges[i].low;
2291 tree high = ranges[i].high;
2292 int in_p = ranges[i].in_p;
2293 bool strict_overflow_p = ranges[i].strict_overflow_p;
2294 int update_fail_count = 0;
2296 for (j = i + 1; j < length; j++)
2298 if (ranges[i].exp != ranges[j].exp)
2299 break;
2300 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2301 ranges[j].in_p, ranges[j].low, ranges[j].high))
2302 break;
2303 strict_overflow_p |= ranges[j].strict_overflow_p;
2306 if (j == i + 1)
2307 continue;
2309 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2310 ops, ranges[i].exp, in_p, low, high,
2311 strict_overflow_p))
2313 i = j - 1;
2314 any_changes = true;
2316 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2317 while update_range_test would fail. */
2318 else if (update_fail_count == 64)
2319 i = j - 1;
2320 else
2321 ++update_fail_count;
2324 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2325 ops, ranges);
2327 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2328 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2329 ops, ranges);
2331 if (any_changes && opcode != ERROR_MARK)
2333 j = 0;
2334 FOR_EACH_VEC_ELT (*ops, i, oe)
2336 if (oe->op == error_mark_node)
2337 continue;
2338 else if (i != j)
2339 (*ops)[j] = oe;
2340 j++;
2342 ops->truncate (j);
2345 XDELETEVEC (ranges);
2346 return any_changes;
2349 /* Return true if STMT is a cast like:
2350 <bb N>:
2352 _123 = (int) _234;
2354 <bb M>:
2355 # _345 = PHI <_123(N), 1(...), 1(...)>
2356    where _234 has bool type, _123 has a single use and
2357 bb N has a single successor M. This is commonly used in
2358 the last block of a range test. */
2360 static bool
2361 final_range_test_p (gimple stmt)
2363 basic_block bb, rhs_bb;
2364 edge e;
2365 tree lhs, rhs;
2366 use_operand_p use_p;
2367 gimple use_stmt;
2369 if (!gimple_assign_cast_p (stmt))
2370 return false;
2371 bb = gimple_bb (stmt);
2372 if (!single_succ_p (bb))
2373 return false;
2374 e = single_succ_edge (bb);
2375 if (e->flags & EDGE_COMPLEX)
2376 return false;
2378 lhs = gimple_assign_lhs (stmt);
2379 rhs = gimple_assign_rhs1 (stmt);
2380 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2381 || TREE_CODE (rhs) != SSA_NAME
2382 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2383 return false;
2385 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2386 if (!single_imm_use (lhs, &use_p, &use_stmt))
2387 return false;
2389 if (gimple_code (use_stmt) != GIMPLE_PHI
2390 || gimple_bb (use_stmt) != e->dest)
2391 return false;
2393 /* And that the rhs is defined in the same loop. */
2394 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2395 if (rhs_bb == NULL
2396 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2397 return false;
2399 return true;
2402 /* Return true if BB is a suitable basic block for inter-bb range test
2403    optimization.  If BACKWARD is true, BB should be the only predecessor
2404    of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2405    or is compared against to find the common basic block to which all
2406    conditions branch if true resp. false.  If BACKWARD is false, TEST_BB
2407    should be the only predecessor of BB.  */
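/* A rough sketch of the CFG shape this recognizes (hypothetical block
   numbers and names):

     <bb 2>:  if (x_1 == 1) goto <bb 5>; else goto <bb 3>;
     <bb 3>:  if (x_1 == 3) goto <bb 5>; else goto <bb 4>;
     <bb 4>:  if (x_1 == 5) goto <bb 5>; else goto <bb 6>;

   Each block in the chain has one edge to the common *OTHER_BB (bb 5 here)
   and one edge to the next test, and the PHI arguments in *OTHER_BB coming
   from these blocks must agree as checked below.  */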
2409 static bool
2410 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2411 bool backward)
2413 edge_iterator ei, ei2;
2414 edge e, e2;
2415 gimple stmt;
2416 gimple_stmt_iterator gsi;
2417 bool other_edge_seen = false;
2418 bool is_cond;
2420 if (test_bb == bb)
2421 return false;
2422 /* Check last stmt first. */
2423 stmt = last_stmt (bb);
2424 if (stmt == NULL
2425 || (gimple_code (stmt) != GIMPLE_COND
2426 && (backward || !final_range_test_p (stmt)))
2427 || gimple_visited_p (stmt)
2428 || stmt_could_throw_p (stmt)
2429 || *other_bb == bb)
2430 return false;
2431 is_cond = gimple_code (stmt) == GIMPLE_COND;
2432 if (is_cond)
2434 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2435 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2436 to *OTHER_BB (if not set yet, try to find it out). */
2437 if (EDGE_COUNT (bb->succs) != 2)
2438 return false;
2439 FOR_EACH_EDGE (e, ei, bb->succs)
2441 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2442 return false;
2443 if (e->dest == test_bb)
2445 if (backward)
2446 continue;
2447 else
2448 return false;
2450 if (e->dest == bb)
2451 return false;
2452 if (*other_bb == NULL)
2454 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2455 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2456 return false;
2457 else if (e->dest == e2->dest)
2458 *other_bb = e->dest;
2459 if (*other_bb == NULL)
2460 return false;
2462 if (e->dest == *other_bb)
2463 other_edge_seen = true;
2464 else if (backward)
2465 return false;
2467 if (*other_bb == NULL || !other_edge_seen)
2468 return false;
2470 else if (single_succ (bb) != *other_bb)
2471 return false;
2473 /* Now check all PHIs of *OTHER_BB. */
2474 e = find_edge (bb, *other_bb);
2475 e2 = find_edge (test_bb, *other_bb);
2476 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2478 gimple phi = gsi_stmt (gsi);
2479 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2480 corresponding to BB and TEST_BB predecessor must be the same. */
2481 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2482 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2484 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2485 one of the PHIs should have the lhs of the last stmt in
2486 that block as PHI arg and that PHI should have 0 or 1
2487 corresponding to it in all other range test basic blocks
2488 considered. */
2489 if (!is_cond)
2491 if (gimple_phi_arg_def (phi, e->dest_idx)
2492 == gimple_assign_lhs (stmt)
2493 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2494 || integer_onep (gimple_phi_arg_def (phi,
2495 e2->dest_idx))))
2496 continue;
2498 else
2500 gimple test_last = last_stmt (test_bb);
2501 if (gimple_code (test_last) != GIMPLE_COND
2502 && gimple_phi_arg_def (phi, e2->dest_idx)
2503 == gimple_assign_lhs (test_last)
2504 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2505 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2506 continue;
2509 return false;
2512 return true;
2515 /* Return true if BB doesn't have side-effects that would disallow
2516 range test optimization, all SSA_NAMEs set in the bb are consumed
2517 in the bb and there are no PHIs. */
2519 static bool
2520 no_side_effect_bb (basic_block bb)
2522 gimple_stmt_iterator gsi;
2523 gimple last;
2525 if (!gimple_seq_empty_p (phi_nodes (bb)))
2526 return false;
2527 last = last_stmt (bb);
2528 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2530 gimple stmt = gsi_stmt (gsi);
2531 tree lhs;
2532 imm_use_iterator imm_iter;
2533 use_operand_p use_p;
2535 if (is_gimple_debug (stmt))
2536 continue;
2537 if (gimple_has_side_effects (stmt))
2538 return false;
2539 if (stmt == last)
2540 return true;
2541 if (!is_gimple_assign (stmt))
2542 return false;
2543 lhs = gimple_assign_lhs (stmt);
2544 if (TREE_CODE (lhs) != SSA_NAME)
2545 return false;
2546 if (gimple_assign_rhs_could_trap_p (stmt))
2547 return false;
2548 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2550 gimple use_stmt = USE_STMT (use_p);
2551 if (is_gimple_debug (use_stmt))
2552 continue;
2553 if (gimple_bb (use_stmt) != bb)
2554 return false;
2557 return false;
2560 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2561 return true and fill in *OPS recursively. */
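/* A small sketch (hypothetical SSA names), assuming each name below has a
   single use:
     _3 = a_1 < 10;
     _4 = b_2 != 0;
     _5 = _3 | _4;
   get_ops (_5, BIT_IOR_EXPR, ...) pushes _3 and _4 into *OPS, since neither
   is itself a reassociable |; _5 itself is not pushed.  */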
2563 static bool
2564 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2565 struct loop *loop)
2567 gimple stmt = SSA_NAME_DEF_STMT (var);
2568 tree rhs[2];
2569 int i;
2571 if (!is_reassociable_op (stmt, code, loop))
2572 return false;
2574 rhs[0] = gimple_assign_rhs1 (stmt);
2575 rhs[1] = gimple_assign_rhs2 (stmt);
2576 gimple_set_visited (stmt, true);
2577 for (i = 0; i < 2; i++)
2578 if (TREE_CODE (rhs[i]) == SSA_NAME
2579 && !get_ops (rhs[i], code, ops, loop)
2580 && has_single_use (rhs[i]))
2582 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2584 oe->op = rhs[i];
2585 oe->rank = code;
2586 oe->id = 0;
2587 oe->count = 1;
2588 ops->safe_push (oe);
2590 return true;
2593 /* Find the ops that were added by get_ops starting from VAR, see if
2594 they were changed during update_range_test and if yes, create new
2595 stmts. */
2597 static tree
2598 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2599 unsigned int *pidx, struct loop *loop)
2601 gimple stmt = SSA_NAME_DEF_STMT (var);
2602 tree rhs[4];
2603 int i;
2605 if (!is_reassociable_op (stmt, code, loop))
2606 return NULL;
2608 rhs[0] = gimple_assign_rhs1 (stmt);
2609 rhs[1] = gimple_assign_rhs2 (stmt);
2610 rhs[2] = rhs[0];
2611 rhs[3] = rhs[1];
2612 for (i = 0; i < 2; i++)
2613 if (TREE_CODE (rhs[i]) == SSA_NAME)
2615 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2616 if (rhs[2 + i] == NULL_TREE)
2618 if (has_single_use (rhs[i]))
2619 rhs[2 + i] = ops[(*pidx)++]->op;
2620 else
2621 rhs[2 + i] = rhs[i];
2624 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2625 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2627 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2628 var = make_ssa_name (TREE_TYPE (var), NULL);
2629 gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2630 var, rhs[2], rhs[3]);
2631 gimple_set_uid (g, gimple_uid (stmt));
2632 gimple_set_visited (g, true);
2633 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2635 return var;
2638 /* Structure to track the initial value passed to get_ops and
2639 the range in the ops vector for each basic block. */
2641 struct inter_bb_range_test_entry
2643 tree op;
2644 unsigned int first_idx, last_idx;
2647 /* Inter-bb range test optimization. */
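/* A sketch of the overall effect (hypothetical source): a chain of blocks
   testing x_1 == 0, x_1 == 1, x_1 == 2 and x_1 == 3, each branching to the
   same destination, can be merged into a single test of the range [0, 3]
   emitted in one of the blocks, while the remaining GIMPLE_CONDs are folded
   to constant true/false.  */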
2649 static void
2650 maybe_optimize_range_tests (gimple stmt)
2652 basic_block first_bb = gimple_bb (stmt);
2653 basic_block last_bb = first_bb;
2654 basic_block other_bb = NULL;
2655 basic_block bb;
2656 edge_iterator ei;
2657 edge e;
2658 vec<operand_entry_t> ops = vNULL;
2659 vec<inter_bb_range_test_entry> bbinfo = vNULL;
2660 bool any_changes = false;
2662 /* Consider only basic blocks that end with GIMPLE_COND or
2663 a cast statement satisfying final_range_test_p. All
2664 but the last bb in the first_bb .. last_bb range
2665 should end with GIMPLE_COND. */
2666 if (gimple_code (stmt) == GIMPLE_COND)
2668 if (EDGE_COUNT (first_bb->succs) != 2)
2669 return;
2671 else if (final_range_test_p (stmt))
2672 other_bb = single_succ (first_bb);
2673 else
2674 return;
2676 if (stmt_could_throw_p (stmt))
2677 return;
2679 /* As relative ordering of post-dominator sons isn't fixed,
2680 maybe_optimize_range_tests can be called first on any
2681 bb in the range we want to optimize. So, start searching
2682      backwards, to see if first_bb can be set to a predecessor.  */
2683 while (single_pred_p (first_bb))
2685 basic_block pred_bb = single_pred (first_bb);
2686 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2687 break;
2688 if (!no_side_effect_bb (first_bb))
2689 break;
2690 first_bb = pred_bb;
2692 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2693 Before starting forward search in last_bb successors, find
2694 out the other_bb. */
2695 if (first_bb == last_bb)
2697 other_bb = NULL;
2698 /* As non-GIMPLE_COND last stmt always terminates the range,
2699 if forward search didn't discover anything, just give up. */
2700 if (gimple_code (stmt) != GIMPLE_COND)
2701 return;
2702 /* Look at both successors. Either it ends with a GIMPLE_COND
2703 and satisfies suitable_cond_bb, or ends with a cast and
2704 other_bb is that cast's successor. */
2705 FOR_EACH_EDGE (e, ei, first_bb->succs)
2706 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2707 || e->dest == first_bb)
2708 return;
2709 else if (single_pred_p (e->dest))
2711 stmt = last_stmt (e->dest);
2712 if (stmt
2713 && gimple_code (stmt) == GIMPLE_COND
2714 && EDGE_COUNT (e->dest->succs) == 2)
2716 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2717 break;
2718 else
2719 other_bb = NULL;
2721 else if (stmt
2722 && final_range_test_p (stmt)
2723 && find_edge (first_bb, single_succ (e->dest)))
2725 other_bb = single_succ (e->dest);
2726 if (other_bb == first_bb)
2727 other_bb = NULL;
2730 if (other_bb == NULL)
2731 return;
2733 /* Now do the forward search, moving last_bb to successor bbs
2734 that aren't other_bb. */
2735 while (EDGE_COUNT (last_bb->succs) == 2)
2737 FOR_EACH_EDGE (e, ei, last_bb->succs)
2738 if (e->dest != other_bb)
2739 break;
2740 if (e == NULL)
2741 break;
2742 if (!single_pred_p (e->dest))
2743 break;
2744 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2745 break;
2746 if (!no_side_effect_bb (e->dest))
2747 break;
2748 last_bb = e->dest;
2750 if (first_bb == last_bb)
2751 return;
2752   /* Here basic blocks first_bb through last_bb's predecessor
2753      all end with GIMPLE_COND, each of them has one edge to
2754      other_bb and another to the next block in the range,
2755      no block except first_bb has side-effects, and
2756      last_bb ends with either a GIMPLE_COND or a cast satisfying
2757      final_range_test_p.  */
2758 for (bb = last_bb; ; bb = single_pred (bb))
2760 enum tree_code code;
2761 tree lhs, rhs;
2762 inter_bb_range_test_entry bb_ent;
2764 bb_ent.op = NULL_TREE;
2765 bb_ent.first_idx = ops.length ();
2766 bb_ent.last_idx = bb_ent.first_idx;
2767 e = find_edge (bb, other_bb);
2768 stmt = last_stmt (bb);
2769 gimple_set_visited (stmt, true);
2770 if (gimple_code (stmt) != GIMPLE_COND)
2772 use_operand_p use_p;
2773 gimple phi;
2774 edge e2;
2775 unsigned int d;
2777 lhs = gimple_assign_lhs (stmt);
2778 rhs = gimple_assign_rhs1 (stmt);
2779 gcc_assert (bb == last_bb);
2781 /* stmt is
2782 _123 = (int) _234;
2784 followed by:
2785 <bb M>:
2786 # _345 = PHI <_123(N), 1(...), 1(...)>
2788 	     or 0 instead of 1.  If it is 0, the _234
2789 	     range test is anded together with all the
2790 	     other range tests; if it is 1, it is ored with
2791 	     them.  */
2792 single_imm_use (lhs, &use_p, &phi);
2793 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2794 e2 = find_edge (first_bb, other_bb);
2795 d = e2->dest_idx;
2796 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2797 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2798 code = BIT_AND_EXPR;
2799 else
2801 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2802 code = BIT_IOR_EXPR;
2805 /* If _234 SSA_NAME_DEF_STMT is
2806 _234 = _567 | _789;
2807 	     (or &, corresponding to 1/0 in the phi arguments),
2808 push into ops the individual range test arguments
2809 of the bitwise or resp. and, recursively. */
2810 if (!get_ops (rhs, code, &ops,
2811 loop_containing_stmt (stmt))
2812 && has_single_use (rhs))
2814 /* Otherwise, push the _234 range test itself. */
2815 operand_entry_t oe
2816 = (operand_entry_t) pool_alloc (operand_entry_pool);
2818 oe->op = rhs;
2819 oe->rank = code;
2820 oe->id = 0;
2821 oe->count = 1;
2822 ops.safe_push (oe);
2823 bb_ent.last_idx++;
2825 else
2826 bb_ent.last_idx = ops.length ();
2827 bb_ent.op = rhs;
2828 bbinfo.safe_push (bb_ent);
2829 continue;
2831 /* Otherwise stmt is GIMPLE_COND. */
2832 code = gimple_cond_code (stmt);
2833 lhs = gimple_cond_lhs (stmt);
2834 rhs = gimple_cond_rhs (stmt);
2835 if (TREE_CODE (lhs) == SSA_NAME
2836 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2837 && ((code != EQ_EXPR && code != NE_EXPR)
2838 || rhs != boolean_false_node
2839 /* Either push into ops the individual bitwise
2840 or resp. and operands, depending on which
2841 edge is other_bb. */
2842 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2843 ^ (code == EQ_EXPR))
2844 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2845 loop_containing_stmt (stmt))))
2847 /* Or push the GIMPLE_COND stmt itself. */
2848 operand_entry_t oe
2849 = (operand_entry_t) pool_alloc (operand_entry_pool);
2851 oe->op = NULL;
2852 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2853 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2854 	  /* oe->op = NULL signals that there is no SSA_NAME
2855 	     for the range test, and oe->id instead is the
2856 	     number of the basic block at whose end the
2857 	     GIMPLE_COND is.  */
2858 oe->id = bb->index;
2859 oe->count = 1;
2860 ops.safe_push (oe);
2861 bb_ent.op = NULL;
2862 bb_ent.last_idx++;
2864 else if (ops.length () > bb_ent.first_idx)
2866 bb_ent.op = lhs;
2867 bb_ent.last_idx = ops.length ();
2869 bbinfo.safe_push (bb_ent);
2870 if (bb == first_bb)
2871 break;
2873 if (ops.length () > 1)
2874 any_changes = optimize_range_tests (ERROR_MARK, &ops);
2875 if (any_changes)
2877 unsigned int idx;
2878       /* update_ops relies on the has_single_use predicates returning the
2879 	 same values as they did during get_ops earlier.  Additionally it
2880 	 never removes statements, only adds new ones, and it walks
2881 	 from the single imm use and checks the predicate before
2882 	 making those changes.
2883 	 On the other hand, handling GIMPLE_CONDs directly can turn
2884 	 previously multiply-used SSA_NAMEs into single-use SSA_NAMEs, so
2885 	 it needs to be done in a separate loop afterwards.  */
2886 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2888 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2889 && bbinfo[idx].op != NULL_TREE)
2891 tree new_op;
2893 stmt = last_stmt (bb);
2894 new_op = update_ops (bbinfo[idx].op,
2895 (enum tree_code)
2896 ops[bbinfo[idx].first_idx]->rank,
2897 ops, &bbinfo[idx].first_idx,
2898 loop_containing_stmt (stmt));
2899 if (new_op == NULL_TREE)
2901 gcc_assert (bb == last_bb);
2902 new_op = ops[bbinfo[idx].first_idx++]->op;
2904 if (bbinfo[idx].op != new_op)
2906 imm_use_iterator iter;
2907 use_operand_p use_p;
2908 gimple use_stmt, cast_stmt = NULL;
2910 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
2911 if (is_gimple_debug (use_stmt))
2912 continue;
2913 else if (gimple_code (use_stmt) == GIMPLE_COND
2914 || gimple_code (use_stmt) == GIMPLE_PHI)
2915 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2916 SET_USE (use_p, new_op);
2917 else if (gimple_assign_cast_p (use_stmt))
2918 cast_stmt = use_stmt;
2919 else
2920 gcc_unreachable ();
2921 if (cast_stmt)
2923 gcc_assert (bb == last_bb);
2924 tree lhs = gimple_assign_lhs (cast_stmt);
2925 tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
2926 enum tree_code rhs_code
2927 = gimple_assign_rhs_code (cast_stmt);
2928 gimple g
2929 = gimple_build_assign_with_ops (rhs_code, new_lhs,
2930 new_op, NULL_TREE);
2931 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
2932 gimple_set_uid (g, gimple_uid (cast_stmt));
2933 gimple_set_visited (g, true);
2934 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2935 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2936 if (is_gimple_debug (use_stmt))
2937 continue;
2938 else if (gimple_code (use_stmt) == GIMPLE_COND
2939 || gimple_code (use_stmt) == GIMPLE_PHI)
2940 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2941 SET_USE (use_p, new_lhs);
2942 else
2943 gcc_unreachable ();
2947 if (bb == first_bb)
2948 break;
2950 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2952 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2953 && bbinfo[idx].op == NULL_TREE
2954 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
2956 stmt = last_stmt (bb);
2957 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
2958 gimple_cond_make_false (stmt);
2959 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
2960 gimple_cond_make_true (stmt);
2961 else
2963 gimple_cond_set_code (stmt, NE_EXPR);
2964 gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
2965 gimple_cond_set_rhs (stmt, boolean_false_node);
2967 update_stmt (stmt);
2969 if (bb == first_bb)
2970 break;
2973 bbinfo.release ();
2974 ops.release ();
2977 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2978    of STMT in its operands.  This is also known as a "destructive
2979 update" operation. */
2981 static bool
2982 is_phi_for_stmt (gimple stmt, tree operand)
2984 gimple def_stmt;
2985 tree lhs;
2986 use_operand_p arg_p;
2987 ssa_op_iter i;
2989 if (TREE_CODE (operand) != SSA_NAME)
2990 return false;
2992 lhs = gimple_assign_lhs (stmt);
2994 def_stmt = SSA_NAME_DEF_STMT (operand);
2995 if (gimple_code (def_stmt) != GIMPLE_PHI)
2996 return false;
2998 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2999 if (lhs == USE_FROM_PTR (arg_p))
3000 return true;
3001 return false;
3004 /* Remove def stmt of VAR if VAR has zero uses and recurse
3005 on rhs1 operand if so. */
3007 static void
3008 remove_visited_stmt_chain (tree var)
3010 gimple stmt;
3011 gimple_stmt_iterator gsi;
3013 while (1)
3015 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3016 return;
3017 stmt = SSA_NAME_DEF_STMT (var);
3018 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3020 var = gimple_assign_rhs1 (stmt);
3021 gsi = gsi_for_stmt (stmt);
3022 gsi_remove (&gsi, true);
3023 release_defs (stmt);
3025 else
3026 return;
3030 /* This function checks three consecutive operands in
3031    the passed operands vector OPS starting from OPINDEX and
3032    swaps two operands if it is profitable for the binary operation
3033    consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3035 We pair ops with the same rank if possible.
3037 The alternative we try is to see if STMT is a destructive
3038 update style statement, which is like:
3039 b = phi (a, ...)
3040 a = c + b;
3041 In that case, we want to use the destructive update form to
3042 expose the possible vectorizer sum reduction opportunity.
3043 In that case, the third operand will be the phi node. This
3044 check is not performed if STMT is null.
3046 We could, of course, try to be better as noted above, and do a
3047 lot of work to try to find these opportunities in >3 operand
3048 cases, but it is unlikely to be worth it. */
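/* For example (hypothetical ranks): if OPS[OPINDEX..OPINDEX+2] have ranks
   (5, 5, 3), the first and third entries are swapped so that the binary
   operation consuming the last two entries sees the equal-rank pair
   (5, 5).  */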
3050 static void
3051 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3052 unsigned int opindex, gimple stmt)
3054 operand_entry_t oe1, oe2, oe3;
3056 oe1 = ops[opindex];
3057 oe2 = ops[opindex + 1];
3058 oe3 = ops[opindex + 2];
3060 if ((oe1->rank == oe2->rank
3061 && oe2->rank != oe3->rank)
3062 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3063 && !is_phi_for_stmt (stmt, oe1->op)
3064 && !is_phi_for_stmt (stmt, oe2->op)))
3066 struct operand_entry temp = *oe3;
3067 oe3->op = oe1->op;
3068 oe3->rank = oe1->rank;
3069 oe1->op = temp.op;
3070       oe1->rank = temp.rank;
3072 else if ((oe1->rank == oe3->rank
3073 && oe2->rank != oe3->rank)
3074 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3075 && !is_phi_for_stmt (stmt, oe1->op)
3076 && !is_phi_for_stmt (stmt, oe3->op)))
3078 struct operand_entry temp = *oe2;
3079 oe2->op = oe1->op;
3080 oe2->rank = oe1->rank;
3081 oe1->op = temp.op;
3082 oe1->rank = temp.rank;
3086 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3087 two definitions, otherwise return STMT. */
3089 static inline gimple
3090 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3092 if (TREE_CODE (rhs1) == SSA_NAME
3093 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3094 stmt = SSA_NAME_DEF_STMT (rhs1);
3095 if (TREE_CODE (rhs2) == SSA_NAME
3096 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3097 stmt = SSA_NAME_DEF_STMT (rhs2);
3098 return stmt;
3101 /* Recursively rewrite our linearized statements so that the operators
3102 match those in OPS[OPINDEX], putting the computation in rank
3103 order. Return new lhs. */
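/* An illustrative trace (hypothetical operands): for a linearized PLUS
   chain and OPS == { a, b, c } (indices 0, 1, 2) the rewrite produces
     t_1 = b + c;
     lhs = t_1 + a;
   i.e. the last two entries of OPS are combined deepest in the chain and
   OPS[0] ends up at the root statement.  */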
3105 static tree
3106 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3107 vec<operand_entry_t> ops, bool changed)
3109 tree rhs1 = gimple_assign_rhs1 (stmt);
3110 tree rhs2 = gimple_assign_rhs2 (stmt);
3111 tree lhs = gimple_assign_lhs (stmt);
3112 operand_entry_t oe;
3114 /* The final recursion case for this function is that you have
3115 exactly two operations left.
3116      If we had exactly one op in the entire list to start with, we
3117 would have never called this function, and the tail recursion
3118 rewrites them one at a time. */
3119 if (opindex + 2 == ops.length ())
3121 operand_entry_t oe1, oe2;
3123 oe1 = ops[opindex];
3124 oe2 = ops[opindex + 1];
3126 if (rhs1 != oe1->op || rhs2 != oe2->op)
3128 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3129 unsigned int uid = gimple_uid (stmt);
3131 if (dump_file && (dump_flags & TDF_DETAILS))
3133 fprintf (dump_file, "Transforming ");
3134 print_gimple_stmt (dump_file, stmt, 0, 0);
3137 if (changed)
3139 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3140 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3141 stmt
3142 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3143 lhs, oe1->op, oe2->op);
3144 gimple_set_uid (stmt, uid);
3145 gimple_set_visited (stmt, true);
3146 if (insert_point == gsi_stmt (gsi))
3147 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3148 else
3149 insert_stmt_after (stmt, insert_point);
3151 else
3153 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3154 == stmt);
3155 gimple_assign_set_rhs1 (stmt, oe1->op);
3156 gimple_assign_set_rhs2 (stmt, oe2->op);
3157 update_stmt (stmt);
3160 if (rhs1 != oe1->op && rhs1 != oe2->op)
3161 remove_visited_stmt_chain (rhs1);
3163 if (dump_file && (dump_flags & TDF_DETAILS))
3165 fprintf (dump_file, " into ");
3166 print_gimple_stmt (dump_file, stmt, 0, 0);
3169 return lhs;
3172 /* If we hit here, we should have 3 or more ops left. */
3173 gcc_assert (opindex + 2 < ops.length ());
3175 /* Rewrite the next operator. */
3176 oe = ops[opindex];
3178 /* Recurse on the LHS of the binary operator, which is guaranteed to
3179 be the non-leaf side. */
3180 tree new_rhs1
3181 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3182 changed || oe->op != rhs2);
3184 if (oe->op != rhs2 || new_rhs1 != rhs1)
3186 if (dump_file && (dump_flags & TDF_DETAILS))
3188 fprintf (dump_file, "Transforming ");
3189 print_gimple_stmt (dump_file, stmt, 0, 0);
3192 /* If changed is false, this is either opindex == 0
3193 or all outer rhs2's were equal to corresponding oe->op,
3194 and powi_result is NULL.
3195 That means lhs is equivalent before and after reassociation.
3196 Otherwise ensure the old lhs SSA_NAME is not reused and
3197 create a new stmt as well, so that any debug stmts will be
3198 properly adjusted. */
3199 if (changed)
3201 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3202 unsigned int uid = gimple_uid (stmt);
3203 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3205 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3206 stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3207 lhs, new_rhs1, oe->op);
3208 gimple_set_uid (stmt, uid);
3209 gimple_set_visited (stmt, true);
3210 if (insert_point == gsi_stmt (gsi))
3211 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3212 else
3213 insert_stmt_after (stmt, insert_point);
3215 else
3217 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3218 == stmt);
3219 gimple_assign_set_rhs1 (stmt, new_rhs1);
3220 gimple_assign_set_rhs2 (stmt, oe->op);
3221 update_stmt (stmt);
3224 if (dump_file && (dump_flags & TDF_DETAILS))
3226 fprintf (dump_file, " into ");
3227 print_gimple_stmt (dump_file, stmt, 0, 0);
3230 return lhs;
3233 /* Find out how many cycles we need to compute a chain of statements.
3234    OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is the
3235    maximum number of independent statements we may execute per cycle.  */
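/* A worked example of the computation below (hypothetical numbers): for
   OPS_NUM == 10 and CPU_WIDTH == 2 we first spend 10 / (2 * 2) == 2 cycles
   reducing by CPU_WIDTH each, leaving 10 - 2 * 2 == 6 operands; 6 is not a
   power of two, so another floor_log2 (6) + 1 == 3 cycles are needed,
   5 cycles in total.  */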
3237 static int
3238 get_required_cycles (int ops_num, int cpu_width)
3240 int res;
3241 int elog;
3242 unsigned int rest;
3244   /* While we have more than 2 * cpu_width operands
3245      we may reduce the number of operands by cpu_width
3246      per cycle.  */
3247 res = ops_num / (2 * cpu_width);
3249   /* The remaining operand count may be halved each cycle
3250      until we have only one operand.  */
3251 rest = (unsigned)(ops_num - res * cpu_width);
3252 elog = exact_log2 (rest);
3253 if (elog >= 0)
3254 res += elog;
3255 else
3256 res += floor_log2 (rest) + 1;
3258 return res;
3261 /* Returns an optimal number of registers to use for computation of
3262 given statements. */
3264 static int
3265 get_reassociation_width (int ops_num, enum tree_code opc,
3266 enum machine_mode mode)
3268 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3269 int width;
3270 int width_min;
3271 int cycles_best;
3273 if (param_width > 0)
3274 width = param_width;
3275 else
3276 width = targetm.sched.reassociation_width (opc, mode);
3278 if (width == 1)
3279 return width;
3281 /* Get the minimal time required for sequence computation. */
3282 cycles_best = get_required_cycles (ops_num, width);
3284 /* Check if we may use less width and still compute sequence for
3285 the same time. It will allow us to reduce registers usage.
3286 get_required_cycles is monotonically increasing with lower width
3287 so we can perform a binary search for the minimal width that still
3288 results in the optimal cycle count. */
3289 width_min = 1;
3290 while (width > width_min)
3292 int width_mid = (width + width_min) / 2;
3294 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3295 width = width_mid;
3296 else if (width_min < width_mid)
3297 width_min = width_mid;
3298 else
3299 break;
3302 return width;
3305 /* Recursively rewrite our linearized statements so that the operators
3306 match those in OPS[OPINDEX], putting the computation in rank
3307 order and trying to allow operations to be executed in
3308 parallel. */
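/* For instance (hypothetical operands), with WIDTH == 2 and
   OPS == { a, b, c, d } the loop below emits roughly
     t_1 = c + d;
     t_2 = a + b;
     lhs = t_1 + t_2;
   so the two partial sums can execute in parallel instead of forming one
   fully serial chain.  */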
3310 static void
3311 rewrite_expr_tree_parallel (gimple stmt, int width,
3312 vec<operand_entry_t> ops)
3314 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3315 int op_num = ops.length ();
3316 int stmt_num = op_num - 1;
3317 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3318 int op_index = op_num - 1;
3319 int stmt_index = 0;
3320 int ready_stmts_end = 0;
3321 int i = 0;
3322 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3324 /* We start expression rewriting from the top statements.
3325 So, in this loop we create a full list of statements
3326 we will work with. */
3327 stmts[stmt_num - 1] = stmt;
3328 for (i = stmt_num - 2; i >= 0; i--)
3329 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3331 for (i = 0; i < stmt_num; i++)
3333 tree op1, op2;
3335 /* Determine whether we should use results of
3336 already handled statements or not. */
3337 if (ready_stmts_end == 0
3338 && (i - stmt_index >= width || op_index < 1))
3339 ready_stmts_end = i;
3341       /* Now we choose operands for the next statement.  A nonzero
3342 	 value in ready_stmts_end means that we should use
3343 	 the results of already generated statements as new operands.  */
3344 if (ready_stmts_end > 0)
3346 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3347 if (ready_stmts_end > stmt_index)
3348 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3349 else if (op_index >= 0)
3350 op2 = ops[op_index--]->op;
3351 else
3353 gcc_assert (stmt_index < i);
3354 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3357 if (stmt_index >= ready_stmts_end)
3358 ready_stmts_end = 0;
3360 else
3362 if (op_index > 1)
3363 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3364 op2 = ops[op_index--]->op;
3365 op1 = ops[op_index--]->op;
3368 /* If we emit the last statement then we should put
3369 operands into the last statement. It will also
3370 break the loop. */
3371 if (op_index < 0 && stmt_index == i)
3372 i = stmt_num - 1;
3374 if (dump_file && (dump_flags & TDF_DETAILS))
3376 fprintf (dump_file, "Transforming ");
3377 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3380       /* We keep the original statement only for the last one.  All
3381 	 others are recreated.  */
3382 if (i == stmt_num - 1)
3384 gimple_assign_set_rhs1 (stmts[i], op1);
3385 gimple_assign_set_rhs2 (stmts[i], op2);
3386 update_stmt (stmts[i]);
3388 else
3389 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3391 if (dump_file && (dump_flags & TDF_DETAILS))
3393 fprintf (dump_file, " into ");
3394 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3398 remove_visited_stmt_chain (last_rhs1);
3401 /* Transform STMT, which is really (A + B) + (C + D), into the left
3402    linear form, ((A + B) + C) + D.
3403 Recurse on D if necessary. */
3405 static void
3406 linearize_expr (gimple stmt)
3408 gimple_stmt_iterator gsi;
3409 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3410 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3411 gimple oldbinrhs = binrhs;
3412 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3413 gimple newbinrhs = NULL;
3414 struct loop *loop = loop_containing_stmt (stmt);
3415 tree lhs = gimple_assign_lhs (stmt);
3417 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3418 && is_reassociable_op (binrhs, rhscode, loop));
3420 gsi = gsi_for_stmt (stmt);
3422 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3423 binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3424 make_ssa_name (TREE_TYPE (lhs), NULL),
3425 gimple_assign_lhs (binlhs),
3426 gimple_assign_rhs2 (binrhs));
3427 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3428 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3429 gimple_set_uid (binrhs, gimple_uid (stmt));
3431 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3432 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3434 if (dump_file && (dump_flags & TDF_DETAILS))
3436 fprintf (dump_file, "Linearized: ");
3437 print_gimple_stmt (dump_file, stmt, 0, 0);
3440 reassociate_stats.linearized++;
3441 update_stmt (stmt);
3443 gsi = gsi_for_stmt (oldbinrhs);
3444 gsi_remove (&gsi, true);
3445 release_defs (oldbinrhs);
3447 gimple_set_visited (stmt, true);
3448 gimple_set_visited (binlhs, true);
3449 gimple_set_visited (binrhs, true);
3451 /* Tail recurse on the new rhs if it still needs reassociation. */
3452 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3453 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3454 want to change the algorithm while converting to tuples. */
3455 linearize_expr (stmt);
3458 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3459 it. Otherwise, return NULL. */
3461 static gimple
3462 get_single_immediate_use (tree lhs)
3464 use_operand_p immuse;
3465 gimple immusestmt;
3467 if (TREE_CODE (lhs) == SSA_NAME
3468 && single_imm_use (lhs, &immuse, &immusestmt)
3469 && is_gimple_assign (immusestmt))
3470 return immusestmt;
3472 return NULL;
3475 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3476 representing the negated value. Insertions of any necessary
3477 instructions go before GSI.
3478 This function is recursive in that, if you hand it "a_5" as the
3479 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3480 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3482 static tree
3483 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3485 gimple negatedefstmt = NULL;
3486 tree resultofnegate;
3487 gimple_stmt_iterator gsi;
3488 unsigned int uid;
3490 /* If we are trying to negate a name, defined by an add, negate the
3491 add operands instead. */
3492 if (TREE_CODE (tonegate) == SSA_NAME)
3493 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3494 if (TREE_CODE (tonegate) == SSA_NAME
3495 && is_gimple_assign (negatedefstmt)
3496 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3497 && has_single_use (gimple_assign_lhs (negatedefstmt))
3498 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3500 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3501 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3502 tree lhs = gimple_assign_lhs (negatedefstmt);
3503 gimple g;
3505 gsi = gsi_for_stmt (negatedefstmt);
3506 rhs1 = negate_value (rhs1, &gsi);
3508 gsi = gsi_for_stmt (negatedefstmt);
3509 rhs2 = negate_value (rhs2, &gsi);
3511 gsi = gsi_for_stmt (negatedefstmt);
3512 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3513 gimple_set_visited (negatedefstmt, true);
3514 g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3515 gimple_set_uid (g, gimple_uid (negatedefstmt));
3516 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3517 return lhs;
3520 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3521 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3522 NULL_TREE, true, GSI_SAME_STMT);
3523 gsi = *gsip;
3524 uid = gimple_uid (gsi_stmt (gsi));
3525 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3527 gimple stmt = gsi_stmt (gsi);
3528 if (gimple_uid (stmt) != 0)
3529 break;
3530 gimple_set_uid (stmt, uid);
3532 return resultofnegate;
3535 /* Return true if we should break up the subtract in STMT into an add
3536    with negate.  This is true when the subtract's operands are really
3537 adds, or the subtract itself is used in an add expression. In
3538 either case, breaking up the subtract into an add with negate
3539 exposes the adds to reassociation. */
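/* E.g. (hypothetical names) for
     c_1 = a_2 - b_3;
     d_4 = c_1 + e_5;
   the subtract feeds an addition, so it is worth rewriting it as
   c_1 = a_2 + -b_3 to expose the whole sum to reassociation.  */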
3541 static bool
3542 should_break_up_subtract (gimple stmt)
3544 tree lhs = gimple_assign_lhs (stmt);
3545 tree binlhs = gimple_assign_rhs1 (stmt);
3546 tree binrhs = gimple_assign_rhs2 (stmt);
3547 gimple immusestmt;
3548 struct loop *loop = loop_containing_stmt (stmt);
3550 if (TREE_CODE (binlhs) == SSA_NAME
3551 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3552 return true;
3554 if (TREE_CODE (binrhs) == SSA_NAME
3555 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3556 return true;
3558 if (TREE_CODE (lhs) == SSA_NAME
3559 && (immusestmt = get_single_immediate_use (lhs))
3560 && is_gimple_assign (immusestmt)
3561 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3562 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3563 return true;
3564 return false;
3567 /* Transform STMT from A - B into A + -B. */
3569 static void
3570 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3572 tree rhs1 = gimple_assign_rhs1 (stmt);
3573 tree rhs2 = gimple_assign_rhs2 (stmt);
3575 if (dump_file && (dump_flags & TDF_DETAILS))
3577 fprintf (dump_file, "Breaking up subtract ");
3578 print_gimple_stmt (dump_file, stmt, 0, 0);
3581 rhs2 = negate_value (rhs2, gsip);
3582 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3583 update_stmt (stmt);
3586 /* Determine whether STMT is a builtin call that raises an SSA name
3587 to an integer power and has only one use. If so, and this is early
3588 reassociation and unsafe math optimizations are permitted, place
3589 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3590 If any of these conditions does not hold, return FALSE. */
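/* E.g. (hypothetical calls) t_1 = __builtin_pow (x_2, 4.0) yields
   *BASE == x_2 and *EXPONENT == 4, and t_3 = __builtin_powi (y_4, 5)
   yields *BASE == y_4 and *EXPONENT == 5, while __builtin_pow (x_2, 1.5)
   is rejected because 1.5 is not an exact integer.  */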
3592 static bool
3593 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3595 tree fndecl, arg1;
3596 REAL_VALUE_TYPE c, cint;
3598 if (!first_pass_instance
3599 || !flag_unsafe_math_optimizations
3600 || !is_gimple_call (stmt)
3601 || !has_single_use (gimple_call_lhs (stmt)))
3602 return false;
3604 fndecl = gimple_call_fndecl (stmt);
3606 if (!fndecl
3607 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3608 return false;
3610 switch (DECL_FUNCTION_CODE (fndecl))
3612 CASE_FLT_FN (BUILT_IN_POW):
3613 *base = gimple_call_arg (stmt, 0);
3614 arg1 = gimple_call_arg (stmt, 1);
3616 if (TREE_CODE (arg1) != REAL_CST)
3617 return false;
3619 c = TREE_REAL_CST (arg1);
3621 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3622 return false;
3624 *exponent = real_to_integer (&c);
3625 real_from_integer (&cint, VOIDmode, *exponent,
3626 *exponent < 0 ? -1 : 0, 0);
3627 if (!real_identical (&c, &cint))
3628 return false;
3630 break;
3632 CASE_FLT_FN (BUILT_IN_POWI):
3633 *base = gimple_call_arg (stmt, 0);
3634 arg1 = gimple_call_arg (stmt, 1);
3636 if (!host_integerp (arg1, 0))
3637 return false;
3639 *exponent = TREE_INT_CST_LOW (arg1);
3640 break;
3642 default:
3643 return false;
3646 /* Expanding negative exponents is generally unproductive, so we don't
3647 complicate matters with those. Exponents of zero and one should
3648 have been handled by expression folding. */
3649 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3650 return false;
3652 return true;
3655 /* Recursively linearize a binary expression that is the RHS of STMT.
3656 Place the operands of the expression tree in the vector named OPS. */
3658 static void
3659 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3660 bool is_associative, bool set_visited)
3662 tree binlhs = gimple_assign_rhs1 (stmt);
3663 tree binrhs = gimple_assign_rhs2 (stmt);
3664 gimple binlhsdef = NULL, binrhsdef = NULL;
3665 bool binlhsisreassoc = false;
3666 bool binrhsisreassoc = false;
3667 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3668 struct loop *loop = loop_containing_stmt (stmt);
3669 tree base = NULL_TREE;
3670 HOST_WIDE_INT exponent = 0;
3672 if (set_visited)
3673 gimple_set_visited (stmt, true);
3675 if (TREE_CODE (binlhs) == SSA_NAME)
3677 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3678 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3679 && !stmt_could_throw_p (binlhsdef));
3682 if (TREE_CODE (binrhs) == SSA_NAME)
3684 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3685 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3686 && !stmt_could_throw_p (binrhsdef));
3689 /* If the LHS is not reassociable, but the RHS is, we need to swap
3690 them. If neither is reassociable, there is nothing we can do, so
3691 just put them in the ops vector. If the LHS is reassociable,
3692 linearize it. If both are reassociable, then linearize the RHS
3693 and the LHS. */
3695 if (!binlhsisreassoc)
3697 tree temp;
3699       /* If this is not an associative operation (e.g. it is a division), give up.  */
3700 if (!is_associative)
3702 add_to_ops_vec (ops, binrhs);
3703 return;
3706 if (!binrhsisreassoc)
3708 if (rhscode == MULT_EXPR
3709 && TREE_CODE (binrhs) == SSA_NAME
3710 && acceptable_pow_call (binrhsdef, &base, &exponent))
3712 add_repeat_to_ops_vec (ops, base, exponent);
3713 gimple_set_visited (binrhsdef, true);
3715 else
3716 add_to_ops_vec (ops, binrhs);
3718 if (rhscode == MULT_EXPR
3719 && TREE_CODE (binlhs) == SSA_NAME
3720 && acceptable_pow_call (binlhsdef, &base, &exponent))
3722 add_repeat_to_ops_vec (ops, base, exponent);
3723 gimple_set_visited (binlhsdef, true);
3725 else
3726 add_to_ops_vec (ops, binlhs);
3728 return;
3731 if (dump_file && (dump_flags & TDF_DETAILS))
3733 fprintf (dump_file, "swapping operands of ");
3734 print_gimple_stmt (dump_file, stmt, 0, 0);
3737 swap_ssa_operands (stmt,
3738 gimple_assign_rhs1_ptr (stmt),
3739 gimple_assign_rhs2_ptr (stmt));
3740 update_stmt (stmt);
3742 if (dump_file && (dump_flags & TDF_DETAILS))
3744 fprintf (dump_file, " is now ");
3745 print_gimple_stmt (dump_file, stmt, 0, 0);
3748 /* We want to make it so the lhs is always the reassociative op,
3749 so swap. */
3750 temp = binlhs;
3751 binlhs = binrhs;
3752 binrhs = temp;
3754 else if (binrhsisreassoc)
3756 linearize_expr (stmt);
3757 binlhs = gimple_assign_rhs1 (stmt);
3758 binrhs = gimple_assign_rhs2 (stmt);
3761 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3762 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3763 rhscode, loop));
3764 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3765 is_associative, set_visited);
3767 if (rhscode == MULT_EXPR
3768 && TREE_CODE (binrhs) == SSA_NAME
3769 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3771 add_repeat_to_ops_vec (ops, base, exponent);
3772 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3774 else
3775 add_to_ops_vec (ops, binrhs);
3778 /* Repropagate the negates back into subtracts, since no other pass
3779 currently does it. */
3781 static void
3782 repropagate_negates (void)
3784 unsigned int i = 0;
3785 tree negate;
3787 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3789 gimple user = get_single_immediate_use (negate);
3791 if (!user || !is_gimple_assign (user))
3792 continue;
3794 /* The negate operand can be either operand of a PLUS_EXPR
3795 (it can be the LHS if the RHS is a constant for example).
3797 Force the negate operand to the RHS of the PLUS_EXPR, then
3798 transform the PLUS_EXPR into a MINUS_EXPR. */
3799 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3801 /* If the negated operand appears on the LHS of the
3802 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3803 to force the negated operand to the RHS of the PLUS_EXPR. */
3804 if (gimple_assign_rhs1 (user) == negate)
3806 swap_ssa_operands (user,
3807 gimple_assign_rhs1_ptr (user),
3808 gimple_assign_rhs2_ptr (user));
3811 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3812 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3813 if (gimple_assign_rhs2 (user) == negate)
3815 tree rhs1 = gimple_assign_rhs1 (user);
3816 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3817 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3818 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3819 update_stmt (user);
3822 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3824 if (gimple_assign_rhs1 (user) == negate)
3826 /* We have
3827 x = -a
3828 y = x - b
3829 which we transform into
3830 x = a + b
3831 y = -x .
3832 This pushes down the negate which we possibly can merge
3833 into some other operation, hence insert it into the
3834 plus_negates vector. */
3835 gimple feed = SSA_NAME_DEF_STMT (negate);
3836 tree a = gimple_assign_rhs1 (feed);
3837 tree b = gimple_assign_rhs2 (user);
3838 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
3839 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
3840 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
3841 gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
3842 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3843 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
3844 user = gsi_stmt (gsi2);
3845 update_stmt (user);
3846 gsi_remove (&gsi, true);
3847 release_defs (feed);
3848 plus_negates.safe_push (gimple_assign_lhs (user));
3850 else
3852 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3853 rid of one operation. */
3854 gimple feed = SSA_NAME_DEF_STMT (negate);
3855 tree a = gimple_assign_rhs1 (feed);
3856 tree rhs1 = gimple_assign_rhs1 (user);
3857 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3858 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3859 update_stmt (gsi_stmt (gsi));
3865 /* Returns true if OP is of a type for which we can do reassociation.
3866 That is for integral or non-saturating fixed-point types, and for
3867    floating point types when associative-math is enabled.  */
3869 static bool
3870 can_reassociate_p (tree op)
3872 tree type = TREE_TYPE (op);
3873 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3874 || NON_SAT_FIXED_POINT_TYPE_P (type)
3875 || (flag_associative_math && FLOAT_TYPE_P (type)))
3876 return true;
3877 return false;
3880 /* Break up subtract operations in block BB.
3882 We do this top down because we don't know whether the subtract is
3883 part of a possible chain of reassociation except at the top.
3885 IE given
3886 d = f + g
3887 c = a + e
3888 b = c - d
3889 q = b - r
3890 k = t - q
3892 we want to break up k = t - q, but we won't until we've transformed q
3893 = b - r, which won't be broken up until we transform b = c - d.
3895 En passant, clear the GIMPLE visited flag on every statement
3896 and set UIDs within each basic block. */
3898 static void
3899 break_up_subtract_bb (basic_block bb)
3901 gimple_stmt_iterator gsi;
3902 basic_block son;
3903 unsigned int uid = 1;
3905 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3907 gimple stmt = gsi_stmt (gsi);
3908 gimple_set_visited (stmt, false);
3909 gimple_set_uid (stmt, uid++);
3911 if (!is_gimple_assign (stmt)
3912 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3913 continue;
3915 /* Look for simple gimple subtract operations. */
3916 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3918 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3919 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3920 continue;
3922 /* Check for a subtract used only in an addition. If this
3923 is the case, transform it into add of a negate for better
3924 reassociation. IE transform C = A-B into C = A + -B if C
3925 is only used in an addition. */
3926 if (should_break_up_subtract (stmt))
3927 break_up_subtract (stmt, &gsi);
3929 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3930 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3931 plus_negates.safe_push (gimple_assign_lhs (stmt));
3933 for (son = first_dom_son (CDI_DOMINATORS, bb);
3934 son;
3935 son = next_dom_son (CDI_DOMINATORS, son))
3936 break_up_subtract_bb (son);
3939 /* Used for repeated factor analysis. */
3940 struct repeat_factor_d
3942 /* An SSA name that occurs in a multiply chain. */
3943 tree factor;
3945 /* Cached rank of the factor. */
3946 unsigned rank;
3948 /* Number of occurrences of the factor in the chain. */
3949 HOST_WIDE_INT count;
3951 /* An SSA name representing the product of this factor and
3952 all factors appearing later in the repeated factor vector. */
3953 tree repr;
3956 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3957 typedef const struct repeat_factor_d *const_repeat_factor_t;
3960 static vec<repeat_factor> repeat_factor_vec;
3962 /* Used for sorting the repeat factor vector. Sort primarily by
3963 ascending occurrence count, secondarily by descending rank. */
3965 static int
3966 compare_repeat_factors (const void *x1, const void *x2)
3968 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3969 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3971 if (rf1->count != rf2->count)
3972 return rf1->count - rf2->count;
3974 return rf2->rank - rf1->rank;
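/* For example, factors {x: count 4, rank 9}, {y: count 2, rank 7} and
   {z: count 2, rank 5} sort as y, z, x -- the count-2 factors first
   (higher-ranked y ahead of z), then the count-4 factor x.  */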
3977 /* Look for repeated operands in OPS in the multiply tree rooted at
3978 STMT. Replace them with an optimal sequence of multiplies and powi
3979 builtin calls, and remove the used operands from OPS. Return an
3980 SSA name representing the value of the replacement sequence. */
3982 static tree
3983 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3985 unsigned i, j, vec_len;
3986 int ii;
3987 operand_entry_t oe;
3988 repeat_factor_t rf1, rf2;
3989 repeat_factor rfnew;
3990 tree result = NULL_TREE;
3991 tree target_ssa, iter_result;
3992 tree type = TREE_TYPE (gimple_get_lhs (stmt));
3993 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3994 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3995 gimple mul_stmt, pow_stmt;
3997 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3998 target. */
3999 if (!powi_fndecl)
4000 return NULL_TREE;
4002 /* Allocate the repeated factor vector. */
4003 repeat_factor_vec.create (10);
4005 /* Scan the OPS vector for all SSA names in the product and build
4006 up a vector of occurrence counts for each factor. */
4007 FOR_EACH_VEC_ELT (*ops, i, oe)
4009 if (TREE_CODE (oe->op) == SSA_NAME)
4011 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4013 if (rf1->factor == oe->op)
4015 rf1->count += oe->count;
4016 break;
4020 if (j >= repeat_factor_vec.length ())
4022 rfnew.factor = oe->op;
4023 rfnew.rank = oe->rank;
4024 rfnew.count = oe->count;
4025 rfnew.repr = NULL_TREE;
4026 repeat_factor_vec.safe_push (rfnew);
4031 /* Sort the repeated factor vector by (a) increasing occurrence count,
4032 and (b) decreasing rank. */
4033 repeat_factor_vec.qsort (compare_repeat_factors);
4035 /* It is generally best to combine as many base factors as possible
4036 into a product before applying __builtin_powi to the result.
4037 However, the sort order chosen for the repeated factor vector
4038 allows us to cache partial results for the product of the base
4039 factors for subsequent use. When we already have a cached partial
4040 result from a previous iteration, it is best to make use of it
4041 before looking for another __builtin_powi opportunity.
4043 As an example, consider x * x * y * y * y * z * z * z * z.
4044 We want to first compose the product x * y * z, raise it to the
4045 second power, then multiply this by y * z, and finally multiply
4046 by z. This can be done in 5 multiplies provided we cache y * z
4047 for use in both expressions:
4049 t1 = y * z
4050 t2 = t1 * x
4051 t3 = t2 * t2
4052 t4 = t1 * t3
4053 result = t4 * z
4055 If we instead ignored the cached y * z and first multiplied by
4056 the __builtin_powi opportunity z * z, we would get the inferior:
4058 t1 = y * z
4059 t2 = t1 * x
4060 t3 = t2 * t2
4061 t4 = z * z
4062 t5 = t3 * t4
4063 result = t5 * y */
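/* Tracing the loop below on the example above: after sorting, the
   repeat factor vector is x (count 2), y (count 3), z (count 4).  The
   first pass picks x (the first factor with count >= 2), builds the
   cached representatives z, t1 = y * z and t2 = t1 * x, and emits
   t3 = __builtin_powi (t2, 2), leaving counts x:0, y:1, z:2.  The next
   two passes then consume the cached products t1 and z with the plain
   multiplies t4 = t1 * t3 and result = t4 * z.  */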
4065 vec_len = repeat_factor_vec.length ();
4067 /* Repeatedly look for opportunities to create a builtin_powi call. */
4068 while (true)
4070 HOST_WIDE_INT power;
4072 /* First look for the largest cached product of factors from
4073 preceding iterations. If found, create a builtin_powi for
4074 it if the minimum occurrence count for its factors is at
4075 least 2, or just use this cached product as our next
4076 multiplicand if the minimum occurrence count is 1. */
4077 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4079 if (rf1->repr && rf1->count > 0)
4080 break;
4083 if (j < vec_len)
4085 power = rf1->count;
4087 if (power == 1)
4089 iter_result = rf1->repr;
4091 if (dump_file && (dump_flags & TDF_DETAILS))
4093 unsigned elt;
4094 repeat_factor_t rf;
4095 fputs ("Multiplying by cached product ", dump_file);
4096 for (elt = j; elt < vec_len; elt++)
4098 rf = &repeat_factor_vec[elt];
4099 print_generic_expr (dump_file, rf->factor, 0);
4100 if (elt < vec_len - 1)
4101 fputs (" * ", dump_file);
4103 fputs ("\n", dump_file);
4106 else
4108 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4109 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4110 build_int_cst (integer_type_node,
4111 power));
4112 gimple_call_set_lhs (pow_stmt, iter_result);
4113 gimple_set_location (pow_stmt, gimple_location (stmt));
4114 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4116 if (dump_file && (dump_flags & TDF_DETAILS))
4118 unsigned elt;
4119 repeat_factor_t rf;
4120 fputs ("Building __builtin_pow call for cached product (",
4121 dump_file);
4122 for (elt = j; elt < vec_len; elt++)
4124 rf = &repeat_factor_vec[elt];
4125 print_generic_expr (dump_file, rf->factor, 0);
4126 if (elt < vec_len - 1)
4127 fputs (" * ", dump_file);
4129 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4130 power);
4134 else
4136 /* Otherwise, find the first factor in the repeated factor
4137 vector whose occurrence count is at least 2. If no such
4138 factor exists, there are no builtin_powi opportunities
4139 remaining. */
4140 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4142 if (rf1->count >= 2)
4143 break;
4146 if (j >= vec_len)
4147 break;
4149 power = rf1->count;
4151 if (dump_file && (dump_flags & TDF_DETAILS))
4153 unsigned elt;
4154 repeat_factor_t rf;
4155 fputs ("Building __builtin_pow call for (", dump_file);
4156 for (elt = j; elt < vec_len; elt++)
4158 rf = &repeat_factor_vec[elt];
4159 print_generic_expr (dump_file, rf->factor, 0);
4160 if (elt < vec_len - 1)
4161 fputs (" * ", dump_file);
4163 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4166 reassociate_stats.pows_created++;
4168 /* Visit each element of the vector in reverse order (so that
4169 high-occurrence elements are visited first, and within the
4170 same occurrence count, lower-ranked elements are visited
4171 first). Form a linear product of all elements in this order
4172 whose occurrence count is at least that of element J.
4173 Record the SSA name representing the product of each element
4174 with all subsequent elements in the vector. */
4175 if (j == vec_len - 1)
4176 rf1->repr = rf1->factor;
4177 else
4179 for (ii = vec_len - 2; ii >= (int)j; ii--)
4181 tree op1, op2;
4183 rf1 = &repeat_factor_vec[ii];
4184 rf2 = &repeat_factor_vec[ii + 1];
4186 /* Init the last factor's representative to be itself. */
4187 if (!rf2->repr)
4188 rf2->repr = rf2->factor;
4190 op1 = rf1->factor;
4191 op2 = rf2->repr;
4193 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4194 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4195 target_ssa,
4196 op1, op2);
4197 gimple_set_location (mul_stmt, gimple_location (stmt));
4198 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4199 rf1->repr = target_ssa;
4201 /* Don't reprocess the multiply we just introduced. */
4202 gimple_set_visited (mul_stmt, true);
4206 /* Form a call to __builtin_powi for the maximum product
4207 just formed, raised to the power obtained earlier. */
4208 rf1 = &repeat_factor_vec[j];
4209 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4210 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4211 build_int_cst (integer_type_node,
4212 power));
4213 gimple_call_set_lhs (pow_stmt, iter_result);
4214 gimple_set_location (pow_stmt, gimple_location (stmt));
4215 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4218 /* If we previously formed at least one other builtin_powi call,
4219 form the product of this one and those others. */
4220 if (result)
4222 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4223 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4224 result, iter_result);
4225 gimple_set_location (mul_stmt, gimple_location (stmt));
4226 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4227 gimple_set_visited (mul_stmt, true);
4228 result = new_result;
4230 else
4231 result = iter_result;
4233 /* Decrement the occurrence count of each element in the product
4234 by the count found above, and remove this many copies of each
4235 factor from OPS. */
4236 for (i = j; i < vec_len; i++)
4238 unsigned k = power;
4239 unsigned n;
4241 rf1 = &repeat_factor_vec[i];
4242 rf1->count -= power;
4244 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4246 if (oe->op == rf1->factor)
4248 if (oe->count <= k)
4250 ops->ordered_remove (n);
4251 k -= oe->count;
4253 if (k == 0)
4254 break;
4256 else
4258 oe->count -= k;
4259 break;
4266 /* At this point all elements in the repeated factor vector have a
4267 remaining occurrence count of 0 or 1, and those with a count of 1
4268 don't have cached representatives. Re-sort the ops vector and
4269 clean up. */
4270 ops->qsort (sort_by_operand_rank);
4271 repeat_factor_vec.release ();
4273 /* Return the final product computed herein. Note that there may
4274 still be some elements with single occurrence count left in OPS;
4275 those will be handled by the normal reassociation logic. */
4276 return result;
4279 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4281 static void
4282 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4284 tree rhs1;
4286 if (dump_file && (dump_flags & TDF_DETAILS))
4288 fprintf (dump_file, "Transforming ");
4289 print_gimple_stmt (dump_file, stmt, 0, 0);
4292 rhs1 = gimple_assign_rhs1 (stmt);
4293 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4294 update_stmt (stmt);
4295 remove_visited_stmt_chain (rhs1);
4297 if (dump_file && (dump_flags & TDF_DETAILS))
4299 fprintf (dump_file, " into ");
4300 print_gimple_stmt (dump_file, stmt, 0, 0);
4304 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4306 static void
4307 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4308 tree rhs1, tree rhs2)
4310 if (dump_file && (dump_flags & TDF_DETAILS))
4312 fprintf (dump_file, "Transforming ");
4313 print_gimple_stmt (dump_file, stmt, 0, 0);
4316 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4317 update_stmt (gsi_stmt (*gsi));
4318 remove_visited_stmt_chain (rhs1);
4320 if (dump_file && (dump_flags & TDF_DETAILS))
4322 fprintf (dump_file, " into ");
4323 print_gimple_stmt (dump_file, stmt, 0, 0);
4327 /* Reassociate expressions in basic block BB and its children in the
4328 post-dominator tree. */
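/* In outline, for each reassociable binary statement, visited bottom-up
   within the block: linearize the operand tree into OPS, sort by rank,
   optimize the operand list (and re-optimize after undistribution),
   optimize range tests for bitwise AND/OR, attempt the __builtin_powi
   transformation for multiply chains (first pass only, under unsafe
   math), and finally rewrite the statement as a copy, a single
   multiply, or a re-ranked (possibly parallel) expression tree.  */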
4330 static void
4331 reassociate_bb (basic_block bb)
4333 gimple_stmt_iterator gsi;
4334 basic_block son;
4335 gimple stmt = last_stmt (bb);
4337 if (stmt && !gimple_visited_p (stmt))
4338 maybe_optimize_range_tests (stmt);
4340 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4342 stmt = gsi_stmt (gsi);
4344 if (is_gimple_assign (stmt)
4345 && !stmt_could_throw_p (stmt))
4347 tree lhs, rhs1, rhs2;
4348 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4350 /* If this is not a gimple binary expression, there is
4351 nothing for us to do with it. */
4352 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4353 continue;
4355 /* If this was part of an already processed statement,
4356 we don't need to touch it again. */
4357 if (gimple_visited_p (stmt))
4359 /* This statement might have become dead because of previous
4360 reassociations. */
4361 if (has_zero_uses (gimple_get_lhs (stmt)))
4363 gsi_remove (&gsi, true);
4364 release_defs (stmt);
4365 /* We might have removed the last stmt above, which
4366 leaves the iterator at the end of the sequence.
4367 In that case, reset it to the last stmt of the
4368 block.  If the block has become empty, that
4369 last-stmt iterator is also the end iterator,
4370 and we need to bail out. */
4371 if (gsi_end_p (gsi))
4373 gsi = gsi_last_bb (bb);
4374 if (gsi_end_p (gsi))
4375 break;
4378 continue;
4381 lhs = gimple_assign_lhs (stmt);
4382 rhs1 = gimple_assign_rhs1 (stmt);
4383 rhs2 = gimple_assign_rhs2 (stmt);
4385 /* For operations other than bitwise AND/OR/XOR and min/max we
4386 cannot reassociate all types.  Verify that here. */
4387 if (rhs_code != BIT_IOR_EXPR
4388 && rhs_code != BIT_AND_EXPR
4389 && rhs_code != BIT_XOR_EXPR
4390 && rhs_code != MIN_EXPR
4391 && rhs_code != MAX_EXPR
4392 && (!can_reassociate_p (lhs)
4393 || !can_reassociate_p (rhs1)
4394 || !can_reassociate_p (rhs2)))
4395 continue;
4397 if (associative_tree_code (rhs_code))
4399 vec<operand_entry_t> ops = vNULL;
4400 tree powi_result = NULL_TREE;
4402 /* There may be no immediate uses left by the time we
4403 get here because we may have eliminated them all. */
4404 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4405 continue;
4407 gimple_set_visited (stmt, true);
4408 linearize_expr_tree (&ops, stmt, true, true);
4409 ops.qsort (sort_by_operand_rank);
4410 optimize_ops_list (rhs_code, &ops);
4411 if (undistribute_ops_list (rhs_code, &ops,
4412 loop_containing_stmt (stmt)))
4414 ops.qsort (sort_by_operand_rank);
4415 optimize_ops_list (rhs_code, &ops);
4418 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4419 optimize_range_tests (rhs_code, &ops);
4421 if (first_pass_instance
4422 && rhs_code == MULT_EXPR
4423 && flag_unsafe_math_optimizations)
4424 powi_result = attempt_builtin_powi (stmt, &ops);
4426 /* If the operand vector is now empty, all operands were
4427 consumed by the __builtin_powi optimization. */
4428 if (ops.length () == 0)
4429 transform_stmt_to_copy (&gsi, stmt, powi_result);
4430 else if (ops.length () == 1)
4432 tree last_op = ops.last ()->op;
4434 if (powi_result)
4435 transform_stmt_to_multiply (&gsi, stmt, last_op,
4436 powi_result);
4437 else
4438 transform_stmt_to_copy (&gsi, stmt, last_op);
4440 else
4442 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4443 int ops_num = ops.length ();
4444 int width = get_reassociation_width (ops_num, rhs_code, mode);
4445 tree new_lhs = lhs;
4447 if (dump_file && (dump_flags & TDF_DETAILS))
4448 fprintf (dump_file,
4449 "Width = %d was chosen for reassociation\n", width);
4451 if (width > 1
4452 && ops.length () > 3)
4453 rewrite_expr_tree_parallel (stmt, width, ops);
4454 else
4456 /* When there are three operands left, we want
4457 to make sure the ones that get the double
4458 binary op are chosen wisely. */
4459 int len = ops.length ();
4460 if (len >= 3)
4461 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4463 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4464 powi_result != NULL);
4467 /* If we combined some repeated factors into a
4468 __builtin_powi call, multiply that result by the
4469 reassociated operands. */
4470 if (powi_result)
4472 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4473 tree type = TREE_TYPE (lhs);
4474 tree target_ssa = make_temp_ssa_name (type, NULL,
4475 "reassocpow");
4476 gimple_set_lhs (lhs_stmt, target_ssa);
4477 update_stmt (lhs_stmt);
4478 if (lhs != new_lhs)
4479 target_ssa = new_lhs;
4480 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4481 powi_result,
4482 target_ssa);
4483 gimple_set_location (mul_stmt, gimple_location (stmt));
4484 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4488 ops.release ();
4492 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4493 son;
4494 son = next_dom_son (CDI_POST_DOMINATORS, son))
4495 reassociate_bb (son);
4498 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4499 void debug_ops_vector (vec<operand_entry_t> ops);
4501 /* Dump the operand entry vector OPS to FILE. */
4503 void
4504 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4506 operand_entry_t oe;
4507 unsigned int i;
4509 FOR_EACH_VEC_ELT (ops, i, oe)
4511 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4512 print_generic_expr (file, oe->op, 0);
4516 /* Dump the operand entry vector OPS to STDERR. */
4518 DEBUG_FUNCTION void
4519 debug_ops_vector (vec<operand_entry_t> ops)
4521 dump_ops_vector (stderr, ops);
4524 static void
4525 do_reassoc (void)
4527 break_up_subtract_bb (ENTRY_BLOCK_PTR);
4528 reassociate_bb (EXIT_BLOCK_PTR);
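/* Note the two traversal directions: break_up_subtract_bb walks the
   dominator tree from the entry block, so subtracts are broken up top
   down, while reassociate_bb walks the post-dominator tree from the
   exit block, so statements are reassociated bottom up.  */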
4531 /* Initialize the reassociation pass. */
4533 static void
4534 init_reassoc (void)
4536 int i;
4537 long rank = 2;
4538 int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
4540 /* Find the loops, so that we can prevent moving calculations
4541 into them. */
4542 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4544 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4546 operand_entry_pool = create_alloc_pool ("operand entry pool",
4547 sizeof (struct operand_entry), 30);
4548 next_operand_entry_id = 0;
4550 /* Reverse RPO (Reverse Post Order) will give us something where
4551 deeper loops come later. */
4552 pre_and_rev_post_order_compute (NULL, bbs, false);
4553 bb_rank = XCNEWVEC (long, last_basic_block);
4554 operand_rank = pointer_map_create ();
4556 /* Give each default definition a distinct rank. This includes
4557 parameters and the static chain. Walk backwards over all
4558 SSA names so that we get proper rank ordering according
4559 to tree_swap_operands_p. */
4560 for (i = num_ssa_names - 1; i > 0; --i)
4562 tree name = ssa_name (i);
4563 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4564 insert_operand_rank (name, ++rank);
4567 /* Set up rank for each BB */
4568 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
4569 bb_rank[bbs[i]] = ++rank << 16;
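/* The shift by 16 spaces consecutive blocks 65536 ranks apart; ranks
   derived from statements inside one block (each a small increment
   above its operands) are assumed not to reach the base rank of the
   next block.  */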
4571 free (bbs);
4572 calculate_dominance_info (CDI_POST_DOMINATORS);
4573 plus_negates = vNULL;
4576 /* Cleanup after the reassociation pass, and print stats if
4577 requested. */
4579 static void
4580 fini_reassoc (void)
4582 statistics_counter_event (cfun, "Linearized",
4583 reassociate_stats.linearized);
4584 statistics_counter_event (cfun, "Constants eliminated",
4585 reassociate_stats.constants_eliminated);
4586 statistics_counter_event (cfun, "Ops eliminated",
4587 reassociate_stats.ops_eliminated);
4588 statistics_counter_event (cfun, "Statements rewritten",
4589 reassociate_stats.rewritten);
4590 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4591 reassociate_stats.pows_encountered);
4592 statistics_counter_event (cfun, "Built-in powi calls created",
4593 reassociate_stats.pows_created);
4595 pointer_map_destroy (operand_rank);
4596 free_alloc_pool (operand_entry_pool);
4597 free (bb_rank);
4598 plus_negates.release ();
4599 free_dominance_info (CDI_POST_DOMINATORS);
4600 loop_optimizer_finalize ();
4603 /* Gate and execute functions for Reassociation. */
4605 static unsigned int
4606 execute_reassoc (void)
4608 init_reassoc ();
4610 do_reassoc ();
4611 repropagate_negates ();
4613 fini_reassoc ();
4614 return 0;
4617 static bool
4618 gate_tree_ssa_reassoc (void)
4620 return flag_tree_reassoc != 0;
4623 namespace {
4625 const pass_data pass_data_reassoc =
4627 GIMPLE_PASS, /* type */
4628 "reassoc", /* name */
4629 OPTGROUP_NONE, /* optinfo_flags */
4630 true, /* has_gate */
4631 true, /* has_execute */
4632 TV_TREE_REASSOC, /* tv_id */
4633 ( PROP_cfg | PROP_ssa ), /* properties_required */
4634 0, /* properties_provided */
4635 0, /* properties_destroyed */
4636 0, /* todo_flags_start */
4637 ( TODO_verify_ssa
4638 | TODO_update_ssa_only_virtuals
4639 | TODO_verify_flow ), /* todo_flags_finish */
4642 class pass_reassoc : public gimple_opt_pass
4644 public:
4645 pass_reassoc (gcc::context *ctxt)
4646 : gimple_opt_pass (pass_data_reassoc, ctxt)
4649 /* opt_pass methods: */
4650 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4651 bool gate () { return gate_tree_ssa_reassoc (); }
4652 unsigned int execute () { return execute_reassoc (); }
4654 }; // class pass_reassoc
4656 } // anon namespace
4658 gimple_opt_pass *
4659 make_pass_reassoc (gcc::context *ctxt)
4661 return new pass_reassoc (ctxt);