gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-inline.h"
33 #include "pointer-set.h"
34 #include "tree-ssa-alias.h"
35 #include "internal-fn.h"
36 #include "gimple-fold.h"
37 #include "tree-eh.h"
38 #include "gimple-expr.h"
39 #include "is-a.h"
40 #include "gimple.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "gimple-ssa.h"
44 #include "tree-cfg.h"
45 #include "tree-phinodes.h"
46 #include "ssa-iterators.h"
47 #include "stringpool.h"
48 #include "tree-ssanames.h"
49 #include "tree-ssa-loop-niter.h"
50 #include "tree-ssa-loop.h"
51 #include "expr.h"
52 #include "tree-dfa.h"
53 #include "tree-ssa.h"
54 #include "tree-iterator.h"
55 #include "tree-pass.h"
56 #include "alloc-pool.h"
57 #include "langhooks.h"
58 #include "cfgloop.h"
59 #include "flags.h"
60 #include "target.h"
61 #include "params.h"
62 #include "diagnostic-core.h"
64 /* This is a simple global reassociation pass. It is, in part, based
 65 on the LLVM pass of the same name (they do some things more or less
 66 than we do, and in different orders, etc.).
68 It consists of five steps:
70 1. Breaking up subtract operations into addition + negate, where
71 it would promote the reassociation of adds.
73 2. Left linearization of the expression trees, so that (A+B)+(C+D)
74 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
75 During linearization, we place the operands of the binary
76 expressions into a vector of operand_entry_t
78 3. Optimization of the operand lists, eliminating things like a +
79 -a, a & a, etc.
81 3a. Combine repeated factors with the same occurrence counts
82 into a __builtin_powi call that will later be optimized into
83 an optimal number of multiplies.
85 4. Rewrite the expression trees we linearized and optimized so
86 they are in proper rank order.
 88 5. Repropagate negates, as nothing else will clean them up at the moment.
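    As a quick illustration of these steps (a sketch added here, not part
    of the original description): for x = (a - b) + (c + d), step 1
    rewrites the subtraction as a + (-b), step 2 linearizes the whole
    expression into the operand list {a, -b, c, d}, steps 3 and 3a
    eliminate or combine operands where possible, step 4 rewrites the
    list back into binary statements in rank order, and step 5 turns any
    surviving negate back into a subtraction.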
90 A bit of theory on #4, since nobody seems to write anything down
91 about why it makes sense to do it the way they do it:
 93 We could do this in a much nicer way theoretically, but don't (for reasons
 94 explained right after the theoretically nice way is described :P).
96 In order to promote the most redundancy elimination, you want
97 binary expressions whose operands are the same rank (or
98 preferably, the same value) exposed to the redundancy eliminator,
99 for possible elimination.
101 So the way to do this if we really cared, is to build the new op
102 tree from the leaves to the roots, merging as you go, and putting the
103 new op on the end of the worklist, until you are left with one
104 thing on the worklist.
 106 I.e., if you have to rewrite the following set of operands (listed with
107 rank in parentheses), with opcode PLUS_EXPR:
109 a (1), b (1), c (1), d (2), e (2)
112 We start with our merge worklist empty, and the ops list with all of
113 those on it.
115 You want to first merge all leaves of the same rank, as much as
116 possible.
118 So first build a binary op of
120 mergetmp = a + b, and put "mergetmp" on the merge worklist.
122 Because there is no three operand form of PLUS_EXPR, c is not going to
123 be exposed to redundancy elimination as a rank 1 operand.
125 So you might as well throw it on the merge worklist (you could also
126 consider it to now be a rank two operand, and merge it with d and e,
127 but in this case, you then have evicted e from a binary op. So at
128 least in this situation, you can't win.)
130 Then build a binary op of d + e
131 mergetmp2 = d + e
133 and put mergetmp2 on the merge worklist.
 135 So the merge worklist = {mergetmp, c, mergetmp2}.
137 Continue building binary ops of these operations until you have only
138 one operation left on the worklist.
140 So we have
142 build binary op
143 mergetmp3 = mergetmp + c
145 worklist = {mergetmp2, mergetmp3}
147 mergetmp4 = mergetmp2 + mergetmp3
149 worklist = {mergetmp4}
 151 Because we have one operation left, we can now just set the original
152 statement equal to the result of that operation.
154 This will at least expose a + b and d + e to redundancy elimination
155 as binary operations.
157 For extra points, you can reuse the old statements to build the
158 mergetmps, since you shouldn't run out.
160 So why don't we do this?
162 Because it's expensive, and rarely will help. Most trees we are
 163 reassociating have 3 or fewer ops. If they have 2 ops, they already
164 will be written into a nice single binary op. If you have 3 ops, a
165 single simple check suffices to tell you whether the first two are of the
166 same rank. If so, you know to order it
168 mergetmp = op1 + op2
169 newstmt = mergetmp + op3
171 instead of
172 mergetmp = op2 + op3
173 newstmt = mergetmp + op1
175 If all three are of the same rank, you can't expose them all in a
176 single binary operator anyway, so the above is *still* the best you
177 can do.
179 Thus, this is what we do. When we have three ops left, we check to see
180 what order to put them in, and call it a day. As a nod to vector sum
181 reduction, we check if any of the ops are really a phi node that is a
182 destructive update for the associating op, and keep the destructive
183 update together for vector sum reduction recognition. */
186 /* Statistics */
187 static struct
189 int linearized;
190 int constants_eliminated;
191 int ops_eliminated;
192 int rewritten;
193 int pows_encountered;
194 int pows_created;
195 } reassociate_stats;
197 /* Operator, rank pair. */
198 typedef struct operand_entry
200 unsigned int rank;
201 int id;
202 tree op;
203 unsigned int count;
204 } *operand_entry_t;
206 static alloc_pool operand_entry_pool;
208 /* This is used to assign a unique ID to each struct operand_entry
209 so that qsort results are identical on different hosts. */
210 static int next_operand_entry_id;
212 /* Starting rank number for a given basic block, so that we can rank
213 operations using unmovable instructions in that BB based on the bb
214 depth. */
215 static long *bb_rank;
217 /* Operand->rank hashtable. */
218 static struct pointer_map_t *operand_rank;
220 /* Forward decls. */
221 static long get_rank (tree);
224 /* Bias amount for loop-carried phis. We want this to be larger than
225 the depth of any reassociation tree we can see, but not larger than
226 the rank difference between two blocks. */
227 #define PHI_LOOP_BIAS (1 << 15)
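/* (A note on the magnitude, based on an assumption about how bb_rank is
   initialized elsewhere in this pass, outside this excerpt: block ranks
   appear to be spaced roughly 1 << 16 apart, so bb_rank[latch] + PHI_LOOP_BIAS
   exceeds the rank of anything computed inside the loop body while still
   staying below the rank of the block that follows the latch.)  */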
229 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
230 an innermost loop, and the phi has only a single use which is inside
231 the loop, then the rank is the block rank of the loop latch plus an
232 extra bias for the loop-carried dependence. This causes expressions
233 calculated into an accumulator variable to be independent for each
234 iteration of the loop. If STMT is some other phi, the rank is the
235 block rank of its containing block. */
236 static long
237 phi_rank (gimple stmt)
239 basic_block bb = gimple_bb (stmt);
240 struct loop *father = bb->loop_father;
241 tree res;
242 unsigned i;
243 use_operand_p use;
244 gimple use_stmt;
246 /* We only care about real loops (those with a latch). */
247 if (!father->latch)
248 return bb_rank[bb->index];
250 /* Interesting phis must be in headers of innermost loops. */
251 if (bb != father->header
252 || father->inner)
253 return bb_rank[bb->index];
255 /* Ignore virtual SSA_NAMEs. */
256 res = gimple_phi_result (stmt);
257 if (virtual_operand_p (res))
258 return bb_rank[bb->index];
260 /* The phi definition must have a single use, and that use must be
261 within the loop. Otherwise this isn't an accumulator pattern. */
262 if (!single_imm_use (res, &use, &use_stmt)
263 || gimple_bb (use_stmt)->loop_father != father)
264 return bb_rank[bb->index];
266 /* Look for phi arguments from within the loop. If found, bias this phi. */
267 for (i = 0; i < gimple_phi_num_args (stmt); i++)
269 tree arg = gimple_phi_arg_def (stmt, i);
270 if (TREE_CODE (arg) == SSA_NAME
271 && !SSA_NAME_IS_DEFAULT_DEF (arg))
273 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
274 if (gimple_bb (def_stmt)->loop_father == father)
275 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
279 /* Must be an uninteresting phi. */
280 return bb_rank[bb->index];
283 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
284 loop-carried dependence of an innermost loop, return TRUE; else
285 return FALSE. */
286 static bool
287 loop_carried_phi (tree exp)
289 gimple phi_stmt;
290 long block_rank;
292 if (TREE_CODE (exp) != SSA_NAME
293 || SSA_NAME_IS_DEFAULT_DEF (exp))
294 return false;
296 phi_stmt = SSA_NAME_DEF_STMT (exp);
298 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
299 return false;
301 /* Non-loop-carried phis have block rank. Loop-carried phis have
302 an additional bias added in. If this phi doesn't have block rank,
303 it's biased and should not be propagated. */
304 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
306 if (phi_rank (phi_stmt) != block_rank)
307 return true;
309 return false;
312 /* Return the maximum of RANK and the rank that should be propagated
313 from expression OP. For most operands, this is just the rank of OP.
314 For loop-carried phis, the value is zero to avoid undoing the bias
315 in favor of the phi. */
316 static long
317 propagate_rank (long rank, tree op)
319 long op_rank;
321 if (loop_carried_phi (op))
322 return rank;
324 op_rank = get_rank (op);
326 return MAX (rank, op_rank);
329 /* Look up the operand rank structure for expression E. */
331 static inline long
332 find_operand_rank (tree e)
334 void **slot = pointer_map_contains (operand_rank, e);
335 return slot ? (long) (intptr_t) *slot : -1;
338 /* Insert {E,RANK} into the operand rank hashtable. */
340 static inline void
341 insert_operand_rank (tree e, long rank)
343 void **slot;
344 gcc_assert (rank > 0);
345 slot = pointer_map_insert (operand_rank, e);
346 gcc_assert (!*slot);
347 *slot = (void *) (intptr_t) rank;
350 /* Given an expression E, return the rank of the expression. */
352 static long
353 get_rank (tree e)
355 /* Constants have rank 0. */
356 if (is_gimple_min_invariant (e))
357 return 0;
 359 /* SSA_NAME's have the rank of the expression they are the result of.
361 For globals and uninitialized values, the rank is 0.
362 For function arguments, use the pre-setup rank.
363 For PHI nodes, stores, asm statements, etc, we use the rank of
364 the BB.
365 For simple operations, the rank is the maximum rank of any of
366 its operands, or the bb_rank, whichever is less.
 367 I make no claims that this is optimal; however, it gives good
368 results. */
370 /* We make an exception to the normal ranking system to break
371 dependences of accumulator variables in loops. Suppose we
372 have a simple one-block loop containing:
374 x_1 = phi(x_0, x_2)
375 b = a + x_1
376 c = b + d
377 x_2 = c + e
379 As shown, each iteration of the calculation into x is fully
380 dependent upon the iteration before it. We would prefer to
381 see this in the form:
383 x_1 = phi(x_0, x_2)
384 b = a + d
385 c = b + e
386 x_2 = c + x_1
388 If the loop is unrolled, the calculations of b and c from
389 different iterations can be interleaved.
391 To obtain this result during reassociation, we bias the rank
392 of the phi definition x_1 upward, when it is recognized as an
393 accumulator pattern. The artificial rank causes it to be
394 added last, providing the desired independence. */
396 if (TREE_CODE (e) == SSA_NAME)
398 gimple stmt;
399 long rank;
400 int i, n;
401 tree op;
403 if (SSA_NAME_IS_DEFAULT_DEF (e))
404 return find_operand_rank (e);
406 stmt = SSA_NAME_DEF_STMT (e);
407 if (gimple_code (stmt) == GIMPLE_PHI)
408 return phi_rank (stmt);
410 if (!is_gimple_assign (stmt)
411 || gimple_vdef (stmt))
412 return bb_rank[gimple_bb (stmt)->index];
414 /* If we already have a rank for this expression, use that. */
415 rank = find_operand_rank (e);
416 if (rank != -1)
417 return rank;
419 /* Otherwise, find the maximum rank for the operands. As an
420 exception, remove the bias from loop-carried phis when propagating
421 the rank so that dependent operations are not also biased. */
422 rank = 0;
423 if (gimple_assign_single_p (stmt))
425 tree rhs = gimple_assign_rhs1 (stmt);
426 n = TREE_OPERAND_LENGTH (rhs);
427 if (n == 0)
428 rank = propagate_rank (rank, rhs);
429 else
431 for (i = 0; i < n; i++)
433 op = TREE_OPERAND (rhs, i);
435 if (op != NULL_TREE)
436 rank = propagate_rank (rank, op);
440 else
442 n = gimple_num_ops (stmt);
443 for (i = 1; i < n; i++)
445 op = gimple_op (stmt, i);
446 gcc_assert (op);
447 rank = propagate_rank (rank, op);
451 if (dump_file && (dump_flags & TDF_DETAILS))
453 fprintf (dump_file, "Rank for ");
454 print_generic_expr (dump_file, e, 0);
455 fprintf (dump_file, " is %ld\n", (rank + 1));
458 /* Note the rank in the hashtable so we don't recompute it. */
459 insert_operand_rank (e, (rank + 1));
460 return (rank + 1);
 463 /* Globals, etc., are rank 0.  */
464 return 0;
468 /* We want integer ones to end up last no matter what, since they are
469 the ones we can do the most with. */
470 #define INTEGER_CONST_TYPE 1 << 3
471 #define FLOAT_CONST_TYPE 1 << 2
472 #define OTHER_CONST_TYPE 1 << 1
474 /* Classify an invariant tree into integer, float, or other, so that
475 we can sort them to be near other constants of the same type. */
476 static inline int
477 constant_type (tree t)
479 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
480 return INTEGER_CONST_TYPE;
481 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
482 return FLOAT_CONST_TYPE;
483 else
484 return OTHER_CONST_TYPE;
487 /* qsort comparison function to sort operand entries PA and PB by rank
488 so that the sorted array is ordered by rank in decreasing order. */
489 static int
490 sort_by_operand_rank (const void *pa, const void *pb)
492 const operand_entry_t oea = *(const operand_entry_t *)pa;
493 const operand_entry_t oeb = *(const operand_entry_t *)pb;
495 /* It's nicer for optimize_expression if constants that are likely
 496 to fold when added/multiplied/whatever are put next to each
497 other. Since all constants have rank 0, order them by type. */
498 if (oeb->rank == 0 && oea->rank == 0)
500 if (constant_type (oeb->op) != constant_type (oea->op))
501 return constant_type (oeb->op) - constant_type (oea->op);
502 else
503 /* To make sorting result stable, we use unique IDs to determine
504 order. */
505 return oeb->id - oea->id;
508 /* Lastly, make sure the versions that are the same go next to each
509 other. We use SSA_NAME_VERSION because it's stable. */
510 if ((oeb->rank - oea->rank == 0)
511 && TREE_CODE (oea->op) == SSA_NAME
512 && TREE_CODE (oeb->op) == SSA_NAME)
514 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
515 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
516 else
517 return oeb->id - oea->id;
520 if (oeb->rank != oea->rank)
521 return oeb->rank - oea->rank;
522 else
523 return oeb->id - oea->id;
526 /* Add an operand entry to *OPS for the tree operand OP. */
528 static void
529 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
531 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
533 oe->op = op;
534 oe->rank = get_rank (op);
535 oe->id = next_operand_entry_id++;
536 oe->count = 1;
537 ops->safe_push (oe);
540 /* Add an operand entry to *OPS for the tree operand OP with repeat
541 count REPEAT. */
543 static void
544 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
545 HOST_WIDE_INT repeat)
547 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
549 oe->op = op;
550 oe->rank = get_rank (op);
551 oe->id = next_operand_entry_id++;
552 oe->count = repeat;
553 ops->safe_push (oe);
555 reassociate_stats.pows_encountered++;
 558 /* Return true if STMT is a reassociable operation containing a binary
559 operation with tree code CODE, and is inside LOOP. */
561 static bool
562 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
564 basic_block bb = gimple_bb (stmt);
566 if (gimple_bb (stmt) == NULL)
567 return false;
569 if (!flow_bb_inside_loop_p (loop, bb))
570 return false;
572 if (is_gimple_assign (stmt)
573 && gimple_assign_rhs_code (stmt) == code
574 && has_single_use (gimple_assign_lhs (stmt)))
575 return true;
577 return false;
581 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
 582 operand of that unary operation. Otherwise, return NULL. */
584 static tree
585 get_unary_op (tree name, enum tree_code opcode)
587 gimple stmt = SSA_NAME_DEF_STMT (name);
589 if (!is_gimple_assign (stmt))
590 return NULL_TREE;
592 if (gimple_assign_rhs_code (stmt) == opcode)
593 return gimple_assign_rhs1 (stmt);
594 return NULL_TREE;
597 /* If CURR and LAST are a pair of ops that OPCODE allows us to
598 eliminate through equivalences, do so, remove them from OPS, and
599 return true. Otherwise, return false. */
601 static bool
602 eliminate_duplicate_pair (enum tree_code opcode,
603 vec<operand_entry_t> *ops,
604 bool *all_done,
605 unsigned int i,
606 operand_entry_t curr,
607 operand_entry_t last)
 610 /* If we have two of the same op, and the opcode is &, |, min, or max,
611 we can eliminate one of them.
612 If we have two of the same op, and the opcode is ^, we can
613 eliminate both of them. */
615 if (last && last->op == curr->op)
617 switch (opcode)
619 case MAX_EXPR:
620 case MIN_EXPR:
621 case BIT_IOR_EXPR:
622 case BIT_AND_EXPR:
623 if (dump_file && (dump_flags & TDF_DETAILS))
625 fprintf (dump_file, "Equivalence: ");
626 print_generic_expr (dump_file, curr->op, 0);
627 fprintf (dump_file, " [&|minmax] ");
628 print_generic_expr (dump_file, last->op, 0);
629 fprintf (dump_file, " -> ");
630 print_generic_stmt (dump_file, last->op, 0);
633 ops->ordered_remove (i);
634 reassociate_stats.ops_eliminated ++;
636 return true;
638 case BIT_XOR_EXPR:
639 if (dump_file && (dump_flags & TDF_DETAILS))
641 fprintf (dump_file, "Equivalence: ");
642 print_generic_expr (dump_file, curr->op, 0);
643 fprintf (dump_file, " ^ ");
644 print_generic_expr (dump_file, last->op, 0);
645 fprintf (dump_file, " -> nothing\n");
648 reassociate_stats.ops_eliminated += 2;
650 if (ops->length () == 2)
652 ops->create (0);
653 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
654 *all_done = true;
656 else
658 ops->ordered_remove (i-1);
659 ops->ordered_remove (i-1);
662 return true;
664 default:
665 break;
668 return false;
671 static vec<tree> plus_negates;
 673 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
674 expression, look in OPS for a corresponding positive operation to cancel
675 it out. If we find one, remove the other from OPS, replace
676 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
677 return false. */
679 static bool
680 eliminate_plus_minus_pair (enum tree_code opcode,
681 vec<operand_entry_t> *ops,
682 unsigned int currindex,
683 operand_entry_t curr)
685 tree negateop;
686 tree notop;
687 unsigned int i;
688 operand_entry_t oe;
690 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
691 return false;
693 negateop = get_unary_op (curr->op, NEGATE_EXPR);
694 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
695 if (negateop == NULL_TREE && notop == NULL_TREE)
696 return false;
698 /* Any non-negated version will have a rank that is one less than
699 the current rank. So once we hit those ranks, if we don't find
700 one, we can stop. */
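/* (Why stopping early is safe, as follows from get_rank above: if CURR->OP
   is t = -x or t = ~x, then rank (t) is rank (x) + 1, so the operand we are
   looking for has rank exactly one below CURR's.  Since OPS is sorted by
   decreasing rank, once the scan reaches entries of lower rank the match
   can no longer appear.)  */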
702 for (i = currindex + 1;
703 ops->iterate (i, &oe)
704 && oe->rank >= curr->rank - 1 ;
705 i++)
707 if (oe->op == negateop)
710 if (dump_file && (dump_flags & TDF_DETAILS))
712 fprintf (dump_file, "Equivalence: ");
713 print_generic_expr (dump_file, negateop, 0);
714 fprintf (dump_file, " + -");
715 print_generic_expr (dump_file, oe->op, 0);
716 fprintf (dump_file, " -> 0\n");
719 ops->ordered_remove (i);
720 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
721 ops->ordered_remove (currindex);
722 reassociate_stats.ops_eliminated ++;
724 return true;
726 else if (oe->op == notop)
728 tree op_type = TREE_TYPE (oe->op);
730 if (dump_file && (dump_flags & TDF_DETAILS))
732 fprintf (dump_file, "Equivalence: ");
733 print_generic_expr (dump_file, notop, 0);
734 fprintf (dump_file, " + ~");
735 print_generic_expr (dump_file, oe->op, 0);
736 fprintf (dump_file, " -> -1\n");
739 ops->ordered_remove (i);
740 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
741 ops->ordered_remove (currindex);
742 reassociate_stats.ops_eliminated ++;
744 return true;
748 /* CURR->OP is a negate expr in a plus expr: save it for later
749 inspection in repropagate_negates(). */
750 if (negateop != NULL_TREE)
751 plus_negates.safe_push (curr->op);
753 return false;
 756 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
757 bitwise not expression, look in OPS for a corresponding operand to
758 cancel it out. If we find one, remove the other from OPS, replace
759 OPS[CURRINDEX] with 0, and return true. Otherwise, return
760 false. */
762 static bool
763 eliminate_not_pairs (enum tree_code opcode,
764 vec<operand_entry_t> *ops,
765 unsigned int currindex,
766 operand_entry_t curr)
768 tree notop;
769 unsigned int i;
770 operand_entry_t oe;
772 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
773 || TREE_CODE (curr->op) != SSA_NAME)
774 return false;
776 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
777 if (notop == NULL_TREE)
778 return false;
780 /* Any non-not version will have a rank that is one less than
781 the current rank. So once we hit those ranks, if we don't find
782 one, we can stop. */
784 for (i = currindex + 1;
785 ops->iterate (i, &oe)
786 && oe->rank >= curr->rank - 1;
787 i++)
789 if (oe->op == notop)
791 if (dump_file && (dump_flags & TDF_DETAILS))
793 fprintf (dump_file, "Equivalence: ");
794 print_generic_expr (dump_file, notop, 0);
795 if (opcode == BIT_AND_EXPR)
796 fprintf (dump_file, " & ~");
797 else if (opcode == BIT_IOR_EXPR)
798 fprintf (dump_file, " | ~");
799 print_generic_expr (dump_file, oe->op, 0);
800 if (opcode == BIT_AND_EXPR)
801 fprintf (dump_file, " -> 0\n");
802 else if (opcode == BIT_IOR_EXPR)
803 fprintf (dump_file, " -> -1\n");
806 if (opcode == BIT_AND_EXPR)
807 oe->op = build_zero_cst (TREE_TYPE (oe->op));
808 else if (opcode == BIT_IOR_EXPR)
809 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
810 TYPE_PRECISION (TREE_TYPE (oe->op)));
812 reassociate_stats.ops_eliminated += ops->length () - 1;
813 ops->truncate (0);
814 ops->quick_push (oe);
815 return true;
819 return false;
 822 /* Use a constant value that may be present in OPS to try to eliminate
823 operands. Note that this function is only really used when we've
824 eliminated ops for other reasons, or merged constants. Across
825 single statements, fold already does all of this, plus more. There
826 is little point in duplicating logic, so I've only included the
827 identities that I could ever construct testcases to trigger. */
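/* A couple of concrete instances of what the cases below catch (sketch):
   for BIT_AND_EXPR with an ops list such as {a, b, 0}, everything but the
   0 is dropped and the whole chain becomes 0; for PLUS_EXPR with a list
   such as {a, b, 0}, only the 0 itself is removed.  */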
829 static void
830 eliminate_using_constants (enum tree_code opcode,
831 vec<operand_entry_t> *ops)
833 operand_entry_t oelast = ops->last ();
834 tree type = TREE_TYPE (oelast->op);
836 if (oelast->rank == 0
837 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
839 switch (opcode)
841 case BIT_AND_EXPR:
842 if (integer_zerop (oelast->op))
844 if (ops->length () != 1)
846 if (dump_file && (dump_flags & TDF_DETAILS))
847 fprintf (dump_file, "Found & 0, removing all other ops\n");
849 reassociate_stats.ops_eliminated += ops->length () - 1;
851 ops->truncate (0);
852 ops->quick_push (oelast);
853 return;
856 else if (integer_all_onesp (oelast->op))
858 if (ops->length () != 1)
860 if (dump_file && (dump_flags & TDF_DETAILS))
861 fprintf (dump_file, "Found & -1, removing\n");
862 ops->pop ();
863 reassociate_stats.ops_eliminated++;
866 break;
867 case BIT_IOR_EXPR:
868 if (integer_all_onesp (oelast->op))
870 if (ops->length () != 1)
872 if (dump_file && (dump_flags & TDF_DETAILS))
873 fprintf (dump_file, "Found | -1, removing all other ops\n");
875 reassociate_stats.ops_eliminated += ops->length () - 1;
877 ops->truncate (0);
878 ops->quick_push (oelast);
879 return;
882 else if (integer_zerop (oelast->op))
884 if (ops->length () != 1)
886 if (dump_file && (dump_flags & TDF_DETAILS))
887 fprintf (dump_file, "Found | 0, removing\n");
888 ops->pop ();
889 reassociate_stats.ops_eliminated++;
892 break;
893 case MULT_EXPR:
894 if (integer_zerop (oelast->op)
895 || (FLOAT_TYPE_P (type)
896 && !HONOR_NANS (TYPE_MODE (type))
897 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
898 && real_zerop (oelast->op)))
900 if (ops->length () != 1)
902 if (dump_file && (dump_flags & TDF_DETAILS))
903 fprintf (dump_file, "Found * 0, removing all other ops\n");
905 reassociate_stats.ops_eliminated += ops->length () - 1;
906 ops->truncate (1);
907 ops->quick_push (oelast);
908 return;
911 else if (integer_onep (oelast->op)
912 || (FLOAT_TYPE_P (type)
913 && !HONOR_SNANS (TYPE_MODE (type))
914 && real_onep (oelast->op)))
916 if (ops->length () != 1)
918 if (dump_file && (dump_flags & TDF_DETAILS))
919 fprintf (dump_file, "Found * 1, removing\n");
920 ops->pop ();
921 reassociate_stats.ops_eliminated++;
922 return;
925 break;
926 case BIT_XOR_EXPR:
927 case PLUS_EXPR:
928 case MINUS_EXPR:
929 if (integer_zerop (oelast->op)
930 || (FLOAT_TYPE_P (type)
931 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
932 && fold_real_zero_addition_p (type, oelast->op,
933 opcode == MINUS_EXPR)))
935 if (ops->length () != 1)
937 if (dump_file && (dump_flags & TDF_DETAILS))
938 fprintf (dump_file, "Found [|^+] 0, removing\n");
939 ops->pop ();
940 reassociate_stats.ops_eliminated++;
941 return;
944 break;
945 default:
946 break;
952 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
953 bool, bool);
955 /* Structure for tracking and counting operands. */
956 typedef struct oecount_s {
957 int cnt;
958 int id;
959 enum tree_code oecode;
960 tree op;
961 } oecount;
964 /* The heap for the oecount hashtable and the sorted list of operands. */
965 static vec<oecount> cvec;
968 /* Oecount hashtable helpers. */
970 struct oecount_hasher : typed_noop_remove <void>
972 /* Note that this hash table stores integers, not pointers.
973 So, observe the casting in the member functions. */
974 typedef void value_type;
975 typedef void compare_type;
976 static inline hashval_t hash (const value_type *);
977 static inline bool equal (const value_type *, const compare_type *);
980 /* Hash function for oecount. */
982 inline hashval_t
983 oecount_hasher::hash (const value_type *p)
985 const oecount *c = &cvec[(size_t)p - 42];
986 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
989 /* Comparison function for oecount. */
991 inline bool
992 oecount_hasher::equal (const value_type *p1, const compare_type *p2)
994 const oecount *c1 = &cvec[(size_t)p1 - 42];
995 const oecount *c2 = &cvec[(size_t)p2 - 42];
996 return (c1->oecode == c2->oecode
997 && c1->op == c2->op);
1000 /* Comparison function for qsort sorting oecount elements by count. */
1002 static int
1003 oecount_cmp (const void *p1, const void *p2)
1005 const oecount *c1 = (const oecount *)p1;
1006 const oecount *c2 = (const oecount *)p2;
1007 if (c1->cnt != c2->cnt)
1008 return c1->cnt - c2->cnt;
1009 else
1010 /* If counts are identical, use unique IDs to stabilize qsort. */
1011 return c1->id - c2->id;
1014 /* Return TRUE iff STMT represents a builtin call that raises OP
1015 to some exponent. */
1017 static bool
1018 stmt_is_power_of_op (gimple stmt, tree op)
1020 tree fndecl;
1022 if (!is_gimple_call (stmt))
1023 return false;
1025 fndecl = gimple_call_fndecl (stmt);
1027 if (!fndecl
1028 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1029 return false;
1031 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1033 CASE_FLT_FN (BUILT_IN_POW):
1034 CASE_FLT_FN (BUILT_IN_POWI):
1035 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1037 default:
1038 return false;
1042 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1043 in place and return the result. Assumes that stmt_is_power_of_op
1044 was previously called for STMT and returned TRUE. */
1046 static HOST_WIDE_INT
1047 decrement_power (gimple stmt)
1049 REAL_VALUE_TYPE c, cint;
1050 HOST_WIDE_INT power;
1051 tree arg1;
1053 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1055 CASE_FLT_FN (BUILT_IN_POW):
1056 arg1 = gimple_call_arg (stmt, 1);
1057 c = TREE_REAL_CST (arg1);
1058 power = real_to_integer (&c) - 1;
1059 real_from_integer (&cint, VOIDmode, power, 0, 0);
1060 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1061 return power;
1063 CASE_FLT_FN (BUILT_IN_POWI):
1064 arg1 = gimple_call_arg (stmt, 1);
1065 power = TREE_INT_CST_LOW (arg1) - 1;
1066 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1067 return power;
1069 default:
1070 gcc_unreachable ();
1074 /* Find the single immediate use of STMT's LHS, and replace it
1075 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1076 replace *DEF with OP as well. */
1078 static void
1079 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1081 tree lhs;
1082 gimple use_stmt;
1083 use_operand_p use;
1084 gimple_stmt_iterator gsi;
1086 if (is_gimple_call (stmt))
1087 lhs = gimple_call_lhs (stmt);
1088 else
1089 lhs = gimple_assign_lhs (stmt);
1091 gcc_assert (has_single_use (lhs));
1092 single_imm_use (lhs, &use, &use_stmt);
1093 if (lhs == *def)
1094 *def = op;
1095 SET_USE (use, op);
1096 if (TREE_CODE (op) != SSA_NAME)
1097 update_stmt (use_stmt);
1098 gsi = gsi_for_stmt (stmt);
1099 unlink_stmt_vdef (stmt);
1100 gsi_remove (&gsi, true);
1101 release_defs (stmt);
1104 /* Walks the linear chain with result *DEF searching for an operation
1105 with operand OP and code OPCODE removing that from the chain. *DEF
1106 is updated if there is only one operand but no operation left. */
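/* For instance (an illustrative sketch): with the chain
     t1 = a * b;  t2 = t1 * c;
   and *DEF == t2, calling zero_one_operation (&def, MULT_EXPR, b) walks
   from t2 down to t1, finds the multiplication by b there, and propagates
   the remaining operand a into t1's single use, so t2 becomes a * c and
   t1 is removed.  *DEF itself is unchanged in this case.  */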
1108 static void
1109 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1111 gimple stmt = SSA_NAME_DEF_STMT (*def);
1115 tree name;
1117 if (opcode == MULT_EXPR
1118 && stmt_is_power_of_op (stmt, op))
1120 if (decrement_power (stmt) == 1)
1121 propagate_op_to_single_use (op, stmt, def);
1122 return;
1125 name = gimple_assign_rhs1 (stmt);
1127 /* If this is the operation we look for and one of the operands
1128 is ours simply propagate the other operand into the stmts
1129 single use. */
1130 if (gimple_assign_rhs_code (stmt) == opcode
1131 && (name == op
1132 || gimple_assign_rhs2 (stmt) == op))
1134 if (name == op)
1135 name = gimple_assign_rhs2 (stmt);
1136 propagate_op_to_single_use (name, stmt, def);
1137 return;
1140 /* We might have a multiply of two __builtin_pow* calls, and
1141 the operand might be hiding in the rightmost one. */
1142 if (opcode == MULT_EXPR
1143 && gimple_assign_rhs_code (stmt) == opcode
1144 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1145 && has_single_use (gimple_assign_rhs2 (stmt)))
1147 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1148 if (stmt_is_power_of_op (stmt2, op))
1150 if (decrement_power (stmt2) == 1)
1151 propagate_op_to_single_use (op, stmt2, def);
1152 return;
1156 /* Continue walking the chain. */
1157 gcc_assert (name != op
1158 && TREE_CODE (name) == SSA_NAME);
1159 stmt = SSA_NAME_DEF_STMT (name);
1161 while (1);
1164 /* Returns true if statement S1 dominates statement S2. Like
1165 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1167 static bool
1168 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1170 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1172 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
 1173 SSA_NAME. Assume it lives at the beginning of the function and
1174 thus dominates everything. */
1175 if (!bb1 || s1 == s2)
1176 return true;
1178 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1179 if (!bb2)
1180 return false;
1182 if (bb1 == bb2)
1184 /* PHIs in the same basic block are assumed to be
1185 executed all in parallel, if only one stmt is a PHI,
1186 it dominates the other stmt in the same basic block. */
1187 if (gimple_code (s1) == GIMPLE_PHI)
1188 return true;
1190 if (gimple_code (s2) == GIMPLE_PHI)
1191 return false;
1193 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1195 if (gimple_uid (s1) < gimple_uid (s2))
1196 return true;
1198 if (gimple_uid (s1) > gimple_uid (s2))
1199 return false;
1201 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1202 unsigned int uid = gimple_uid (s1);
1203 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1205 gimple s = gsi_stmt (gsi);
1206 if (gimple_uid (s) != uid)
1207 break;
1208 if (s == s2)
1209 return true;
1212 return false;
1215 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1218 /* Insert STMT after INSERT_POINT. */
1220 static void
1221 insert_stmt_after (gimple stmt, gimple insert_point)
1223 gimple_stmt_iterator gsi;
1224 basic_block bb;
1226 if (gimple_code (insert_point) == GIMPLE_PHI)
1227 bb = gimple_bb (insert_point);
1228 else if (!stmt_ends_bb_p (insert_point))
1230 gsi = gsi_for_stmt (insert_point);
1231 gimple_set_uid (stmt, gimple_uid (insert_point));
1232 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1233 return;
1235 else
1236 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1237 thus if it must end a basic block, it should be a call that can
1238 throw, or some assignment that can throw. If it throws, the LHS
1239 of it will not be initialized though, so only valid places using
1240 the SSA_NAME should be dominated by the fallthru edge. */
1241 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1242 gsi = gsi_after_labels (bb);
1243 if (gsi_end_p (gsi))
1245 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1246 gimple_set_uid (stmt,
1247 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1249 else
1250 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1251 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1254 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1255 the result. Places the statement after the definition of either
1256 OP1 or OP2. Returns the new statement. */
1258 static gimple
1259 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1261 gimple op1def = NULL, op2def = NULL;
1262 gimple_stmt_iterator gsi;
1263 tree op;
1264 gimple sum;
1266 /* Create the addition statement. */
1267 op = make_ssa_name (type, NULL);
1268 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1270 /* Find an insertion place and insert. */
1271 if (TREE_CODE (op1) == SSA_NAME)
1272 op1def = SSA_NAME_DEF_STMT (op1);
1273 if (TREE_CODE (op2) == SSA_NAME)
1274 op2def = SSA_NAME_DEF_STMT (op2);
1275 if ((!op1def || gimple_nop_p (op1def))
1276 && (!op2def || gimple_nop_p (op2def)))
1278 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1279 if (gsi_end_p (gsi))
1281 gimple_stmt_iterator gsi2
1282 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1283 gimple_set_uid (sum,
1284 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1286 else
1287 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1288 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1290 else
1292 gimple insert_point;
1293 if ((!op1def || gimple_nop_p (op1def))
1294 || (op2def && !gimple_nop_p (op2def)
1295 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1296 insert_point = op2def;
1297 else
1298 insert_point = op1def;
1299 insert_stmt_after (sum, insert_point);
1301 update_stmt (sum);
1303 return sum;
1306 /* Perform un-distribution of divisions and multiplications.
1307 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1308 to (A + B) / X for real X.
1310 The algorithm is organized as follows.
1312 - First we walk the addition chain *OPS looking for summands that
1313 are defined by a multiplication or a real division. This results
1314 in the candidates bitmap with relevant indices into *OPS.
1316 - Second we build the chains of multiplications or divisions for
1317 these candidates, counting the number of occurrences of (operand, code)
1318 pairs in all of the candidates chains.
1320 - Third we sort the (operand, code) pairs by number of occurrence and
1321 process them starting with the pair with the most uses.
1323 * For each such pair we walk the candidates again to build a
1324 second candidate bitmap noting all multiplication/division chains
1325 that have at least one occurrence of (operand, code).
1327 * We build an alternate addition chain only covering these
1328 candidates with one (operand, code) operation removed from their
1329 multiplication/division chain.
1331 * The first candidate gets replaced by the alternate addition chain
1332 multiplied/divided by the operand.
1334 * All candidate chains get disabled for further processing and
1335 processing of (operand, code) pairs continues.
1337 The alternate addition chains built are re-processed by the main
1338 reassociation algorithm which allows optimizing a * x * y + b * y * x
 1339 to (a + b) * x * y in one invocation of the reassociation pass. */
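/* A small worked example (added sketch): for the addition chain
   a * x + b * x + c, the two multiplications become candidates, the
   counting table records that the pair (x, MULT_EXPR) occurs twice, so it
   is processed first: x is stripped from both chains, an alternate chain
   a + b is built, that sum is multiplied by x again, and the result
   replaces the first candidate while the second is replaced by 0, leaving
   (a + b) * x + 0 + c for the normal reassociation machinery to finish.  */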
1341 static bool
1342 undistribute_ops_list (enum tree_code opcode,
1343 vec<operand_entry_t> *ops, struct loop *loop)
1345 unsigned int length = ops->length ();
1346 operand_entry_t oe1;
1347 unsigned i, j;
1348 sbitmap candidates, candidates2;
1349 unsigned nr_candidates, nr_candidates2;
1350 sbitmap_iterator sbi0;
1351 vec<operand_entry_t> *subops;
1352 hash_table <oecount_hasher> ctable;
1353 bool changed = false;
1354 int next_oecount_id = 0;
1356 if (length <= 1
1357 || opcode != PLUS_EXPR)
1358 return false;
1360 /* Build a list of candidates to process. */
1361 candidates = sbitmap_alloc (length);
1362 bitmap_clear (candidates);
1363 nr_candidates = 0;
1364 FOR_EACH_VEC_ELT (*ops, i, oe1)
1366 enum tree_code dcode;
1367 gimple oe1def;
1369 if (TREE_CODE (oe1->op) != SSA_NAME)
1370 continue;
1371 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1372 if (!is_gimple_assign (oe1def))
1373 continue;
1374 dcode = gimple_assign_rhs_code (oe1def);
1375 if ((dcode != MULT_EXPR
1376 && dcode != RDIV_EXPR)
1377 || !is_reassociable_op (oe1def, dcode, loop))
1378 continue;
1380 bitmap_set_bit (candidates, i);
1381 nr_candidates++;
1384 if (nr_candidates < 2)
1386 sbitmap_free (candidates);
1387 return false;
1390 if (dump_file && (dump_flags & TDF_DETAILS))
1392 fprintf (dump_file, "searching for un-distribute opportunities ");
1393 print_generic_expr (dump_file,
1394 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1395 fprintf (dump_file, " %d\n", nr_candidates);
1398 /* Build linearized sub-operand lists and the counting table. */
1399 cvec.create (0);
1400 ctable.create (15);
1401 /* ??? Macro arguments cannot have multi-argument template types in
 1402 them. This typedef is needed to work around that limitation. */
1403 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1404 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1405 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1407 gimple oedef;
1408 enum tree_code oecode;
1409 unsigned j;
1411 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1412 oecode = gimple_assign_rhs_code (oedef);
1413 linearize_expr_tree (&subops[i], oedef,
1414 associative_tree_code (oecode), false);
1416 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1418 oecount c;
1419 void **slot;
1420 size_t idx;
1421 c.oecode = oecode;
1422 c.cnt = 1;
1423 c.id = next_oecount_id++;
1424 c.op = oe1->op;
1425 cvec.safe_push (c);
1426 idx = cvec.length () + 41;
1427 slot = ctable.find_slot ((void *)idx, INSERT);
1428 if (!*slot)
1430 *slot = (void *)idx;
1432 else
1434 cvec.pop ();
1435 cvec[(size_t)*slot - 42].cnt++;
1439 ctable.dispose ();
1441 /* Sort the counting table. */
1442 cvec.qsort (oecount_cmp);
1444 if (dump_file && (dump_flags & TDF_DETAILS))
1446 oecount *c;
1447 fprintf (dump_file, "Candidates:\n");
1448 FOR_EACH_VEC_ELT (cvec, j, c)
1450 fprintf (dump_file, " %u %s: ", c->cnt,
1451 c->oecode == MULT_EXPR
1452 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1453 print_generic_expr (dump_file, c->op, 0);
1454 fprintf (dump_file, "\n");
1458 /* Process the (operand, code) pairs in order of most occurrence. */
1459 candidates2 = sbitmap_alloc (length);
1460 while (!cvec.is_empty ())
1462 oecount *c = &cvec.last ();
1463 if (c->cnt < 2)
1464 break;
1466 /* Now collect the operands in the outer chain that contain
1467 the common operand in their inner chain. */
1468 bitmap_clear (candidates2);
1469 nr_candidates2 = 0;
1470 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1472 gimple oedef;
1473 enum tree_code oecode;
1474 unsigned j;
1475 tree op = (*ops)[i]->op;
1477 /* If we undistributed in this chain already this may be
1478 a constant. */
1479 if (TREE_CODE (op) != SSA_NAME)
1480 continue;
1482 oedef = SSA_NAME_DEF_STMT (op);
1483 oecode = gimple_assign_rhs_code (oedef);
1484 if (oecode != c->oecode)
1485 continue;
1487 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1489 if (oe1->op == c->op)
1491 bitmap_set_bit (candidates2, i);
1492 ++nr_candidates2;
1493 break;
1498 if (nr_candidates2 >= 2)
1500 operand_entry_t oe1, oe2;
1501 gimple prod;
1502 int first = bitmap_first_set_bit (candidates2);
1504 /* Build the new addition chain. */
1505 oe1 = (*ops)[first];
1506 if (dump_file && (dump_flags & TDF_DETAILS))
1508 fprintf (dump_file, "Building (");
1509 print_generic_expr (dump_file, oe1->op, 0);
1511 zero_one_operation (&oe1->op, c->oecode, c->op);
1512 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1514 gimple sum;
1515 oe2 = (*ops)[i];
1516 if (dump_file && (dump_flags & TDF_DETAILS))
1518 fprintf (dump_file, " + ");
1519 print_generic_expr (dump_file, oe2->op, 0);
1521 zero_one_operation (&oe2->op, c->oecode, c->op);
1522 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1523 oe1->op, oe2->op, opcode);
1524 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1525 oe2->rank = 0;
1526 oe1->op = gimple_get_lhs (sum);
1529 /* Apply the multiplication/division. */
1530 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1531 oe1->op, c->op, c->oecode);
1532 if (dump_file && (dump_flags & TDF_DETAILS))
1534 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1535 print_generic_expr (dump_file, c->op, 0);
1536 fprintf (dump_file, "\n");
1539 /* Record it in the addition chain and disable further
1540 undistribution with this op. */
1541 oe1->op = gimple_assign_lhs (prod);
1542 oe1->rank = get_rank (oe1->op);
1543 subops[first].release ();
1545 changed = true;
1548 cvec.pop ();
1551 for (i = 0; i < ops->length (); ++i)
1552 subops[i].release ();
1553 free (subops);
1554 cvec.release ();
1555 sbitmap_free (candidates);
1556 sbitmap_free (candidates2);
1558 return changed;
1561 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1562 expression, examine the other OPS to see if any of them are comparisons
1563 of the same values, which we may be able to combine or eliminate.
1564 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1566 static bool
1567 eliminate_redundant_comparison (enum tree_code opcode,
1568 vec<operand_entry_t> *ops,
1569 unsigned int currindex,
1570 operand_entry_t curr)
1572 tree op1, op2;
1573 enum tree_code lcode, rcode;
1574 gimple def1, def2;
1575 int i;
1576 operand_entry_t oe;
1578 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1579 return false;
1581 /* Check that CURR is a comparison. */
1582 if (TREE_CODE (curr->op) != SSA_NAME)
1583 return false;
1584 def1 = SSA_NAME_DEF_STMT (curr->op);
1585 if (!is_gimple_assign (def1))
1586 return false;
1587 lcode = gimple_assign_rhs_code (def1);
1588 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1589 return false;
1590 op1 = gimple_assign_rhs1 (def1);
1591 op2 = gimple_assign_rhs2 (def1);
1593 /* Now look for a similar comparison in the remaining OPS. */
1594 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1596 tree t;
1598 if (TREE_CODE (oe->op) != SSA_NAME)
1599 continue;
1600 def2 = SSA_NAME_DEF_STMT (oe->op);
1601 if (!is_gimple_assign (def2))
1602 continue;
1603 rcode = gimple_assign_rhs_code (def2);
1604 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1605 continue;
1607 /* If we got here, we have a match. See if we can combine the
1608 two comparisons. */
1609 if (opcode == BIT_IOR_EXPR)
1610 t = maybe_fold_or_comparisons (lcode, op1, op2,
1611 rcode, gimple_assign_rhs1 (def2),
1612 gimple_assign_rhs2 (def2));
1613 else
1614 t = maybe_fold_and_comparisons (lcode, op1, op2,
1615 rcode, gimple_assign_rhs1 (def2),
1616 gimple_assign_rhs2 (def2));
1617 if (!t)
1618 continue;
1620 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1621 always give us a boolean_type_node value back. If the original
1622 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1623 we need to convert. */
1624 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1625 t = fold_convert (TREE_TYPE (curr->op), t);
1627 if (TREE_CODE (t) != INTEGER_CST
1628 && !operand_equal_p (t, curr->op, 0))
1630 enum tree_code subcode;
1631 tree newop1, newop2;
1632 if (!COMPARISON_CLASS_P (t))
1633 continue;
1634 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1635 STRIP_USELESS_TYPE_CONVERSION (newop1);
1636 STRIP_USELESS_TYPE_CONVERSION (newop2);
1637 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1638 continue;
1641 if (dump_file && (dump_flags & TDF_DETAILS))
1643 fprintf (dump_file, "Equivalence: ");
1644 print_generic_expr (dump_file, curr->op, 0);
1645 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1646 print_generic_expr (dump_file, oe->op, 0);
1647 fprintf (dump_file, " -> ");
1648 print_generic_expr (dump_file, t, 0);
1649 fprintf (dump_file, "\n");
1652 /* Now we can delete oe, as it has been subsumed by the new combined
1653 expression t. */
1654 ops->ordered_remove (i);
1655 reassociate_stats.ops_eliminated ++;
1657 /* If t is the same as curr->op, we're done. Otherwise we must
1658 replace curr->op with t. Special case is if we got a constant
1659 back, in which case we add it to the end instead of in place of
1660 the current entry. */
1661 if (TREE_CODE (t) == INTEGER_CST)
1663 ops->ordered_remove (currindex);
1664 add_to_ops_vec (ops, t);
1666 else if (!operand_equal_p (t, curr->op, 0))
1668 gimple sum;
1669 enum tree_code subcode;
1670 tree newop1;
1671 tree newop2;
1672 gcc_assert (COMPARISON_CLASS_P (t));
1673 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1674 STRIP_USELESS_TYPE_CONVERSION (newop1);
1675 STRIP_USELESS_TYPE_CONVERSION (newop2);
1676 gcc_checking_assert (is_gimple_val (newop1)
1677 && is_gimple_val (newop2));
1678 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1679 curr->op = gimple_get_lhs (sum);
1681 return true;
1684 return false;
1687 /* Perform various identities and other optimizations on the list of
1688 operand entries, stored in OPS. The tree code for the binary
1689 operation between all the operands is OPCODE. */
1691 static void
1692 optimize_ops_list (enum tree_code opcode,
1693 vec<operand_entry_t> *ops)
1695 unsigned int length = ops->length ();
1696 unsigned int i;
1697 operand_entry_t oe;
1698 operand_entry_t oelast = NULL;
1699 bool iterate = false;
1701 if (length == 1)
1702 return;
1704 oelast = ops->last ();
1706 /* If the last two are constants, pop the constants off, merge them
1707 and try the next two. */
1708 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1710 operand_entry_t oelm1 = (*ops)[length - 2];
1712 if (oelm1->rank == 0
1713 && is_gimple_min_invariant (oelm1->op)
1714 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1715 TREE_TYPE (oelast->op)))
1717 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1718 oelm1->op, oelast->op);
1720 if (folded && is_gimple_min_invariant (folded))
1722 if (dump_file && (dump_flags & TDF_DETAILS))
1723 fprintf (dump_file, "Merging constants\n");
1725 ops->pop ();
1726 ops->pop ();
1728 add_to_ops_vec (ops, folded);
1729 reassociate_stats.constants_eliminated++;
1731 optimize_ops_list (opcode, ops);
1732 return;
1737 eliminate_using_constants (opcode, ops);
1738 oelast = NULL;
1740 for (i = 0; ops->iterate (i, &oe);)
1742 bool done = false;
1744 if (eliminate_not_pairs (opcode, ops, i, oe))
1745 return;
1746 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1747 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1748 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1750 if (done)
1751 return;
1752 iterate = true;
1753 oelast = NULL;
1754 continue;
1756 oelast = oe;
1757 i++;
1760 length = ops->length ();
1761 oelast = ops->last ();
1763 if (iterate)
1764 optimize_ops_list (opcode, ops);
1767 /* The following functions are subroutines to optimize_range_tests and allow
1768 it to try to change a logical combination of comparisons into a range
1769 test.
1771 For example, both
1772 X == 2 || X == 5 || X == 3 || X == 4
1774 X >= 2 && X <= 5
1775 are converted to
1776 (unsigned) (X - 2) <= 3
 1778 For more information see the comments above fold_test_range in fold-const.c;
1779 this implementation is for GIMPLE. */
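/* (Checking the conversion shown above: for X in {2, 3, 4, 5},
   (unsigned) (X - 2) is 0 through 3, so the test holds; for X < 2 the
   unsigned subtraction wraps to a large value and for X > 5 it exceeds 3,
   so the test fails.)  */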
1781 struct range_entry
1783 tree exp;
1784 tree low;
1785 tree high;
1786 bool in_p;
1787 bool strict_overflow_p;
1788 unsigned int idx, next;
1791 /* This is similar to make_range in fold-const.c, but on top of
1792 GIMPLE instead of trees. If EXP is non-NULL, it should be
1793 an SSA_NAME and STMT argument is ignored, otherwise STMT
1794 argument should be a GIMPLE_COND. */
1796 static void
1797 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1799 int in_p;
1800 tree low, high;
1801 bool is_bool, strict_overflow_p;
1803 r->exp = NULL_TREE;
1804 r->in_p = false;
1805 r->strict_overflow_p = false;
1806 r->low = NULL_TREE;
1807 r->high = NULL_TREE;
1808 if (exp != NULL_TREE
1809 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1810 return;
1812 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1813 and see if we can refine the range. Some of the cases below may not
1814 happen, but it doesn't seem worth worrying about this. We "continue"
1815 the outer loop when we've changed something; otherwise we "break"
1816 the switch, which will "break" the while. */
1817 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1818 high = low;
1819 in_p = 0;
1820 strict_overflow_p = false;
1821 is_bool = false;
1822 if (exp == NULL_TREE)
1823 is_bool = true;
1824 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1826 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1827 is_bool = true;
1828 else
1829 return;
1831 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1832 is_bool = true;
1834 while (1)
1836 enum tree_code code;
1837 tree arg0, arg1, exp_type;
1838 tree nexp;
1839 location_t loc;
1841 if (exp != NULL_TREE)
1843 if (TREE_CODE (exp) != SSA_NAME)
1844 break;
1846 stmt = SSA_NAME_DEF_STMT (exp);
1847 if (!is_gimple_assign (stmt))
1848 break;
1850 code = gimple_assign_rhs_code (stmt);
1851 arg0 = gimple_assign_rhs1 (stmt);
1852 arg1 = gimple_assign_rhs2 (stmt);
1853 exp_type = TREE_TYPE (exp);
1855 else
1857 code = gimple_cond_code (stmt);
1858 arg0 = gimple_cond_lhs (stmt);
1859 arg1 = gimple_cond_rhs (stmt);
1860 exp_type = boolean_type_node;
1863 if (TREE_CODE (arg0) != SSA_NAME)
1864 break;
1865 loc = gimple_location (stmt);
1866 switch (code)
1868 case BIT_NOT_EXPR:
1869 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1870 /* Ensure the range is either +[-,0], +[0,0],
1871 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1872 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1873 or similar expression of unconditional true or
1874 false, it should not be negated. */
1875 && ((high && integer_zerop (high))
1876 || (low && integer_onep (low))))
1878 in_p = !in_p;
1879 exp = arg0;
1880 continue;
1882 break;
1883 case SSA_NAME:
1884 exp = arg0;
1885 continue;
1886 CASE_CONVERT:
1887 if (is_bool)
1888 goto do_default;
1889 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1891 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1892 is_bool = true;
1893 else
1894 return;
1896 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1897 is_bool = true;
1898 goto do_default;
1899 case EQ_EXPR:
1900 case NE_EXPR:
1901 case LT_EXPR:
1902 case LE_EXPR:
1903 case GE_EXPR:
1904 case GT_EXPR:
1905 is_bool = true;
1906 /* FALLTHRU */
1907 default:
1908 if (!is_bool)
1909 return;
1910 do_default:
1911 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1912 &low, &high, &in_p,
1913 &strict_overflow_p);
1914 if (nexp != NULL_TREE)
1916 exp = nexp;
1917 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1918 continue;
1920 break;
1922 break;
1924 if (is_bool)
1926 r->exp = exp;
1927 r->in_p = in_p;
1928 r->low = low;
1929 r->high = high;
1930 r->strict_overflow_p = strict_overflow_p;
1934 /* Comparison function for qsort. Sort entries
1935 without SSA_NAME exp first, then with SSA_NAMEs sorted
1936 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1937 by increasing ->low and if ->low is the same, by increasing
1938 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1939 maximum. */
1941 static int
1942 range_entry_cmp (const void *a, const void *b)
1944 const struct range_entry *p = (const struct range_entry *) a;
1945 const struct range_entry *q = (const struct range_entry *) b;
1947 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1949 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1951 /* Group range_entries for the same SSA_NAME together. */
1952 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1953 return -1;
1954 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1955 return 1;
1956 /* If ->low is different, NULL low goes first, then by
1957 ascending low. */
1958 if (p->low != NULL_TREE)
1960 if (q->low != NULL_TREE)
1962 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1963 p->low, q->low);
1964 if (tem && integer_onep (tem))
1965 return -1;
1966 tem = fold_binary (GT_EXPR, boolean_type_node,
1967 p->low, q->low);
1968 if (tem && integer_onep (tem))
1969 return 1;
1971 else
1972 return 1;
1974 else if (q->low != NULL_TREE)
1975 return -1;
1976 /* If ->high is different, NULL high goes last, before that by
1977 ascending high. */
1978 if (p->high != NULL_TREE)
1980 if (q->high != NULL_TREE)
1982 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1983 p->high, q->high);
1984 if (tem && integer_onep (tem))
1985 return -1;
1986 tem = fold_binary (GT_EXPR, boolean_type_node,
1987 p->high, q->high);
1988 if (tem && integer_onep (tem))
1989 return 1;
1991 else
1992 return -1;
1994 else if (p->high != NULL_TREE)
1995 return 1;
1996 /* If both ranges are the same, sort below by ascending idx. */
1998 else
1999 return 1;
2001 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2002 return -1;
2004 if (p->idx < q->idx)
2005 return -1;
2006 else
2008 gcc_checking_assert (p->idx > q->idx);
2009 return 1;
2013 /* Helper routine of optimize_range_test.
2014 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2015 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2016 OPCODE and OPS are arguments of optimize_range_tests. Return
2017 true if the range merge has been successful.
2018 If OPCODE is ERROR_MARK, this is called from within
2019 maybe_optimize_range_tests and is performing inter-bb range optimization.
2020 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2021 oe->rank. */
2023 static bool
2024 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2025 unsigned int count, enum tree_code opcode,
2026 vec<operand_entry_t> *ops, tree exp, bool in_p,
2027 tree low, tree high, bool strict_overflow_p)
2029 operand_entry_t oe = (*ops)[range->idx];
2030 tree op = oe->op;
2031 gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
2032 location_t loc = gimple_location (stmt);
2033 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2034 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
2035 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2036 gimple_stmt_iterator gsi;
2038 if (tem == NULL_TREE)
2039 return false;
2041 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2042 warning_at (loc, OPT_Wstrict_overflow,
2043 "assuming signed overflow does not occur "
2044 "when simplifying range test");
2046 if (dump_file && (dump_flags & TDF_DETAILS))
2048 struct range_entry *r;
2049 fprintf (dump_file, "Optimizing range tests ");
2050 print_generic_expr (dump_file, range->exp, 0);
2051 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2052 print_generic_expr (dump_file, range->low, 0);
2053 fprintf (dump_file, ", ");
2054 print_generic_expr (dump_file, range->high, 0);
2055 fprintf (dump_file, "]");
2056 for (r = otherrange; r < otherrange + count; r++)
2058 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2059 print_generic_expr (dump_file, r->low, 0);
2060 fprintf (dump_file, ", ");
2061 print_generic_expr (dump_file, r->high, 0);
2062 fprintf (dump_file, "]");
2064 fprintf (dump_file, "\n into ");
2065 print_generic_expr (dump_file, tem, 0);
2066 fprintf (dump_file, "\n");
2069 if (opcode == BIT_IOR_EXPR
2070 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2071 tem = invert_truthvalue_loc (loc, tem);
2073 tem = fold_convert_loc (loc, optype, tem);
2074 gsi = gsi_for_stmt (stmt);
2075 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2076 GSI_SAME_STMT);
2077 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
2078 if (gimple_uid (gsi_stmt (gsi)))
2079 break;
2080 else
2081 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2083 oe->op = tem;
2084 range->exp = exp;
2085 range->low = low;
2086 range->high = high;
2087 range->in_p = in_p;
2088 range->strict_overflow_p = false;
2090 for (range = otherrange; range < otherrange + count; range++)
2092 oe = (*ops)[range->idx];
2093 /* Now change all the other range test immediate uses, so that
2094 those tests will be optimized away. */
2095 if (opcode == ERROR_MARK)
2097 if (oe->op)
2098 oe->op = build_int_cst (TREE_TYPE (oe->op),
2099 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2100 else
2101 oe->op = (oe->rank == BIT_IOR_EXPR
2102 ? boolean_false_node : boolean_true_node);
2104 else
2105 oe->op = error_mark_node;
2106 range->exp = NULL_TREE;
2108 return true;
2111 /* Optimize X == CST1 || X == CST2
2112 if popcount (CST1 ^ CST2) == 1 into
2113 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2114 Similarly for ranges. E.g.
2115 X != 2 && X != 3 && X != 10 && X != 11
2116 will be transformed by the previous optimization into
2117 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2118 and this loop can transform that into
2119 !(((X & ~8) - 2U) <= 1U). */
2121 static bool
2122 optimize_range_tests_xor (enum tree_code opcode, tree type,
2123 tree lowi, tree lowj, tree highi, tree highj,
2124 vec<operand_entry_t> *ops,
2125 struct range_entry *rangei,
2126 struct range_entry *rangej)
2128 tree lowxor, highxor, tem, exp;
2129 /* Check highi ^ lowi == highj ^ lowj and
2130 popcount (highi ^ lowi) == 1. */
2131 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2132 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2133 return false;
2134 if (tree_log2 (lowxor) < 0)
2135 return false;
2136 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2137 if (!tree_int_cst_equal (lowxor, highxor))
2138 return false;
2140 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2141 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2142 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2143 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2144 if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2145 rangei->in_p, lowj, highj,
2146 rangei->strict_overflow_p
2147 || rangej->strict_overflow_p))
2148 return true;
2149 return false;
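/* A hedged, self-contained sketch of the equivalence the function above
   relies on, written as plain C.  The helper names are illustrative only
   and are not part of this pass.  The ranges [2, 3] and [10, 11] satisfy
   2 ^ 10 == 3 ^ 11 == 8, a single bit, so masking that bit away merges
   the two tests into one; both helpers compute the same value for every X.  */

static int
range_xor_before (unsigned int x)
{
  return x != 2 && x != 3 && x != 10 && x != 11;
}

static int
range_xor_after (unsigned int x)
{
  return !((x & ~8U) - 2U <= 1U);
}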
2152 /* Optimize X == CST1 || X == CST2
2153 if popcount (CST2 - CST1) == 1 into
2154 ((X - CST1) & ~(CST2 - CST1)) == 0.
2155 Similarly for ranges. E.g.
2156 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2157 || X == 75 || X == 45
2158 will be transformed by the previous optimization into
2159 (X - 43U) <= 3U || (X - 75U) <= 3U
2160 and this loop can transform that into
2161 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2162 static bool
2163 optimize_range_tests_diff (enum tree_code opcode, tree type,
2164 tree lowi, tree lowj, tree highi, tree highj,
2165 vec<operand_entry_t> *ops,
2166 struct range_entry *rangei,
2167 struct range_entry *rangej)
2169 tree tem1, tem2, mask;
2170 /* Check highi - lowi == highj - lowj. */
2171 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2172 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2173 return false;
2174 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2175 if (!tree_int_cst_equal (tem1, tem2))
2176 return false;
2177 /* Check popcount (lowj - lowi) == 1. */
2178 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2179 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2180 return false;
2181 if (tree_log2 (tem1) < 0)
2182 return false;
2184 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2185 tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
2186 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2187 lowj = build_int_cst (type, 0);
2188 if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2189 rangei->in_p, lowj, tem2,
2190 rangei->strict_overflow_p
2191 || rangej->strict_overflow_p))
2192 return true;
2193 return false;
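/* A hedged sketch of the difference-based merge, with illustrative helper
   names that are not part of this pass.  The ranges [43, 46] and [75, 78]
   have equal length and their low bounds differ by 75 - 43 == 32, a power
   of two, so masking that bit out of X - 43 collapses both tests.  */

static int
range_diff_before (unsigned int x)
{
  return (x - 43U) <= 3U || (x - 75U) <= 3U;
}

static int
range_diff_after (unsigned int x)
{
  return ((x - 43U) & ~32U) <= 3U;
}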
2196 /* Helper function that performs the checks common to
2197 optimize_range_tests_xor and optimize_range_tests_diff.
2198 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2199 Else it calls optimize_range_tests_diff. */
2201 static bool
2202 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2203 bool optimize_xor, vec<operand_entry_t> *ops,
2204 struct range_entry *ranges)
2206 int i, j;
2207 bool any_changes = false;
2208 for (i = first; i < length; i++)
2210 tree lowi, highi, lowj, highj, type, tem;
2212 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2213 continue;
2214 type = TREE_TYPE (ranges[i].exp);
2215 if (!INTEGRAL_TYPE_P (type))
2216 continue;
2217 lowi = ranges[i].low;
2218 if (lowi == NULL_TREE)
2219 lowi = TYPE_MIN_VALUE (type);
2220 highi = ranges[i].high;
2221 if (highi == NULL_TREE)
2222 continue;
2223 for (j = i + 1; j < length && j < i + 64; j++)
2225 bool changes;
2226 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2227 continue;
2228 lowj = ranges[j].low;
2229 if (lowj == NULL_TREE)
2230 continue;
2231 highj = ranges[j].high;
2232 if (highj == NULL_TREE)
2233 highj = TYPE_MAX_VALUE (type);
2234 /* Check lowj > highi. */
2235 tem = fold_binary (GT_EXPR, boolean_type_node,
2236 lowj, highi);
2237 if (tem == NULL_TREE || !integer_onep (tem))
2238 continue;
2239 if (optimize_xor)
2240 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2241 highi, highj, ops,
2242 ranges + i, ranges + j);
2243 else
2244 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2245 highi, highj, ops,
2246 ranges + i, ranges + j);
2247 if (changes)
2249 any_changes = true;
2250 break;
2254 return any_changes;
2257 /* Optimize range tests, similarly to how fold_range_test
2258 optimizes them on trees. The tree code for the binary
2259 operation between all the operands is OPCODE.
2260 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2261 maybe_optimize_range_tests for inter-bb range optimization.
2262 In that case if oe->op is NULL, oe->id is bb->index whose
2263 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2264 the actual opcode. */
2266 static bool
2267 optimize_range_tests (enum tree_code opcode,
2268 vec<operand_entry_t> *ops)
2270 unsigned int length = ops->length (), i, j, first;
2271 operand_entry_t oe;
2272 struct range_entry *ranges;
2273 bool any_changes = false;
2275 if (length == 1)
2276 return false;
2278 ranges = XNEWVEC (struct range_entry, length);
2279 for (i = 0; i < length; i++)
2281 oe = (*ops)[i];
2282 ranges[i].idx = i;
2283 init_range_entry (ranges + i, oe->op,
2284 oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2285 /* For | invert it now, we will invert it again before emitting
2286 the optimized expression. */
2287 if (opcode == BIT_IOR_EXPR
2288 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2289 ranges[i].in_p = !ranges[i].in_p;
2292 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2293 for (i = 0; i < length; i++)
2294 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2295 break;
2297 /* Try to merge ranges. */
2298 for (first = i; i < length; i++)
2300 tree low = ranges[i].low;
2301 tree high = ranges[i].high;
2302 int in_p = ranges[i].in_p;
2303 bool strict_overflow_p = ranges[i].strict_overflow_p;
2304 int update_fail_count = 0;
2306 for (j = i + 1; j < length; j++)
2308 if (ranges[i].exp != ranges[j].exp)
2309 break;
2310 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2311 ranges[j].in_p, ranges[j].low, ranges[j].high))
2312 break;
2313 strict_overflow_p |= ranges[j].strict_overflow_p;
2316 if (j == i + 1)
2317 continue;
2319 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2320 ops, ranges[i].exp, in_p, low, high,
2321 strict_overflow_p))
2323 i = j - 1;
2324 any_changes = true;
2326 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2327 while update_range_test would fail. */
2328 else if (update_fail_count == 64)
2329 i = j - 1;
2330 else
2331 ++update_fail_count;
2334 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2335 ops, ranges);
2337 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2338 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2339 ops, ranges);
2341 if (any_changes && opcode != ERROR_MARK)
2343 j = 0;
2344 FOR_EACH_VEC_ELT (*ops, i, oe)
2346 if (oe->op == error_mark_node)
2347 continue;
2348 else if (i != j)
2349 (*ops)[j] = oe;
2350 j++;
2352 ops->truncate (j);
2355 XDELETEVEC (ranges);
2356 return any_changes;
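/* A hedged end-to-end sketch of what this routine achieves on a simple
   chain; the helper names below are illustrative and not part of this
   file.  After sorting, the adjacent singleton ranges [1, 1], [2, 2] and
   [3, 3] are combined by merge_ranges into [1, 3], which build_range_check
   then emits as a single unsigned comparison.  */

static int
range_merge_before (unsigned int x)
{
  return x == 1 || x == 2 || x == 3;
}

static int
range_merge_after (unsigned int x)
{
  return x - 1U <= 2U;
}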
2359 /* Return true if STMT is a cast like:
2360 <bb N>:
2362 _123 = (int) _234;
2364 <bb M>:
2365 # _345 = PHI <_123(N), 1(...), 1(...)>
2366 where _234 has bool type, _123 has a single use and
2367 bb N has a single successor M. This is commonly used in
2368 the last block of a range test. */
2370 static bool
2371 final_range_test_p (gimple stmt)
2373 basic_block bb, rhs_bb;
2374 edge e;
2375 tree lhs, rhs;
2376 use_operand_p use_p;
2377 gimple use_stmt;
2379 if (!gimple_assign_cast_p (stmt))
2380 return false;
2381 bb = gimple_bb (stmt);
2382 if (!single_succ_p (bb))
2383 return false;
2384 e = single_succ_edge (bb);
2385 if (e->flags & EDGE_COMPLEX)
2386 return false;
2388 lhs = gimple_assign_lhs (stmt);
2389 rhs = gimple_assign_rhs1 (stmt);
2390 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2391 || TREE_CODE (rhs) != SSA_NAME
2392 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2393 return false;
2395 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2396 if (!single_imm_use (lhs, &use_p, &use_stmt))
2397 return false;
2399 if (gimple_code (use_stmt) != GIMPLE_PHI
2400 || gimple_bb (use_stmt) != e->dest)
2401 return false;
2403 /* And that the rhs is defined in the same loop. */
2404 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2405 if (rhs_bb == NULL
2406 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2407 return false;
2409 return true;
2412 /* Return true if BB is a suitable basic block for inter-bb range test
2413 optimization. If BACKWARD is true, BB should be the only predecessor
2414 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2415 or compared against to find the common basic block to which all
2416 conditions branch if true resp. false. If BACKWARD is false, TEST_BB should
2417 be the only predecessor of BB. */
2419 static bool
2420 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2421 bool backward)
2423 edge_iterator ei, ei2;
2424 edge e, e2;
2425 gimple stmt;
2426 gimple_stmt_iterator gsi;
2427 bool other_edge_seen = false;
2428 bool is_cond;
2430 if (test_bb == bb)
2431 return false;
2432 /* Check last stmt first. */
2433 stmt = last_stmt (bb);
2434 if (stmt == NULL
2435 || (gimple_code (stmt) != GIMPLE_COND
2436 && (backward || !final_range_test_p (stmt)))
2437 || gimple_visited_p (stmt)
2438 || stmt_could_throw_p (stmt)
2439 || *other_bb == bb)
2440 return false;
2441 is_cond = gimple_code (stmt) == GIMPLE_COND;
2442 if (is_cond)
2444 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2445 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2446 to *OTHER_BB (if not set yet, try to find it out). */
2447 if (EDGE_COUNT (bb->succs) != 2)
2448 return false;
2449 FOR_EACH_EDGE (e, ei, bb->succs)
2451 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2452 return false;
2453 if (e->dest == test_bb)
2455 if (backward)
2456 continue;
2457 else
2458 return false;
2460 if (e->dest == bb)
2461 return false;
2462 if (*other_bb == NULL)
2464 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2465 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2466 return false;
2467 else if (e->dest == e2->dest)
2468 *other_bb = e->dest;
2469 if (*other_bb == NULL)
2470 return false;
2472 if (e->dest == *other_bb)
2473 other_edge_seen = true;
2474 else if (backward)
2475 return false;
2477 if (*other_bb == NULL || !other_edge_seen)
2478 return false;
2480 else if (single_succ (bb) != *other_bb)
2481 return false;
2483 /* Now check all PHIs of *OTHER_BB. */
2484 e = find_edge (bb, *other_bb);
2485 e2 = find_edge (test_bb, *other_bb);
2486 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2488 gimple phi = gsi_stmt (gsi);
2489 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2490 corresponding to BB and TEST_BB predecessor must be the same. */
2491 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2492 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2494 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2495 one of the PHIs should have the lhs of the last stmt in
2496 that block as PHI arg and that PHI should have 0 or 1
2497 corresponding to it in all other range test basic blocks
2498 considered. */
2499 if (!is_cond)
2501 if (gimple_phi_arg_def (phi, e->dest_idx)
2502 == gimple_assign_lhs (stmt)
2503 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2504 || integer_onep (gimple_phi_arg_def (phi,
2505 e2->dest_idx))))
2506 continue;
2508 else
2510 gimple test_last = last_stmt (test_bb);
2511 if (gimple_code (test_last) != GIMPLE_COND
2512 && gimple_phi_arg_def (phi, e2->dest_idx)
2513 == gimple_assign_lhs (test_last)
2514 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2515 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2516 continue;
2519 return false;
2522 return true;
2525 /* Return true if BB doesn't have side-effects that would disallow
2526 range test optimization, all SSA_NAMEs set in the bb are consumed
2527 in the bb and there are no PHIs. */
2529 static bool
2530 no_side_effect_bb (basic_block bb)
2532 gimple_stmt_iterator gsi;
2533 gimple last;
2535 if (!gimple_seq_empty_p (phi_nodes (bb)))
2536 return false;
2537 last = last_stmt (bb);
2538 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2540 gimple stmt = gsi_stmt (gsi);
2541 tree lhs;
2542 imm_use_iterator imm_iter;
2543 use_operand_p use_p;
2545 if (is_gimple_debug (stmt))
2546 continue;
2547 if (gimple_has_side_effects (stmt))
2548 return false;
2549 if (stmt == last)
2550 return true;
2551 if (!is_gimple_assign (stmt))
2552 return false;
2553 lhs = gimple_assign_lhs (stmt);
2554 if (TREE_CODE (lhs) != SSA_NAME)
2555 return false;
2556 if (gimple_assign_rhs_could_trap_p (stmt))
2557 return false;
2558 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2560 gimple use_stmt = USE_STMT (use_p);
2561 if (is_gimple_debug (use_stmt))
2562 continue;
2563 if (gimple_bb (use_stmt) != bb)
2564 return false;
2567 return false;
2570 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2571 return true and fill in *OPS recursively. */
2573 static bool
2574 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2575 struct loop *loop)
2577 gimple stmt = SSA_NAME_DEF_STMT (var);
2578 tree rhs[2];
2579 int i;
2581 if (!is_reassociable_op (stmt, code, loop))
2582 return false;
2584 rhs[0] = gimple_assign_rhs1 (stmt);
2585 rhs[1] = gimple_assign_rhs2 (stmt);
2586 gimple_set_visited (stmt, true);
2587 for (i = 0; i < 2; i++)
2588 if (TREE_CODE (rhs[i]) == SSA_NAME
2589 && !get_ops (rhs[i], code, ops, loop)
2590 && has_single_use (rhs[i]))
2592 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2594 oe->op = rhs[i];
2595 oe->rank = code;
2596 oe->id = 0;
2597 oe->count = 1;
2598 ops->safe_push (oe);
2600 return true;
2603 /* Find the ops that were added by get_ops starting from VAR, see if
2604 they were changed during update_range_test and if yes, create new
2605 stmts. */
2607 static tree
2608 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2609 unsigned int *pidx, struct loop *loop)
2611 gimple stmt = SSA_NAME_DEF_STMT (var);
2612 tree rhs[4];
2613 int i;
2615 if (!is_reassociable_op (stmt, code, loop))
2616 return NULL;
2618 rhs[0] = gimple_assign_rhs1 (stmt);
2619 rhs[1] = gimple_assign_rhs2 (stmt);
2620 rhs[2] = rhs[0];
2621 rhs[3] = rhs[1];
2622 for (i = 0; i < 2; i++)
2623 if (TREE_CODE (rhs[i]) == SSA_NAME)
2625 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2626 if (rhs[2 + i] == NULL_TREE)
2628 if (has_single_use (rhs[i]))
2629 rhs[2 + i] = ops[(*pidx)++]->op;
2630 else
2631 rhs[2 + i] = rhs[i];
2634 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2635 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2637 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2638 var = make_ssa_name (TREE_TYPE (var), NULL);
2639 gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2640 var, rhs[2], rhs[3]);
2641 gimple_set_uid (g, gimple_uid (stmt));
2642 gimple_set_visited (g, true);
2643 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2645 return var;
2648 /* Structure to track the initial value passed to get_ops and
2649 the range in the ops vector for each basic block. */
2651 struct inter_bb_range_test_entry
2653 tree op;
2654 unsigned int first_idx, last_idx;
2657 /* Inter-bb range test optimization. */
2659 static void
2660 maybe_optimize_range_tests (gimple stmt)
2662 basic_block first_bb = gimple_bb (stmt);
2663 basic_block last_bb = first_bb;
2664 basic_block other_bb = NULL;
2665 basic_block bb;
2666 edge_iterator ei;
2667 edge e;
2668 auto_vec<operand_entry_t> ops;
2669 auto_vec<inter_bb_range_test_entry> bbinfo;
2670 bool any_changes = false;
2672 /* Consider only basic blocks that end with GIMPLE_COND or
2673 a cast statement satisfying final_range_test_p. All
2674 but the last bb in the first_bb .. last_bb range
2675 should end with GIMPLE_COND. */
2676 if (gimple_code (stmt) == GIMPLE_COND)
2678 if (EDGE_COUNT (first_bb->succs) != 2)
2679 return;
2681 else if (final_range_test_p (stmt))
2682 other_bb = single_succ (first_bb);
2683 else
2684 return;
2686 if (stmt_could_throw_p (stmt))
2687 return;
2689 /* As relative ordering of post-dominator sons isn't fixed,
2690 maybe_optimize_range_tests can be called first on any
2691 bb in the range we want to optimize. So, start searching
2692 backwards, if first_bb can be set to a predecessor. */
2693 while (single_pred_p (first_bb))
2695 basic_block pred_bb = single_pred (first_bb);
2696 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2697 break;
2698 if (!no_side_effect_bb (first_bb))
2699 break;
2700 first_bb = pred_bb;
2702 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2703 Before starting forward search in last_bb successors, find
2704 out the other_bb. */
2705 if (first_bb == last_bb)
2707 other_bb = NULL;
2708 /* As non-GIMPLE_COND last stmt always terminates the range,
2709 if forward search didn't discover anything, just give up. */
2710 if (gimple_code (stmt) != GIMPLE_COND)
2711 return;
2712 /* Look at both successors. Either it ends with a GIMPLE_COND
2713 and satisfies suitable_cond_bb, or ends with a cast and
2714 other_bb is that cast's successor. */
2715 FOR_EACH_EDGE (e, ei, first_bb->succs)
2716 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2717 || e->dest == first_bb)
2718 return;
2719 else if (single_pred_p (e->dest))
2721 stmt = last_stmt (e->dest);
2722 if (stmt
2723 && gimple_code (stmt) == GIMPLE_COND
2724 && EDGE_COUNT (e->dest->succs) == 2)
2726 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2727 break;
2728 else
2729 other_bb = NULL;
2731 else if (stmt
2732 && final_range_test_p (stmt)
2733 && find_edge (first_bb, single_succ (e->dest)))
2735 other_bb = single_succ (e->dest);
2736 if (other_bb == first_bb)
2737 other_bb = NULL;
2740 if (other_bb == NULL)
2741 return;
2743 /* Now do the forward search, moving last_bb to successor bbs
2744 that aren't other_bb. */
2745 while (EDGE_COUNT (last_bb->succs) == 2)
2747 FOR_EACH_EDGE (e, ei, last_bb->succs)
2748 if (e->dest != other_bb)
2749 break;
2750 if (e == NULL)
2751 break;
2752 if (!single_pred_p (e->dest))
2753 break;
2754 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2755 break;
2756 if (!no_side_effect_bb (e->dest))
2757 break;
2758 last_bb = e->dest;
2760 if (first_bb == last_bb)
2761 return;
2762 /* Here basic blocks first_bb through last_bb's predecessor
2763 end with GIMPLE_COND, each of them has one edge to
2764 other_bb and another to some other block in the range,
2765 no block except first_bb has side-effects, and
2766 last_bb ends with either a GIMPLE_COND or a cast satisfying
2767 final_range_test_p. */
2768 for (bb = last_bb; ; bb = single_pred (bb))
2770 enum tree_code code;
2771 tree lhs, rhs;
2772 inter_bb_range_test_entry bb_ent;
2774 bb_ent.op = NULL_TREE;
2775 bb_ent.first_idx = ops.length ();
2776 bb_ent.last_idx = bb_ent.first_idx;
2777 e = find_edge (bb, other_bb);
2778 stmt = last_stmt (bb);
2779 gimple_set_visited (stmt, true);
2780 if (gimple_code (stmt) != GIMPLE_COND)
2782 use_operand_p use_p;
2783 gimple phi;
2784 edge e2;
2785 unsigned int d;
2787 lhs = gimple_assign_lhs (stmt);
2788 rhs = gimple_assign_rhs1 (stmt);
2789 gcc_assert (bb == last_bb);
2791 /* stmt is
2792 _123 = (int) _234;
2794 followed by:
2795 <bb M>:
2796 # _345 = PHI <_123(N), 1(...), 1(...)>
2798 or 0 instead of 1. If it is 0, the _234
2799 range test is anded together with all the
2800 other range tests, if it is 1, it is ored with
2801 them. */
2802 single_imm_use (lhs, &use_p, &phi);
2803 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2804 e2 = find_edge (first_bb, other_bb);
2805 d = e2->dest_idx;
2806 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2807 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2808 code = BIT_AND_EXPR;
2809 else
2811 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2812 code = BIT_IOR_EXPR;
2815 /* If _234 SSA_NAME_DEF_STMT is
2816 _234 = _567 | _789;
2817 (or &, corresponding to 1/0 in the phi arguments),
2818 push into ops the individual range test arguments
2819 of the bitwise or resp. and, recursively. */
2820 if (!get_ops (rhs, code, &ops,
2821 loop_containing_stmt (stmt))
2822 && has_single_use (rhs))
2824 /* Otherwise, push the _234 range test itself. */
2825 operand_entry_t oe
2826 = (operand_entry_t) pool_alloc (operand_entry_pool);
2828 oe->op = rhs;
2829 oe->rank = code;
2830 oe->id = 0;
2831 oe->count = 1;
2832 ops.safe_push (oe);
2833 bb_ent.last_idx++;
2835 else
2836 bb_ent.last_idx = ops.length ();
2837 bb_ent.op = rhs;
2838 bbinfo.safe_push (bb_ent);
2839 continue;
2841 /* Otherwise stmt is GIMPLE_COND. */
2842 code = gimple_cond_code (stmt);
2843 lhs = gimple_cond_lhs (stmt);
2844 rhs = gimple_cond_rhs (stmt);
2845 if (TREE_CODE (lhs) == SSA_NAME
2846 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2847 && ((code != EQ_EXPR && code != NE_EXPR)
2848 || rhs != boolean_false_node
2849 /* Either push into ops the individual bitwise
2850 or resp. and operands, depending on which
2851 edge is other_bb. */
2852 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2853 ^ (code == EQ_EXPR))
2854 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2855 loop_containing_stmt (stmt))))
2857 /* Or push the GIMPLE_COND stmt itself. */
2858 operand_entry_t oe
2859 = (operand_entry_t) pool_alloc (operand_entry_pool);
2861 oe->op = NULL;
2862 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2863 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2864 /* oe->op = NULL signals that there is no SSA_NAME
2865 for the range test, and oe->id instead is the
2866 basic block number at whose end the GIMPLE_COND
2867 is. */
2868 oe->id = bb->index;
2869 oe->count = 1;
2870 ops.safe_push (oe);
2871 bb_ent.op = NULL;
2872 bb_ent.last_idx++;
2874 else if (ops.length () > bb_ent.first_idx)
2876 bb_ent.op = lhs;
2877 bb_ent.last_idx = ops.length ();
2879 bbinfo.safe_push (bb_ent);
2880 if (bb == first_bb)
2881 break;
2883 if (ops.length () > 1)
2884 any_changes = optimize_range_tests (ERROR_MARK, &ops);
2885 if (any_changes)
2887 unsigned int idx;
2888 /* update_ops relies on has_single_use predicates returning the
2889 same values as they did during get_ops earlier. Additionally it
2890 never removes statements, only adds new ones and it should walk
2891 from the single imm use and check the predicate already before
2892 making those changes.
2893 On the other hand, the handling of GIMPLE_COND directly can turn
2894 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
2895 it needs to be done in a separate loop afterwards. */
2896 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2898 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2899 && bbinfo[idx].op != NULL_TREE)
2901 tree new_op;
2903 stmt = last_stmt (bb);
2904 new_op = update_ops (bbinfo[idx].op,
2905 (enum tree_code)
2906 ops[bbinfo[idx].first_idx]->rank,
2907 ops, &bbinfo[idx].first_idx,
2908 loop_containing_stmt (stmt));
2909 if (new_op == NULL_TREE)
2911 gcc_assert (bb == last_bb);
2912 new_op = ops[bbinfo[idx].first_idx++]->op;
2914 if (bbinfo[idx].op != new_op)
2916 imm_use_iterator iter;
2917 use_operand_p use_p;
2918 gimple use_stmt, cast_stmt = NULL;
2920 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
2921 if (is_gimple_debug (use_stmt))
2922 continue;
2923 else if (gimple_code (use_stmt) == GIMPLE_COND
2924 || gimple_code (use_stmt) == GIMPLE_PHI)
2925 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2926 SET_USE (use_p, new_op);
2927 else if (gimple_assign_cast_p (use_stmt))
2928 cast_stmt = use_stmt;
2929 else
2930 gcc_unreachable ();
2931 if (cast_stmt)
2933 gcc_assert (bb == last_bb);
2934 tree lhs = gimple_assign_lhs (cast_stmt);
2935 tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
2936 enum tree_code rhs_code
2937 = gimple_assign_rhs_code (cast_stmt);
2938 gimple g;
2939 if (is_gimple_min_invariant (new_op))
2941 new_op = fold_convert (TREE_TYPE (lhs), new_op);
2942 g = gimple_build_assign (new_lhs, new_op);
2944 else
2945 g = gimple_build_assign_with_ops (rhs_code, new_lhs,
2946 new_op, NULL_TREE);
2947 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
2948 gimple_set_uid (g, gimple_uid (cast_stmt));
2949 gimple_set_visited (g, true);
2950 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2951 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2952 if (is_gimple_debug (use_stmt))
2953 continue;
2954 else if (gimple_code (use_stmt) == GIMPLE_COND
2955 || gimple_code (use_stmt) == GIMPLE_PHI)
2956 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2957 SET_USE (use_p, new_lhs);
2958 else
2959 gcc_unreachable ();
2963 if (bb == first_bb)
2964 break;
2966 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2968 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2969 && bbinfo[idx].op == NULL_TREE
2970 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
2972 stmt = last_stmt (bb);
2973 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
2974 gimple_cond_make_false (stmt);
2975 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
2976 gimple_cond_make_true (stmt);
2977 else
2979 gimple_cond_set_code (stmt, NE_EXPR);
2980 gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
2981 gimple_cond_set_rhs (stmt, boolean_false_node);
2983 update_stmt (stmt);
2985 if (bb == first_bb)
2986 break;
2991 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2992 of STMT in its operands. This is also known as a "destructive
2993 update" operation. */
2995 static bool
2996 is_phi_for_stmt (gimple stmt, tree operand)
2998 gimple def_stmt;
2999 tree lhs;
3000 use_operand_p arg_p;
3001 ssa_op_iter i;
3003 if (TREE_CODE (operand) != SSA_NAME)
3004 return false;
3006 lhs = gimple_assign_lhs (stmt);
3008 def_stmt = SSA_NAME_DEF_STMT (operand);
3009 if (gimple_code (def_stmt) != GIMPLE_PHI)
3010 return false;
3012 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3013 if (lhs == USE_FROM_PTR (arg_p))
3014 return true;
3015 return false;
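/* A hedged example of the "destructive update" shape, using an
   illustrative function that is not part of this pass.  In a reduction
   loop like the one below, the accumulator's PHI node uses the lhs of
   the addition, which is exactly what is_phi_for_stmt detects so the
   PHI operand can be kept adjacent and the sum reduction remains
   recognizable by the vectorizer.  */

static unsigned int
reduction_shape_example (const unsigned int *a, int n)
{
  unsigned int s = 0;
  int i;
  for (i = 0; i < n; i++)
    /* Roughly: s_2 = PHI <0, s_3>;  s_3 = s_2 + a[i];  */
    s = s + a[i];
  return s;
}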
3018 /* Remove def stmt of VAR if VAR has zero uses and recurse
3019 on rhs1 operand if so. */
3021 static void
3022 remove_visited_stmt_chain (tree var)
3024 gimple stmt;
3025 gimple_stmt_iterator gsi;
3027 while (1)
3029 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3030 return;
3031 stmt = SSA_NAME_DEF_STMT (var);
3032 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3034 var = gimple_assign_rhs1 (stmt);
3035 gsi = gsi_for_stmt (stmt);
3036 gsi_remove (&gsi, true);
3037 release_defs (stmt);
3039 else
3040 return;
3044 /* This function checks three consecutive operands in
3045 the operand vector OPS starting from OPINDEX and
3046 swaps two operands if it is profitable for the binary operation
3047 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3049 We pair ops with the same rank if possible.
3051 The alternative we try is to see if STMT is a destructive
3052 update style statement, which is like:
3053 b = phi (a, ...)
3054 a = c + b;
3055 In that case, we want to use the destructive update form to
3056 expose the possible vectorizer sum reduction opportunity.
3057 In that case, the third operand will be the phi node. This
3058 check is not performed if STMT is null.
3060 We could, of course, try to be better as noted above, and do a
3061 lot of work to try to find these opportunities in >3 operand
3062 cases, but it is unlikely to be worth it. */
3064 static void
3065 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3066 unsigned int opindex, gimple stmt)
3068 operand_entry_t oe1, oe2, oe3;
3070 oe1 = ops[opindex];
3071 oe2 = ops[opindex + 1];
3072 oe3 = ops[opindex + 2];
3074 if ((oe1->rank == oe2->rank
3075 && oe2->rank != oe3->rank)
3076 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3077 && !is_phi_for_stmt (stmt, oe1->op)
3078 && !is_phi_for_stmt (stmt, oe2->op)))
3080 struct operand_entry temp = *oe3;
3081 oe3->op = oe1->op;
3082 oe3->rank = oe1->rank;
3083 oe1->op = temp.op;
3084 oe1->rank= temp.rank;
3086 else if ((oe1->rank == oe3->rank
3087 && oe2->rank != oe3->rank)
3088 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3089 && !is_phi_for_stmt (stmt, oe1->op)
3090 && !is_phi_for_stmt (stmt, oe3->op)))
3092 struct operand_entry temp = *oe2;
3093 oe2->op = oe1->op;
3094 oe2->rank = oe1->rank;
3095 oe1->op = temp.op;
3096 oe1->rank = temp.rank;
3100 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3101 two definitions, otherwise return STMT. */
3103 static inline gimple
3104 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3106 if (TREE_CODE (rhs1) == SSA_NAME
3107 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3108 stmt = SSA_NAME_DEF_STMT (rhs1);
3109 if (TREE_CODE (rhs2) == SSA_NAME
3110 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3111 stmt = SSA_NAME_DEF_STMT (rhs2);
3112 return stmt;
3115 /* Recursively rewrite our linearized statements so that the operators
3116 match those in OPS[OPINDEX], putting the computation in rank
3117 order. Return new lhs. */
3119 static tree
3120 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3121 vec<operand_entry_t> ops, bool changed)
3123 tree rhs1 = gimple_assign_rhs1 (stmt);
3124 tree rhs2 = gimple_assign_rhs2 (stmt);
3125 tree lhs = gimple_assign_lhs (stmt);
3126 operand_entry_t oe;
3128 /* The final recursion case for this function is that you have
3129 exactly two operations left.
3130 If we had exactly one op in the entire list to start with, we
3131 would have never called this function, and the tail recursion
3132 rewrites them one at a time. */
3133 if (opindex + 2 == ops.length ())
3135 operand_entry_t oe1, oe2;
3137 oe1 = ops[opindex];
3138 oe2 = ops[opindex + 1];
3140 if (rhs1 != oe1->op || rhs2 != oe2->op)
3142 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3143 unsigned int uid = gimple_uid (stmt);
3145 if (dump_file && (dump_flags & TDF_DETAILS))
3147 fprintf (dump_file, "Transforming ");
3148 print_gimple_stmt (dump_file, stmt, 0, 0);
3151 if (changed)
3153 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3154 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3155 stmt
3156 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3157 lhs, oe1->op, oe2->op);
3158 gimple_set_uid (stmt, uid);
3159 gimple_set_visited (stmt, true);
3160 if (insert_point == gsi_stmt (gsi))
3161 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3162 else
3163 insert_stmt_after (stmt, insert_point);
3165 else
3167 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3168 == stmt);
3169 gimple_assign_set_rhs1 (stmt, oe1->op);
3170 gimple_assign_set_rhs2 (stmt, oe2->op);
3171 update_stmt (stmt);
3174 if (rhs1 != oe1->op && rhs1 != oe2->op)
3175 remove_visited_stmt_chain (rhs1);
3177 if (dump_file && (dump_flags & TDF_DETAILS))
3179 fprintf (dump_file, " into ");
3180 print_gimple_stmt (dump_file, stmt, 0, 0);
3183 return lhs;
3186 /* If we hit here, we should have 3 or more ops left. */
3187 gcc_assert (opindex + 2 < ops.length ());
3189 /* Rewrite the next operator. */
3190 oe = ops[opindex];
3192 /* Recurse on the LHS of the binary operator, which is guaranteed to
3193 be the non-leaf side. */
3194 tree new_rhs1
3195 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3196 changed || oe->op != rhs2);
3198 if (oe->op != rhs2 || new_rhs1 != rhs1)
3200 if (dump_file && (dump_flags & TDF_DETAILS))
3202 fprintf (dump_file, "Transforming ");
3203 print_gimple_stmt (dump_file, stmt, 0, 0);
3206 /* If changed is false, this is either opindex == 0
3207 or all outer rhs2's were equal to corresponding oe->op,
3208 and powi_result is NULL.
3209 That means lhs is equivalent before and after reassociation.
3210 Otherwise ensure the old lhs SSA_NAME is not reused and
3211 create a new stmt as well, so that any debug stmts will be
3212 properly adjusted. */
3213 if (changed)
3215 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3216 unsigned int uid = gimple_uid (stmt);
3217 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3219 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3220 stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3221 lhs, new_rhs1, oe->op);
3222 gimple_set_uid (stmt, uid);
3223 gimple_set_visited (stmt, true);
3224 if (insert_point == gsi_stmt (gsi))
3225 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3226 else
3227 insert_stmt_after (stmt, insert_point);
3229 else
3231 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3232 == stmt);
3233 gimple_assign_set_rhs1 (stmt, new_rhs1);
3234 gimple_assign_set_rhs2 (stmt, oe->op);
3235 update_stmt (stmt);
3238 if (dump_file && (dump_flags & TDF_DETAILS))
3240 fprintf (dump_file, " into ");
3241 print_gimple_stmt (dump_file, stmt, 0, 0);
3244 return lhs;
3247 /* Find out how many cycles we need to compute statements chain.
3248 OPS_NUM holds the number of statements in a chain. CPU_WIDTH is the
3249 maximum number of independent statements we may execute per cycle. */
3251 static int
3252 get_required_cycles (int ops_num, int cpu_width)
3254 int res;
3255 int elog;
3256 unsigned int rest;
3258 /* While we have more than 2 * cpu_width operands
3259 we may reduce the number of operands by cpu_width
3260 per cycle. */
3261 res = ops_num / (2 * cpu_width);
3263 /* The remaining operand count may be halved each cycle
3264 until we have only one operand. */
3265 rest = (unsigned)(ops_num - res * cpu_width);
3266 elog = exact_log2 (rest);
3267 if (elog >= 0)
3268 res += elog;
3269 else
3270 res += floor_log2 (rest) + 1;
3272 return res;
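/* Worked example (illustrative): with ops_num == 10 and cpu_width == 2,
   res = 10 / (2 * 2) = 2 full-width cycles, rest = 10 - 2 * 2 = 6, and
   since 6 is not a power of two, floor_log2 (6) + 1 = 3 further cycles
   are added, i.e. 5 cycles in total.  */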
3275 /* Returns an optimal number of registers to use for computation of
3276 given statements. */
3278 static int
3279 get_reassociation_width (int ops_num, enum tree_code opc,
3280 enum machine_mode mode)
3282 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3283 int width;
3284 int width_min;
3285 int cycles_best;
3287 if (param_width > 0)
3288 width = param_width;
3289 else
3290 width = targetm.sched.reassociation_width (opc, mode);
3292 if (width == 1)
3293 return width;
3295 /* Get the minimal time required for sequence computation. */
3296 cycles_best = get_required_cycles (ops_num, width);
3298 /* Check if we may use less width and still compute sequence for
3299 the same time. It will allow us to reduce registers usage.
3300 get_required_cycles is monotonically increasing with lower width
3301 so we can perform a binary search for the minimal width that still
3302 results in the optimal cycle count. */
3303 width_min = 1;
3304 while (width > width_min)
3306 int width_mid = (width + width_min) / 2;
3308 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3309 width = width_mid;
3310 else if (width_min < width_mid)
3311 width_min = width_mid;
3312 else
3313 break;
3316 return width;
3319 /* Recursively rewrite our linearized statements so that the operators
3320 match those in OPS[OPINDEX], putting the computation in rank
3321 order and trying to allow operations to be executed in
3322 parallel. */
3324 static void
3325 rewrite_expr_tree_parallel (gimple stmt, int width,
3326 vec<operand_entry_t> ops)
3328 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3329 int op_num = ops.length ();
3330 int stmt_num = op_num - 1;
3331 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3332 int op_index = op_num - 1;
3333 int stmt_index = 0;
3334 int ready_stmts_end = 0;
3335 int i = 0;
3336 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3338 /* We start expression rewriting from the top statements.
3339 So, in this loop we create a full list of statements
3340 we will work with. */
3341 stmts[stmt_num - 1] = stmt;
3342 for (i = stmt_num - 2; i >= 0; i--)
3343 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3345 for (i = 0; i < stmt_num; i++)
3347 tree op1, op2;
3349 /* Determine whether we should use results of
3350 already handled statements or not. */
3351 if (ready_stmts_end == 0
3352 && (i - stmt_index >= width || op_index < 1))
3353 ready_stmts_end = i;
3355 /* Now we choose operands for the next statement. A nonzero
3356 value in ready_stmts_end means that we should use
3357 the results of already generated statements as new operands. */
3358 if (ready_stmts_end > 0)
3360 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3361 if (ready_stmts_end > stmt_index)
3362 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3363 else if (op_index >= 0)
3364 op2 = ops[op_index--]->op;
3365 else
3367 gcc_assert (stmt_index < i);
3368 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3371 if (stmt_index >= ready_stmts_end)
3372 ready_stmts_end = 0;
3374 else
3376 if (op_index > 1)
3377 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3378 op2 = ops[op_index--]->op;
3379 op1 = ops[op_index--]->op;
3382 /* If we emit the last statement then we should put
3383 operands into the last statement. It will also
3384 break the loop. */
3385 if (op_index < 0 && stmt_index == i)
3386 i = stmt_num - 1;
3388 if (dump_file && (dump_flags & TDF_DETAILS))
3390 fprintf (dump_file, "Transforming ");
3391 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3394 /* We keep original statement only for the last one. All
3395 others are recreated. */
3396 if (i == stmt_num - 1)
3398 gimple_assign_set_rhs1 (stmts[i], op1);
3399 gimple_assign_set_rhs2 (stmts[i], op2);
3400 update_stmt (stmts[i]);
3402 else
3403 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3405 if (dump_file && (dump_flags & TDF_DETAILS))
3407 fprintf (dump_file, " into ");
3408 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3412 remove_visited_stmt_chain (last_rhs1);
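/* A hedged sketch of the effect for width 2, with an illustrative helper
   that is not part of this pass: the serial chain ((a + b) + c) + d is
   re-expressed so that the first two additions are independent and can
   issue in the same cycle.  */

static unsigned int
parallel_rewrite_example (unsigned int a, unsigned int b,
			  unsigned int c, unsigned int d)
{
  unsigned int t1 = a + b;	/* Independent of ...  */
  unsigned int t2 = c + d;	/* ... this addition.  */
  return t1 + t2;
}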
3415 /* Transform STMT, which is really (A + B) + (C + D), into the left
3416 linear form, ((A + B) + C) + D.
3417 Recurse on D if necessary. */
3419 static void
3420 linearize_expr (gimple stmt)
3422 gimple_stmt_iterator gsi;
3423 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3424 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3425 gimple oldbinrhs = binrhs;
3426 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3427 gimple newbinrhs = NULL;
3428 struct loop *loop = loop_containing_stmt (stmt);
3429 tree lhs = gimple_assign_lhs (stmt);
3431 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3432 && is_reassociable_op (binrhs, rhscode, loop));
3434 gsi = gsi_for_stmt (stmt);
3436 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3437 binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3438 make_ssa_name (TREE_TYPE (lhs), NULL),
3439 gimple_assign_lhs (binlhs),
3440 gimple_assign_rhs2 (binrhs));
3441 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3442 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3443 gimple_set_uid (binrhs, gimple_uid (stmt));
3445 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3446 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3448 if (dump_file && (dump_flags & TDF_DETAILS))
3450 fprintf (dump_file, "Linearized: ");
3451 print_gimple_stmt (dump_file, stmt, 0, 0);
3454 reassociate_stats.linearized++;
3455 update_stmt (stmt);
3457 gsi = gsi_for_stmt (oldbinrhs);
3458 gsi_remove (&gsi, true);
3459 release_defs (oldbinrhs);
3461 gimple_set_visited (stmt, true);
3462 gimple_set_visited (binlhs, true);
3463 gimple_set_visited (binrhs, true);
3465 /* Tail recurse on the new rhs if it still needs reassociation. */
3466 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3467 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3468 want to change the algorithm while converting to tuples. */
3469 linearize_expr (stmt);
3472 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3473 it. Otherwise, return NULL. */
3475 static gimple
3476 get_single_immediate_use (tree lhs)
3478 use_operand_p immuse;
3479 gimple immusestmt;
3481 if (TREE_CODE (lhs) == SSA_NAME
3482 && single_imm_use (lhs, &immuse, &immusestmt)
3483 && is_gimple_assign (immusestmt))
3484 return immusestmt;
3486 return NULL;
3489 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3490 representing the negated value. Insertions of any necessary
3491 instructions go before GSI.
3492 This function is recursive in that, if you hand it "a_5" as the
3493 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3494 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3496 static tree
3497 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3499 gimple negatedefstmt = NULL;
3500 tree resultofnegate;
3501 gimple_stmt_iterator gsi;
3502 unsigned int uid;
3504 /* If we are trying to negate a name, defined by an add, negate the
3505 add operands instead. */
3506 if (TREE_CODE (tonegate) == SSA_NAME)
3507 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3508 if (TREE_CODE (tonegate) == SSA_NAME
3509 && is_gimple_assign (negatedefstmt)
3510 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3511 && has_single_use (gimple_assign_lhs (negatedefstmt))
3512 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3514 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3515 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3516 tree lhs = gimple_assign_lhs (negatedefstmt);
3517 gimple g;
3519 gsi = gsi_for_stmt (negatedefstmt);
3520 rhs1 = negate_value (rhs1, &gsi);
3522 gsi = gsi_for_stmt (negatedefstmt);
3523 rhs2 = negate_value (rhs2, &gsi);
3525 gsi = gsi_for_stmt (negatedefstmt);
3526 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3527 gimple_set_visited (negatedefstmt, true);
3528 g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3529 gimple_set_uid (g, gimple_uid (negatedefstmt));
3530 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3531 return lhs;
3534 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3535 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3536 NULL_TREE, true, GSI_SAME_STMT);
3537 gsi = *gsip;
3538 uid = gimple_uid (gsi_stmt (gsi));
3539 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3541 gimple stmt = gsi_stmt (gsi);
3542 if (gimple_uid (stmt) != 0)
3543 break;
3544 gimple_set_uid (stmt, uid);
3546 return resultofnegate;
3549 /* Return true if we should break up the subtract in STMT into an add
3550 with negate. This is true when the subtract operands are really
3551 adds, or the subtract itself is used in an add expression. In
3552 either case, breaking up the subtract into an add with negate
3553 exposes the adds to reassociation. */
3555 static bool
3556 should_break_up_subtract (gimple stmt)
3558 tree lhs = gimple_assign_lhs (stmt);
3559 tree binlhs = gimple_assign_rhs1 (stmt);
3560 tree binrhs = gimple_assign_rhs2 (stmt);
3561 gimple immusestmt;
3562 struct loop *loop = loop_containing_stmt (stmt);
3564 if (TREE_CODE (binlhs) == SSA_NAME
3565 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3566 return true;
3568 if (TREE_CODE (binrhs) == SSA_NAME
3569 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3570 return true;
3572 if (TREE_CODE (lhs) == SSA_NAME
3573 && (immusestmt = get_single_immediate_use (lhs))
3574 && is_gimple_assign (immusestmt)
3575 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3576 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3577 return true;
3578 return false;
3581 /* Transform STMT from A - B into A + -B. */
3583 static void
3584 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3586 tree rhs1 = gimple_assign_rhs1 (stmt);
3587 tree rhs2 = gimple_assign_rhs2 (stmt);
3589 if (dump_file && (dump_flags & TDF_DETAILS))
3591 fprintf (dump_file, "Breaking up subtract ");
3592 print_gimple_stmt (dump_file, stmt, 0, 0);
3595 rhs2 = negate_value (rhs2, gsip);
3596 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3597 update_stmt (stmt);
3600 /* Determine whether STMT is a builtin call that raises an SSA name
3601 to an integer power and has only one use. If so, and this is early
3602 reassociation and unsafe math optimizations are permitted, place
3603 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3604 If any of these conditions does not hold, return FALSE. */
3606 static bool
3607 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3609 tree fndecl, arg1;
3610 REAL_VALUE_TYPE c, cint;
3612 if (!first_pass_instance
3613 || !flag_unsafe_math_optimizations
3614 || !is_gimple_call (stmt)
3615 || !has_single_use (gimple_call_lhs (stmt)))
3616 return false;
3618 fndecl = gimple_call_fndecl (stmt);
3620 if (!fndecl
3621 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3622 return false;
3624 switch (DECL_FUNCTION_CODE (fndecl))
3626 CASE_FLT_FN (BUILT_IN_POW):
3627 *base = gimple_call_arg (stmt, 0);
3628 arg1 = gimple_call_arg (stmt, 1);
3630 if (TREE_CODE (arg1) != REAL_CST)
3631 return false;
3633 c = TREE_REAL_CST (arg1);
3635 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3636 return false;
3638 *exponent = real_to_integer (&c);
3639 real_from_integer (&cint, VOIDmode, *exponent,
3640 *exponent < 0 ? -1 : 0, 0);
3641 if (!real_identical (&c, &cint))
3642 return false;
3644 break;
3646 CASE_FLT_FN (BUILT_IN_POWI):
3647 *base = gimple_call_arg (stmt, 0);
3648 arg1 = gimple_call_arg (stmt, 1);
3650 if (!tree_fits_shwi_p (arg1))
3651 return false;
3653 *exponent = tree_to_shwi (arg1);
3654 break;
3656 default:
3657 return false;
3660 /* Expanding negative exponents is generally unproductive, so we don't
3661 complicate matters with those. Exponents of zero and one should
3662 have been handled by expression folding. */
3663 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3664 return false;
3666 return true;
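/* For example (illustrative): during the first reassociation pass with
   -funsafe-math-optimizations, a single-use call pow (x, 3.0) or
   powi (x, 3) yields *BASE == x and *EXPONENT == 3, whereas pow (x, 2.5)
   is rejected because the exponent is not an exact integer, and exponents
   below 2 are rejected as unprofitable.  */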
3669 /* Recursively linearize a binary expression that is the RHS of STMT.
3670 Place the operands of the expression tree in the vector named OPS. */
3672 static void
3673 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3674 bool is_associative, bool set_visited)
3676 tree binlhs = gimple_assign_rhs1 (stmt);
3677 tree binrhs = gimple_assign_rhs2 (stmt);
3678 gimple binlhsdef = NULL, binrhsdef = NULL;
3679 bool binlhsisreassoc = false;
3680 bool binrhsisreassoc = false;
3681 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3682 struct loop *loop = loop_containing_stmt (stmt);
3683 tree base = NULL_TREE;
3684 HOST_WIDE_INT exponent = 0;
3686 if (set_visited)
3687 gimple_set_visited (stmt, true);
3689 if (TREE_CODE (binlhs) == SSA_NAME)
3691 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3692 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3693 && !stmt_could_throw_p (binlhsdef));
3696 if (TREE_CODE (binrhs) == SSA_NAME)
3698 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3699 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3700 && !stmt_could_throw_p (binrhsdef));
3703 /* If the LHS is not reassociable, but the RHS is, we need to swap
3704 them. If neither is reassociable, there is nothing we can do, so
3705 just put them in the ops vector. If the LHS is reassociable,
3706 linearize it. If both are reassociable, then linearize the RHS
3707 and the LHS. */
3709 if (!binlhsisreassoc)
3711 tree temp;
3713 /* If this is not an associative operation, like division, give up. */
3714 if (!is_associative)
3716 add_to_ops_vec (ops, binrhs);
3717 return;
3720 if (!binrhsisreassoc)
3722 if (rhscode == MULT_EXPR
3723 && TREE_CODE (binrhs) == SSA_NAME
3724 && acceptable_pow_call (binrhsdef, &base, &exponent))
3726 add_repeat_to_ops_vec (ops, base, exponent);
3727 gimple_set_visited (binrhsdef, true);
3729 else
3730 add_to_ops_vec (ops, binrhs);
3732 if (rhscode == MULT_EXPR
3733 && TREE_CODE (binlhs) == SSA_NAME
3734 && acceptable_pow_call (binlhsdef, &base, &exponent))
3736 add_repeat_to_ops_vec (ops, base, exponent);
3737 gimple_set_visited (binlhsdef, true);
3739 else
3740 add_to_ops_vec (ops, binlhs);
3742 return;
3745 if (dump_file && (dump_flags & TDF_DETAILS))
3747 fprintf (dump_file, "swapping operands of ");
3748 print_gimple_stmt (dump_file, stmt, 0, 0);
3751 swap_ssa_operands (stmt,
3752 gimple_assign_rhs1_ptr (stmt),
3753 gimple_assign_rhs2_ptr (stmt));
3754 update_stmt (stmt);
3756 if (dump_file && (dump_flags & TDF_DETAILS))
3758 fprintf (dump_file, " is now ");
3759 print_gimple_stmt (dump_file, stmt, 0, 0);
3762 /* We want to make it so the lhs is always the reassociative op,
3763 so swap. */
3764 temp = binlhs;
3765 binlhs = binrhs;
3766 binrhs = temp;
3768 else if (binrhsisreassoc)
3770 linearize_expr (stmt);
3771 binlhs = gimple_assign_rhs1 (stmt);
3772 binrhs = gimple_assign_rhs2 (stmt);
3775 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3776 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3777 rhscode, loop));
3778 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3779 is_associative, set_visited);
3781 if (rhscode == MULT_EXPR
3782 && TREE_CODE (binrhs) == SSA_NAME
3783 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3785 add_repeat_to_ops_vec (ops, base, exponent);
3786 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3788 else
3789 add_to_ops_vec (ops, binrhs);
3792 /* Repropagate the negates back into subtracts, since no other pass
3793 currently does it. */
3795 static void
3796 repropagate_negates (void)
3798 unsigned int i = 0;
3799 tree negate;
3801 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3803 gimple user = get_single_immediate_use (negate);
3805 if (!user || !is_gimple_assign (user))
3806 continue;
3808 /* The negate operand can be either operand of a PLUS_EXPR
3809 (it can be the LHS if the RHS is a constant for example).
3811 Force the negate operand to the RHS of the PLUS_EXPR, then
3812 transform the PLUS_EXPR into a MINUS_EXPR. */
3813 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3815 /* If the negated operand appears on the LHS of the
3816 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3817 to force the negated operand to the RHS of the PLUS_EXPR. */
3818 if (gimple_assign_rhs1 (user) == negate)
3820 swap_ssa_operands (user,
3821 gimple_assign_rhs1_ptr (user),
3822 gimple_assign_rhs2_ptr (user));
3825 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3826 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3827 if (gimple_assign_rhs2 (user) == negate)
3829 tree rhs1 = gimple_assign_rhs1 (user);
3830 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3831 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3832 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3833 update_stmt (user);
3836 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3838 if (gimple_assign_rhs1 (user) == negate)
3840 /* We have
3841 x = -a
3842 y = x - b
3843 which we transform into
3844 x = a + b
3845 y = -x .
3846 This pushes down the negate which we possibly can merge
3847 into some other operation, hence insert it into the
3848 plus_negates vector. */
3849 gimple feed = SSA_NAME_DEF_STMT (negate);
3850 tree a = gimple_assign_rhs1 (feed);
3851 tree b = gimple_assign_rhs2 (user);
3852 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
3853 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
3854 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
3855 gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
3856 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3857 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
3858 user = gsi_stmt (gsi2);
3859 update_stmt (user);
3860 gsi_remove (&gsi, true);
3861 release_defs (feed);
3862 plus_negates.safe_push (gimple_assign_lhs (user));
3864 else
3866 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3867 rid of one operation. */
3868 gimple feed = SSA_NAME_DEF_STMT (negate);
3869 tree a = gimple_assign_rhs1 (feed);
3870 tree rhs1 = gimple_assign_rhs1 (user);
3871 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3872 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3873 update_stmt (gsi_stmt (gsi));
3879 /* Returns true if OP is of a type for which we can do reassociation.
3880 That is for integral or non-saturating fixed-point types, and for
3881 floating point types when associative-math is enabled. */
3883 static bool
3884 can_reassociate_p (tree op)
3886 tree type = TREE_TYPE (op);
3887 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3888 || NON_SAT_FIXED_POINT_TYPE_P (type)
3889 || (flag_associative_math && FLOAT_TYPE_P (type)))
3890 return true;
3891 return false;
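/* For example (illustrative): unsigned integer arithmetic always
   qualifies because its overflow wraps, signed integers qualify only
   when their overflow wraps (e.g. with -fwrapv), and float or double
   qualify only when -fassociative-math is in effect.  */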
3894 /* Break up subtract operations in block BB.
3896 We do this top down because we don't know whether the subtract is
3897 part of a possible chain of reassociation except at the top.
3899 IE given
3900 d = f + g
3901 c = a + e
3902 b = c - d
3903 q = b - r
3904 k = t - q
3906 we want to break up k = t - q, but we won't until we've transformed q
3907 = b - r, which won't be broken up until we transform b = c - d.
3909 En passant, clear the GIMPLE visited flag on every statement
3910 and set UIDs within each basic block. */
3912 static void
3913 break_up_subtract_bb (basic_block bb)
3915 gimple_stmt_iterator gsi;
3916 basic_block son;
3917 unsigned int uid = 1;
3919 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3921 gimple stmt = gsi_stmt (gsi);
3922 gimple_set_visited (stmt, false);
3923 gimple_set_uid (stmt, uid++);
3925 if (!is_gimple_assign (stmt)
3926 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3927 continue;
3929 /* Look for simple gimple subtract operations. */
3930 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3932 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3933 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3934 continue;
3936 /* Check for a subtract used only in an addition. If this
3937 is the case, transform it into add of a negate for better
3938 reassociation. IE transform C = A-B into C = A + -B if C
3939 is only used in an addition. */
3940 if (should_break_up_subtract (stmt))
3941 break_up_subtract (stmt, &gsi);
3943 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3944 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3945 plus_negates.safe_push (gimple_assign_lhs (stmt));
3947 for (son = first_dom_son (CDI_DOMINATORS, bb);
3948 son;
3949 son = next_dom_son (CDI_DOMINATORS, son))
3950 break_up_subtract_bb (son);
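/* To sketch the example above: walking the block top down first turns
   b = c - d into b = c + -d, which in turn lets q = b - r and then
   k = t - q be broken up, giving roughly

     d = f + g
     c = a + e
     b = c + -d
     q = b + -r
     k = t + -q

   with the negates now exposed to the later optimization and
   negate-repropagation steps.  */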
3953 /* Used for repeated factor analysis. */
3954 struct repeat_factor_d
3956 /* An SSA name that occurs in a multiply chain. */
3957 tree factor;
3959 /* Cached rank of the factor. */
3960 unsigned rank;
3962 /* Number of occurrences of the factor in the chain. */
3963 HOST_WIDE_INT count;
3965 /* An SSA name representing the product of this factor and
3966 all factors appearing later in the repeated factor vector. */
3967 tree repr;
3970 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3971 typedef const struct repeat_factor_d *const_repeat_factor_t;
3974 static vec<repeat_factor> repeat_factor_vec;
3976 /* Used for sorting the repeat factor vector. Sort primarily by
3977 ascending occurrence count, secondarily by descending rank. */
3979 static int
3980 compare_repeat_factors (const void *x1, const void *x2)
3982 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3983 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3985 if (rf1->count != rf2->count)
3986 return rf1->count - rf2->count;
3988 return rf2->rank - rf1->rank;
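/* For example (an assumed set of factors): { a: count 3, rank 5 },
   { b: count 2, rank 7 } and { c: count 2, rank 4 } would sort as
   b, c, a -- the count-2 entries first, and among equal counts the
   higher-ranked b ahead of c.  */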
3991 /* Look for repeated operands in OPS in the multiply tree rooted at
3992 STMT. Replace them with an optimal sequence of multiplies and powi
3993 builtin calls, and remove the used operands from OPS. Return an
3994 SSA name representing the value of the replacement sequence. */
3996 static tree
3997 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3999 unsigned i, j, vec_len;
4000 int ii;
4001 operand_entry_t oe;
4002 repeat_factor_t rf1, rf2;
4003 repeat_factor rfnew;
4004 tree result = NULL_TREE;
4005 tree target_ssa, iter_result;
4006 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4007 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4008 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4009 gimple mul_stmt, pow_stmt;
4011 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4012 target. */
4013 if (!powi_fndecl)
4014 return NULL_TREE;
4016 /* Allocate the repeated factor vector. */
4017 repeat_factor_vec.create (10);
4019 /* Scan the OPS vector for all SSA names in the product and build
4020 up a vector of occurrence counts for each factor. */
4021 FOR_EACH_VEC_ELT (*ops, i, oe)
4023 if (TREE_CODE (oe->op) == SSA_NAME)
4025 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4027 if (rf1->factor == oe->op)
4029 rf1->count += oe->count;
4030 break;
4034 if (j >= repeat_factor_vec.length ())
4036 rfnew.factor = oe->op;
4037 rfnew.rank = oe->rank;
4038 rfnew.count = oe->count;
4039 rfnew.repr = NULL_TREE;
4040 repeat_factor_vec.safe_push (rfnew);
4045 /* Sort the repeated factor vector by (a) increasing occurrence count,
4046 and (b) decreasing rank. */
4047 repeat_factor_vec.qsort (compare_repeat_factors);
4049 /* It is generally best to combine as many base factors as possible
4050 into a product before applying __builtin_powi to the result.
4051 However, the sort order chosen for the repeated factor vector
4052 allows us to cache partial results for the product of the base
4053 factors for subsequent use. When we already have a cached partial
4054 result from a previous iteration, it is best to make use of it
4055 before looking for another __builtin_powi opportunity.
4057 As an example, consider x * x * y * y * y * z * z * z * z.
4058 We want to first compose the product x * y * z, raise it to the
4059 second power, then multiply this by y * z, and finally multiply
4060 by z. This can be done in 5 multiplies provided we cache y * z
4061 for use in both expressions:
4063 t1 = y * z
4064 t2 = t1 * x
4065 t3 = t2 * t2
4066 t4 = t1 * t3
4067 result = t4 * z
4069 If we instead ignored the cached y * z and first multiplied by
4070 the __builtin_pow opportunity z * z, we would get the inferior:
4072 t1 = y * z
4073 t2 = t1 * x
4074 t3 = t2 * t2
4075 t4 = z * z
4076 t5 = t3 * t4
4077 result = t5 * y */
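/* Concretely, for the example above the loop below would emit roughly

     t1 = y * z
     t2 = x * t1
     t3 = __builtin_powi (t2, 2)
     t4 = t3 * t1
     result = t4 * z

   where the t3 = t2 * t2 step of the comment above is realized as a
   __builtin_powi call (the temporaries are actually "reassocpow"
   SSA names).  */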
4079 vec_len = repeat_factor_vec.length ();
4081 /* Repeatedly look for opportunities to create a builtin_powi call. */
4082 while (true)
4084 HOST_WIDE_INT power;
4086 /* First look for the largest cached product of factors from
4087 preceding iterations. If found, create a builtin_powi for
4088 it if the minimum occurrence count for its factors is at
4089 least 2, or just use this cached product as our next
4090 multiplicand if the minimum occurrence count is 1. */
4091 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4093 if (rf1->repr && rf1->count > 0)
4094 break;
4097 if (j < vec_len)
4099 power = rf1->count;
4101 if (power == 1)
4103 iter_result = rf1->repr;
4105 if (dump_file && (dump_flags & TDF_DETAILS))
4107 unsigned elt;
4108 repeat_factor_t rf;
4109 fputs ("Multiplying by cached product ", dump_file);
4110 for (elt = j; elt < vec_len; elt++)
4112 rf = &repeat_factor_vec[elt];
4113 print_generic_expr (dump_file, rf->factor, 0);
4114 if (elt < vec_len - 1)
4115 fputs (" * ", dump_file);
4117 fputs ("\n", dump_file);
4120 else
4122 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4123 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4124 build_int_cst (integer_type_node,
4125 power));
4126 gimple_call_set_lhs (pow_stmt, iter_result);
4127 gimple_set_location (pow_stmt, gimple_location (stmt));
4128 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4130 if (dump_file && (dump_flags & TDF_DETAILS))
4132 unsigned elt;
4133 repeat_factor_t rf;
4134 fputs ("Building __builtin_pow call for cached product (",
4135 dump_file);
4136 for (elt = j; elt < vec_len; elt++)
4138 rf = &repeat_factor_vec[elt];
4139 print_generic_expr (dump_file, rf->factor, 0);
4140 if (elt < vec_len - 1)
4141 fputs (" * ", dump_file);
4143 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4144 power);
4148 else
4150 /* Otherwise, find the first factor in the repeated factor
4151 vector whose occurrence count is at least 2. If no such
4152 factor exists, there are no builtin_powi opportunities
4153 remaining. */
4154 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4156 if (rf1->count >= 2)
4157 break;
4160 if (j >= vec_len)
4161 break;
4163 power = rf1->count;
4165 if (dump_file && (dump_flags & TDF_DETAILS))
4167 unsigned elt;
4168 repeat_factor_t rf;
4169 fputs ("Building __builtin_pow call for (", dump_file);
4170 for (elt = j; elt < vec_len; elt++)
4172 rf = &repeat_factor_vec[elt];
4173 print_generic_expr (dump_file, rf->factor, 0);
4174 if (elt < vec_len - 1)
4175 fputs (" * ", dump_file);
4177 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4180 reassociate_stats.pows_created++;
4182 /* Visit each element of the vector in reverse order (so that
4183 high-occurrence elements are visited first, and within the
4184 same occurrence count, lower-ranked elements are visited
4185 first). Form a linear product of all elements in this order
4186 whose occurrence count is at least that of element J.
4187 Record the SSA name representing the product of each element
4188 with all subsequent elements in the vector. */
4189 if (j == vec_len - 1)
4190 rf1->repr = rf1->factor;
4191 else
4193 for (ii = vec_len - 2; ii >= (int)j; ii--)
4195 tree op1, op2;
4197 rf1 = &repeat_factor_vec[ii];
4198 rf2 = &repeat_factor_vec[ii + 1];
4200 /* Init the last factor's representative to be itself. */
4201 if (!rf2->repr)
4202 rf2->repr = rf2->factor;
4204 op1 = rf1->factor;
4205 op2 = rf2->repr;
4207 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4208 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4209 target_ssa,
4210 op1, op2);
4211 gimple_set_location (mul_stmt, gimple_location (stmt));
4212 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4213 rf1->repr = target_ssa;
4215 /* Don't reprocess the multiply we just introduced. */
4216 gimple_set_visited (mul_stmt, true);
4220 /* Form a call to __builtin_powi for the maximum product
4221 just formed, raised to the power obtained earlier. */
4222 rf1 = &repeat_factor_vec[j];
4223 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4224 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4225 build_int_cst (integer_type_node,
4226 power));
4227 gimple_call_set_lhs (pow_stmt, iter_result);
4228 gimple_set_location (pow_stmt, gimple_location (stmt));
4229 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4232 /* If we previously formed at least one other builtin_powi call,
4233 form the product of this one and those others. */
4234 if (result)
4236 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4237 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4238 result, iter_result);
4239 gimple_set_location (mul_stmt, gimple_location (stmt));
4240 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4241 gimple_set_visited (mul_stmt, true);
4242 result = new_result;
4244 else
4245 result = iter_result;
4247 /* Decrement the occurrence count of each element in the product
4248 by the count found above, and remove this many copies of each
4249 factor from OPS. */
4250 for (i = j; i < vec_len; i++)
4252 unsigned k = power;
4253 unsigned n;
4255 rf1 = &repeat_factor_vec[i];
4256 rf1->count -= power;
4258 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4260 if (oe->op == rf1->factor)
4262 if (oe->count <= k)
4264 ops->ordered_remove (n);
4265 k -= oe->count;
4267 if (k == 0)
4268 break;
4270 else
4272 oe->count -= k;
4273 break;
4280 /* At this point all elements in the repeated factor vector have a
4281 remaining occurrence count of 0 or 1, and those with a count of 1
4282 don't have cached representatives. Re-sort the ops vector and
4283 clean up. */
4284 ops->qsort (sort_by_operand_rank);
4285 repeat_factor_vec.release ();
4287 /* Return the final product computed herein. Note that there may
4288 still be some elements with single occurrence count left in OPS;
4289 those will be handled by the normal reassociation logic. */
4290 return result;
4293 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4295 static void
4296 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4298 tree rhs1;
4300 if (dump_file && (dump_flags & TDF_DETAILS))
4302 fprintf (dump_file, "Transforming ");
4303 print_gimple_stmt (dump_file, stmt, 0, 0);
4306 rhs1 = gimple_assign_rhs1 (stmt);
4307 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4308 update_stmt (stmt);
4309 remove_visited_stmt_chain (rhs1);
4311 if (dump_file && (dump_flags & TDF_DETAILS))
4313 fprintf (dump_file, " into ");
4314 print_gimple_stmt (dump_file, stmt, 0, 0);
4318 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4320 static void
4321 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4322 tree rhs1, tree rhs2)
4324 if (dump_file && (dump_flags & TDF_DETAILS))
4326 fprintf (dump_file, "Transforming ");
4327 print_gimple_stmt (dump_file, stmt, 0, 0);
4330 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4331 update_stmt (gsi_stmt (*gsi));
4332 remove_visited_stmt_chain (rhs1);
4334 if (dump_file && (dump_flags & TDF_DETAILS))
4336 fprintf (dump_file, " into ");
4337 print_gimple_stmt (dump_file, stmt, 0, 0);
4341 /* Reassociate expressions in basic block BB and its children in the
4342 post-dominator tree. */
4344 static void
4345 reassociate_bb (basic_block bb)
4347 gimple_stmt_iterator gsi;
4348 basic_block son;
4349 gimple stmt = last_stmt (bb);
4351 if (stmt && !gimple_visited_p (stmt))
4352 maybe_optimize_range_tests (stmt);
4354 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4356 stmt = gsi_stmt (gsi);
4358 if (is_gimple_assign (stmt)
4359 && !stmt_could_throw_p (stmt))
4361 tree lhs, rhs1, rhs2;
4362 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4364 /* If this is not a gimple binary expression, there is
4365 nothing for us to do with it. */
4366 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4367 continue;
4369 /* If this was part of an already processed statement,
4370 we don't need to touch it again. */
4371 if (gimple_visited_p (stmt))
4373 /* This statement might have become dead because of previous
4374 reassociations. */
4375 if (has_zero_uses (gimple_get_lhs (stmt)))
4377 gsi_remove (&gsi, true);
4378 release_defs (stmt);
4379 /* Removing that statement may leave the iterator at the end of
4380 the sequence; if so, reset it to the last statement of the
4381 block, and bail out if the block has become empty. */
4385 if (gsi_end_p (gsi))
4387 gsi = gsi_last_bb (bb);
4388 if (gsi_end_p (gsi))
4389 break;
4392 continue;
4395 lhs = gimple_assign_lhs (stmt);
4396 rhs1 = gimple_assign_rhs1 (stmt);
4397 rhs2 = gimple_assign_rhs2 (stmt);
4399 /* For non-bit or min/max operations we can't associate
4400 all types. Verify that here. */
4401 if (rhs_code != BIT_IOR_EXPR
4402 && rhs_code != BIT_AND_EXPR
4403 && rhs_code != BIT_XOR_EXPR
4404 && rhs_code != MIN_EXPR
4405 && rhs_code != MAX_EXPR
4406 && (!can_reassociate_p (lhs)
4407 || !can_reassociate_p (rhs1)
4408 || !can_reassociate_p (rhs2)))
4409 continue;
4411 if (associative_tree_code (rhs_code))
4413 auto_vec<operand_entry_t> ops;
4414 tree powi_result = NULL_TREE;
4416 /* There may be no immediate uses left by the time we
4417 get here because we may have eliminated them all. */
4418 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4419 continue;
4421 gimple_set_visited (stmt, true);
4422 linearize_expr_tree (&ops, stmt, true, true);
4423 ops.qsort (sort_by_operand_rank);
4424 optimize_ops_list (rhs_code, &ops);
4425 if (undistribute_ops_list (rhs_code, &ops,
4426 loop_containing_stmt (stmt)))
4428 ops.qsort (sort_by_operand_rank);
4429 optimize_ops_list (rhs_code, &ops);
4432 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4433 optimize_range_tests (rhs_code, &ops);
4435 if (first_pass_instance
4436 && rhs_code == MULT_EXPR
4437 && flag_unsafe_math_optimizations)
4438 powi_result = attempt_builtin_powi (stmt, &ops);
4440 /* If the operand vector is now empty, all operands were
4441 consumed by the __builtin_powi optimization. */
4442 if (ops.length () == 0)
4443 transform_stmt_to_copy (&gsi, stmt, powi_result);
4444 else if (ops.length () == 1)
4446 tree last_op = ops.last ()->op;
4448 if (powi_result)
4449 transform_stmt_to_multiply (&gsi, stmt, last_op,
4450 powi_result);
4451 else
4452 transform_stmt_to_copy (&gsi, stmt, last_op);
4454 else
4456 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4457 int ops_num = ops.length ();
4458 int width = get_reassociation_width (ops_num, rhs_code, mode);
4459 tree new_lhs = lhs;
4461 if (dump_file && (dump_flags & TDF_DETAILS))
4462 fprintf (dump_file,
4463 "Width = %d was chosen for reassociation\n", width);
4465 if (width > 1
4466 && ops.length () > 3)
4467 rewrite_expr_tree_parallel (stmt, width, ops);
4468 else
4470 /* When there are three operands left, we want
4471 to make sure the ones that get the double
4472 binary op are chosen wisely. */
4473 int len = ops.length ();
4474 if (len >= 3)
4475 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4477 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4478 powi_result != NULL);
4481 /* If we combined some repeated factors into a
4482 __builtin_powi call, multiply that result by the
4483 reassociated operands. */
4484 if (powi_result)
4486 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4487 tree type = TREE_TYPE (lhs);
4488 tree target_ssa = make_temp_ssa_name (type, NULL,
4489 "reassocpow");
4490 gimple_set_lhs (lhs_stmt, target_ssa);
4491 update_stmt (lhs_stmt);
4492 if (lhs != new_lhs)
4493 target_ssa = new_lhs;
4494 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4495 powi_result,
4496 target_ssa);
4497 gimple_set_location (mul_stmt, gimple_location (stmt));
4498 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4504 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4505 son;
4506 son = next_dom_son (CDI_POST_DOMINATORS, son))
4507 reassociate_bb (son);
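/* As a small illustration (an assumed input, not from the sources):
   a chain computing ((a + 1) + a) + 2 in an unsigned type is
   linearized into the operand list { a, a, 1, 2 }, optimize_ops_list
   folds the two constants into 3, and the statements are rewritten in
   rank order, yielding roughly (a + a) + 3.  */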
4510 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4511 void debug_ops_vector (vec<operand_entry_t> ops);
4513 /* Dump the operand entry vector OPS to FILE. */
4515 void
4516 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4518 operand_entry_t oe;
4519 unsigned int i;
4521 FOR_EACH_VEC_ELT (ops, i, oe)
4523 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4524 print_generic_expr (file, oe->op, 0);
4528 /* Dump the operand entry vector OPS to STDERR. */
4530 DEBUG_FUNCTION void
4531 debug_ops_vector (vec<operand_entry_t> ops)
4533 dump_ops_vector (stderr, ops);
4536 static void
4537 do_reassoc (void)
4539 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4540 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4543 /* Initialize the reassociation pass. */
4545 static void
4546 init_reassoc (void)
4548 int i;
4549 long rank = 2;
4550 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4552 /* Find the loops, so that we can prevent moving calculations in
4553 them. */
4554 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4556 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4558 operand_entry_pool = create_alloc_pool ("operand entry pool",
4559 sizeof (struct operand_entry), 30);
4560 next_operand_entry_id = 0;
4562 /* Reverse RPO (Reverse Post Order) will give us something where
4563 deeper loops come later. */
4564 pre_and_rev_post_order_compute (NULL, bbs, false);
4565 bb_rank = XCNEWVEC (long, last_basic_block);
4566 operand_rank = pointer_map_create ();
4568 /* Give each default definition a distinct rank. This includes
4569 parameters and the static chain. Walk backwards over all
4570 SSA names so that we get proper rank ordering according
4571 to tree_swap_operands_p. */
4572 for (i = num_ssa_names - 1; i > 0; --i)
4574 tree name = ssa_name (i);
4575 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4576 insert_operand_rank (name, ++rank);
4579 /* Set up rank for each BB */
4580 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
4581 bb_rank[bbs[i]] = ++rank << 16;
4583 free (bbs);
4584 calculate_dominance_info (CDI_POST_DOMINATORS);
4585 plus_negates = vNULL;
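/* For illustration (assuming no default definitions bumped RANK in
   the loop above): with bbs[] = { 2, 3, 4 } the assignments become
   bb_rank[2] = 3 << 16, bb_rank[3] = 4 << 16 and bb_rank[4] = 5 << 16;
   the shift leaves the low bits free for the ranks of individual
   expressions computed within each block.  */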
4588 /* Cleanup after the reassociation pass, and print stats if
4589 requested. */
4591 static void
4592 fini_reassoc (void)
4594 statistics_counter_event (cfun, "Linearized",
4595 reassociate_stats.linearized);
4596 statistics_counter_event (cfun, "Constants eliminated",
4597 reassociate_stats.constants_eliminated);
4598 statistics_counter_event (cfun, "Ops eliminated",
4599 reassociate_stats.ops_eliminated);
4600 statistics_counter_event (cfun, "Statements rewritten",
4601 reassociate_stats.rewritten);
4602 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4603 reassociate_stats.pows_encountered);
4604 statistics_counter_event (cfun, "Built-in powi calls created",
4605 reassociate_stats.pows_created);
4607 pointer_map_destroy (operand_rank);
4608 free_alloc_pool (operand_entry_pool);
4609 free (bb_rank);
4610 plus_negates.release ();
4611 free_dominance_info (CDI_POST_DOMINATORS);
4612 loop_optimizer_finalize ();
4615 /* Gate and execute functions for Reassociation. */
4617 static unsigned int
4618 execute_reassoc (void)
4620 init_reassoc ();
4622 do_reassoc ();
4623 repropagate_negates ();
4625 fini_reassoc ();
4626 return 0;
4629 static bool
4630 gate_tree_ssa_reassoc (void)
4632 return flag_tree_reassoc != 0;
4635 namespace {
4637 const pass_data pass_data_reassoc =
4639 GIMPLE_PASS, /* type */
4640 "reassoc", /* name */
4641 OPTGROUP_NONE, /* optinfo_flags */
4642 true, /* has_gate */
4643 true, /* has_execute */
4644 TV_TREE_REASSOC, /* tv_id */
4645 ( PROP_cfg | PROP_ssa ), /* properties_required */
4646 0, /* properties_provided */
4647 0, /* properties_destroyed */
4648 0, /* todo_flags_start */
4649 ( TODO_verify_ssa
4650 | TODO_update_ssa_only_virtuals
4651 | TODO_verify_flow ), /* todo_flags_finish */
4654 class pass_reassoc : public gimple_opt_pass
4656 public:
4657 pass_reassoc (gcc::context *ctxt)
4658 : gimple_opt_pass (pass_data_reassoc, ctxt)
4661 /* opt_pass methods: */
4662 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4663 bool gate () { return gate_tree_ssa_reassoc (); }
4664 unsigned int execute () { return execute_reassoc (); }
4666 }; // class pass_reassoc
4668 } // anon namespace
4670 gimple_opt_pass *
4671 make_pass_reassoc (gcc::context *ctxt)
4673 return new pass_reassoc (ctxt);