1 /* Reassociation for trees.
2 Copyright (C) 2005-2014 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-inline.h"
33 #include "pointer-set.h"
34 #include "tree-ssa-alias.h"
35 #include "internal-fn.h"
36 #include "gimple-fold.h"
37 #include "tree-eh.h"
38 #include "gimple-expr.h"
39 #include "is-a.h"
40 #include "gimple.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "gimple-ssa.h"
44 #include "tree-cfg.h"
45 #include "tree-phinodes.h"
46 #include "ssa-iterators.h"
47 #include "stringpool.h"
48 #include "tree-ssanames.h"
49 #include "tree-ssa-loop-niter.h"
50 #include "tree-ssa-loop.h"
51 #include "expr.h"
52 #include "tree-dfa.h"
53 #include "tree-ssa.h"
54 #include "tree-iterator.h"
55 #include "tree-pass.h"
56 #include "alloc-pool.h"
57 #include "langhooks.h"
58 #include "cfgloop.h"
59 #include "flags.h"
60 #include "target.h"
61 #include "params.h"
62 #include "diagnostic-core.h"
64 /* This is a simple global reassociation pass. It is, in part, based
65 on the LLVM pass of the same name (They do some things more/less
66 than we do, in different orders, etc).
68 It consists of five steps:
70 1. Breaking up subtract operations into addition + negate, where
71 it would promote the reassociation of adds.
73 2. Left linearization of the expression trees, so that (A+B)+(C+D)
74 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
75 During linearization, we place the operands of the binary
76 expressions into a vector of operand_entry_t
78 3. Optimization of the operand lists, eliminating things like a +
79 -a, a & a, etc.
81 3a. Combine repeated factors with the same occurrence counts
82 into a __builtin_powi call that will later be optimized into
83 an optimal number of multiplies.
85 4. Rewrite the expression trees we linearized and optimized so
86 they are in proper rank order.
88 5. Repropagate negates, as nothing else will clean it up ATM.
90 A bit of theory on #4, since nobody seems to write anything down
91 about why it makes sense to do it the way they do it:
93 We could do this much nicer theoretically, but don't (for reasons
94 explained after how to do it theoretically nice :P).
96 In order to promote the most redundancy elimination, you want
97 binary expressions whose operands are the same rank (or
98 preferably, the same value) exposed to the redundancy eliminator,
99 for possible elimination.
101 So the way to do this if we really cared, is to build the new op
102 tree from the leaves to the roots, merging as you go, and putting the
103 new op on the end of the worklist, until you are left with one
104 thing on the worklist.
106 IE if you have to rewrite the following set of operands (listed with
107 rank in parentheses), with opcode PLUS_EXPR:
109 a (1), b (1), c (1), d (2), e (2)
112 We start with our merge worklist empty, and the ops list with all of
113 those on it.
115 You want to first merge all leaves of the same rank, as much as
116 possible.
118 So first build a binary op of
120 mergetmp = a + b, and put "mergetmp" on the merge worklist.
122 Because there is no three operand form of PLUS_EXPR, c is not going to
123 be exposed to redundancy elimination as a rank 1 operand.
125 So you might as well throw it on the merge worklist (you could also
126 consider it to now be a rank two operand, and merge it with d and e,
127 but in this case, you then have evicted e from a binary op. So at
128 least in this situation, you can't win.)
130 Then build a binary op of d + e
131 mergetmp2 = d + e
133 and put mergetmp2 on the merge worklist.
135 so merge worklist = {mergetmp, c, mergetmp2}
137 Continue building binary ops of these operations until you have only
138 one operation left on the worklist.
140 So we have
142 build binary op
143 mergetmp3 = mergetmp + c
145 worklist = {mergetmp2, mergetmp3}
147 mergetmp4 = mergetmp2 + mergetmp3
149 worklist = {mergetmp4}
151 because we have one operation left, we can now just set the original
152 statement equal to the result of that operation.
154 This will at least expose a + b and d + e to redundancy elimination
155 as binary operations.
157 For extra points, you can reuse the old statements to build the
158 mergetmps, since you shouldn't run out.
160 So why don't we do this?
162 Because it's expensive, and rarely will help. Most trees we are
163    reassociating have 3 or fewer ops.  If they have 2 ops, they already
164 will be written into a nice single binary op. If you have 3 ops, a
165 single simple check suffices to tell you whether the first two are of the
166 same rank. If so, you know to order it
168 mergetmp = op1 + op2
169 newstmt = mergetmp + op3
171 instead of
172 mergetmp = op2 + op3
173 newstmt = mergetmp + op1
175 If all three are of the same rank, you can't expose them all in a
176 single binary operator anyway, so the above is *still* the best you
177 can do.
179 Thus, this is what we do. When we have three ops left, we check to see
180 what order to put them in, and call it a day. As a nod to vector sum
181 reduction, we check if any of the ops are really a phi node that is a
182 destructive update for the associating op, and keep the destructive
183 update together for vector sum reduction recognition. */
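
/* A small end-to-end illustration of the five steps above (hypothetical
   input, shown as source rather than GIMPLE).  Given

     t = (a - b) + (c + b)

   step 1 turns the subtract into an addition of a negate, step 2
   linearizes the tree into an operand list, and step 3 spots the
   b + -b pair in that list:

     ops = {a, -b, c, b}  ->  ops = {a, c, 0}  ->  ops = {a, c}

   Steps 4 and 5 then rewrite the statement as t = a + c in rank order,
   with no negate left to repropagate.  */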
186 /* Statistics */
187 static struct
189 int linearized;
190 int constants_eliminated;
191 int ops_eliminated;
192 int rewritten;
193 int pows_encountered;
194 int pows_created;
195 } reassociate_stats;
197 /* Operator, rank pair. */
198 typedef struct operand_entry
200 unsigned int rank;
201 int id;
202 tree op;
203 unsigned int count;
204 } *operand_entry_t;
206 static alloc_pool operand_entry_pool;
208 /* This is used to assign a unique ID to each struct operand_entry
209 so that qsort results are identical on different hosts. */
210 static int next_operand_entry_id;
212 /* Starting rank number for a given basic block, so that we can rank
213 operations using unmovable instructions in that BB based on the bb
214 depth. */
215 static long *bb_rank;
217 /* Operand->rank hashtable. */
218 static struct pointer_map_t *operand_rank;
220 /* Forward decls. */
221 static long get_rank (tree);
224 /* Bias amount for loop-carried phis. We want this to be larger than
225 the depth of any reassociation tree we can see, but not larger than
226 the rank difference between two blocks. */
227 #define PHI_LOOP_BIAS (1 << 15)
229 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
230 an innermost loop, and the phi has only a single use which is inside
231 the loop, then the rank is the block rank of the loop latch plus an
232 extra bias for the loop-carried dependence. This causes expressions
233 calculated into an accumulator variable to be independent for each
234 iteration of the loop. If STMT is some other phi, the rank is the
235 block rank of its containing block. */
236 static long
237 phi_rank (gimple stmt)
239 basic_block bb = gimple_bb (stmt);
240 struct loop *father = bb->loop_father;
241 tree res;
242 unsigned i;
243 use_operand_p use;
244 gimple use_stmt;
246 /* We only care about real loops (those with a latch). */
247 if (!father->latch)
248 return bb_rank[bb->index];
250 /* Interesting phis must be in headers of innermost loops. */
251 if (bb != father->header
252 || father->inner)
253 return bb_rank[bb->index];
255 /* Ignore virtual SSA_NAMEs. */
256 res = gimple_phi_result (stmt);
257 if (virtual_operand_p (res))
258 return bb_rank[bb->index];
260 /* The phi definition must have a single use, and that use must be
261 within the loop. Otherwise this isn't an accumulator pattern. */
262 if (!single_imm_use (res, &use, &use_stmt)
263 || gimple_bb (use_stmt)->loop_father != father)
264 return bb_rank[bb->index];
266 /* Look for phi arguments from within the loop. If found, bias this phi. */
267 for (i = 0; i < gimple_phi_num_args (stmt); i++)
269 tree arg = gimple_phi_arg_def (stmt, i);
270 if (TREE_CODE (arg) == SSA_NAME
271 && !SSA_NAME_IS_DEFAULT_DEF (arg))
273 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
274 if (gimple_bb (def_stmt)->loop_father == father)
275 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
279 /* Must be an uninteresting phi. */
280 return bb_rank[bb->index];
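
/* Sketch of the case the bias above targets (hypothetical GIMPLE,
   names made up).  In the header of an innermost loop:

     sum_1 = PHI <0.0(preheader), sum_2(latch)>
     ...
     sum_2 = sum_1 + a_i

   sum_1 is defined by a phi in the loop header, has a single use inside
   the loop, and receives an argument defined in the loop, so its rank is
   bb_rank[latch->index] + PHI_LOOP_BIAS rather than the header's block
   rank.  A phi that fails any of these checks just gets bb_rank.  */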
283 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
284 loop-carried dependence of an innermost loop, return TRUE; else
285 return FALSE. */
286 static bool
287 loop_carried_phi (tree exp)
289 gimple phi_stmt;
290 long block_rank;
292 if (TREE_CODE (exp) != SSA_NAME
293 || SSA_NAME_IS_DEFAULT_DEF (exp))
294 return false;
296 phi_stmt = SSA_NAME_DEF_STMT (exp);
298 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
299 return false;
301 /* Non-loop-carried phis have block rank. Loop-carried phis have
302 an additional bias added in. If this phi doesn't have block rank,
303 it's biased and should not be propagated. */
304 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
306 if (phi_rank (phi_stmt) != block_rank)
307 return true;
309 return false;
312 /* Return the maximum of RANK and the rank that should be propagated
313 from expression OP. For most operands, this is just the rank of OP.
314 For loop-carried phis, the value is zero to avoid undoing the bias
315 in favor of the phi. */
316 static long
317 propagate_rank (long rank, tree op)
319 long op_rank;
321 if (loop_carried_phi (op))
322 return rank;
324 op_rank = get_rank (op);
326 return MAX (rank, op_rank);
329 /* Look up the operand rank structure for expression E. */
331 static inline long
332 find_operand_rank (tree e)
334 void **slot = pointer_map_contains (operand_rank, e);
335 return slot ? (long) (intptr_t) *slot : -1;
338 /* Insert {E,RANK} into the operand rank hashtable. */
340 static inline void
341 insert_operand_rank (tree e, long rank)
343 void **slot;
344 gcc_assert (rank > 0);
345 slot = pointer_map_insert (operand_rank, e);
346 gcc_assert (!*slot);
347 *slot = (void *) (intptr_t) rank;
350 /* Given an expression E, return the rank of the expression. */
352 static long
353 get_rank (tree e)
355 /* Constants have rank 0. */
356 if (is_gimple_min_invariant (e))
357 return 0;
359      SSA_NAME's have the rank of the expression they are the result of.
361 For globals and uninitialized values, the rank is 0.
362 For function arguments, use the pre-setup rank.
363 For PHI nodes, stores, asm statements, etc, we use the rank of
364 the BB.
365 For simple operations, the rank is the maximum rank of any of
366 its operands, or the bb_rank, whichever is less.
367 I make no claims that this is optimal, however, it gives good
368 results. */
370 /* We make an exception to the normal ranking system to break
371 dependences of accumulator variables in loops. Suppose we
372 have a simple one-block loop containing:
374 x_1 = phi(x_0, x_2)
375 b = a + x_1
376 c = b + d
377 x_2 = c + e
379 As shown, each iteration of the calculation into x is fully
380 dependent upon the iteration before it. We would prefer to
381 see this in the form:
383 x_1 = phi(x_0, x_2)
384 b = a + d
385 c = b + e
386 x_2 = c + x_1
388 If the loop is unrolled, the calculations of b and c from
389 different iterations can be interleaved.
391 To obtain this result during reassociation, we bias the rank
392 of the phi definition x_1 upward, when it is recognized as an
393 accumulator pattern. The artificial rank causes it to be
394 added last, providing the desired independence. */
396 if (TREE_CODE (e) == SSA_NAME)
398 gimple stmt;
399 long rank;
400 int i, n;
401 tree op;
403 if (SSA_NAME_IS_DEFAULT_DEF (e))
404 return find_operand_rank (e);
406 stmt = SSA_NAME_DEF_STMT (e);
407 if (gimple_code (stmt) == GIMPLE_PHI)
408 return phi_rank (stmt);
410 if (!is_gimple_assign (stmt)
411 || gimple_vdef (stmt))
412 return bb_rank[gimple_bb (stmt)->index];
414 /* If we already have a rank for this expression, use that. */
415 rank = find_operand_rank (e);
416 if (rank != -1)
417 return rank;
419 /* Otherwise, find the maximum rank for the operands. As an
420 exception, remove the bias from loop-carried phis when propagating
421 the rank so that dependent operations are not also biased. */
422 rank = 0;
423 if (gimple_assign_single_p (stmt))
425 tree rhs = gimple_assign_rhs1 (stmt);
426 n = TREE_OPERAND_LENGTH (rhs);
427 if (n == 0)
428 rank = propagate_rank (rank, rhs);
429 else
431 for (i = 0; i < n; i++)
433 op = TREE_OPERAND (rhs, i);
435 if (op != NULL_TREE)
436 rank = propagate_rank (rank, op);
440 else
442 n = gimple_num_ops (stmt);
443 for (i = 1; i < n; i++)
445 op = gimple_op (stmt, i);
446 gcc_assert (op);
447 rank = propagate_rank (rank, op);
451 if (dump_file && (dump_flags & TDF_DETAILS))
453 fprintf (dump_file, "Rank for ");
454 print_generic_expr (dump_file, e, 0);
455 fprintf (dump_file, " is %ld\n", (rank + 1));
458 /* Note the rank in the hashtable so we don't recompute it. */
459 insert_operand_rank (e, (rank + 1));
460 return (rank + 1);
463 /* Globals, etc, are rank 0 */
464 return 0;
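
/* Rough example of the resulting ranks (hypothetical names; assume a_1
   and b_2 already carry ranks 1 and 2, e.g. as pre-ranked arguments):

     t_3 = a_1 + b_2;   rank 3  (one more than its highest-ranked operand)
     t_4 = t_3 * a_1;   rank 4
     t_5 = t_4 + 7;     rank 5  (the constant 7 itself has rank 0)

   Operands that are loop-carried phis are ignored when taking the
   maximum (see propagate_rank), so the PHI_LOOP_BIAS is not propagated
   to dependent statements.  */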
468 /* We want integer ones to end up last no matter what, since they are
469 the ones we can do the most with. */
470 #define INTEGER_CONST_TYPE 1 << 3
471 #define FLOAT_CONST_TYPE 1 << 2
472 #define OTHER_CONST_TYPE 1 << 1
474 /* Classify an invariant tree into integer, float, or other, so that
475 we can sort them to be near other constants of the same type. */
476 static inline int
477 constant_type (tree t)
479 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
480 return INTEGER_CONST_TYPE;
481 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
482 return FLOAT_CONST_TYPE;
483 else
484 return OTHER_CONST_TYPE;
487 /* qsort comparison function to sort operand entries PA and PB by rank
488 so that the sorted array is ordered by rank in decreasing order. */
489 static int
490 sort_by_operand_rank (const void *pa, const void *pb)
492 const operand_entry_t oea = *(const operand_entry_t *)pa;
493 const operand_entry_t oeb = *(const operand_entry_t *)pb;
495 /* It's nicer for optimize_expression if constants that are likely
496      to fold when added/multiplied/whatever are put next to each
497 other. Since all constants have rank 0, order them by type. */
498 if (oeb->rank == 0 && oea->rank == 0)
500 if (constant_type (oeb->op) != constant_type (oea->op))
501 return constant_type (oeb->op) - constant_type (oea->op);
502 else
503 /* To make sorting result stable, we use unique IDs to determine
504 order. */
505 return oeb->id - oea->id;
508 /* Lastly, make sure the versions that are the same go next to each
509 other. We use SSA_NAME_VERSION because it's stable. */
510 if ((oeb->rank - oea->rank == 0)
511 && TREE_CODE (oea->op) == SSA_NAME
512 && TREE_CODE (oeb->op) == SSA_NAME)
514 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
515 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
516 else
517 return oeb->id - oea->id;
520 if (oeb->rank != oea->rank)
521 return oeb->rank - oea->rank;
522 else
523 return oeb->id - oea->id;
526 /* Add an operand entry to *OPS for the tree operand OP. */
528 static void
529 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
531 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
533 oe->op = op;
534 oe->rank = get_rank (op);
535 oe->id = next_operand_entry_id++;
536 oe->count = 1;
537 ops->safe_push (oe);
540 /* Add an operand entry to *OPS for the tree operand OP with repeat
541 count REPEAT. */
543 static void
544 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
545 HOST_WIDE_INT repeat)
547 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
549 oe->op = op;
550 oe->rank = get_rank (op);
551 oe->id = next_operand_entry_id++;
552 oe->count = repeat;
553 ops->safe_push (oe);
555 reassociate_stats.pows_encountered++;
558 /* Return true if STMT is a reassociable operation containing a binary
559 operation with tree code CODE, and is inside LOOP. */
561 static bool
562 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
564 basic_block bb = gimple_bb (stmt);
566 if (gimple_bb (stmt) == NULL)
567 return false;
569 if (!flow_bb_inside_loop_p (loop, bb))
570 return false;
572 if (is_gimple_assign (stmt)
573 && gimple_assign_rhs_code (stmt) == code
574 && has_single_use (gimple_assign_lhs (stmt)))
575 return true;
577 return false;
581 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
582    operand of that operation.  Otherwise, return NULL.  */
584 static tree
585 get_unary_op (tree name, enum tree_code opcode)
587 gimple stmt = SSA_NAME_DEF_STMT (name);
589 if (!is_gimple_assign (stmt))
590 return NULL_TREE;
592 if (gimple_assign_rhs_code (stmt) == opcode)
593 return gimple_assign_rhs1 (stmt);
594 return NULL_TREE;
597 /* If CURR and LAST are a pair of ops that OPCODE allows us to
598 eliminate through equivalences, do so, remove them from OPS, and
599 return true. Otherwise, return false. */
601 static bool
602 eliminate_duplicate_pair (enum tree_code opcode,
603 vec<operand_entry_t> *ops,
604 bool *all_done,
605 unsigned int i,
606 operand_entry_t curr,
607 operand_entry_t last)
610   /* If we have two of the same op, and the opcode is &, |, min, or max,
611 we can eliminate one of them.
612 If we have two of the same op, and the opcode is ^, we can
613 eliminate both of them. */
615 if (last && last->op == curr->op)
617 switch (opcode)
619 case MAX_EXPR:
620 case MIN_EXPR:
621 case BIT_IOR_EXPR:
622 case BIT_AND_EXPR:
623 if (dump_file && (dump_flags & TDF_DETAILS))
625 fprintf (dump_file, "Equivalence: ");
626 print_generic_expr (dump_file, curr->op, 0);
627 fprintf (dump_file, " [&|minmax] ");
628 print_generic_expr (dump_file, last->op, 0);
629 fprintf (dump_file, " -> ");
630 print_generic_stmt (dump_file, last->op, 0);
633 ops->ordered_remove (i);
634 reassociate_stats.ops_eliminated ++;
636 return true;
638 case BIT_XOR_EXPR:
639 if (dump_file && (dump_flags & TDF_DETAILS))
641 fprintf (dump_file, "Equivalence: ");
642 print_generic_expr (dump_file, curr->op, 0);
643 fprintf (dump_file, " ^ ");
644 print_generic_expr (dump_file, last->op, 0);
645 fprintf (dump_file, " -> nothing\n");
648 reassociate_stats.ops_eliminated += 2;
650 if (ops->length () == 2)
652 ops->create (0);
653 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
654 *all_done = true;
656 else
658 ops->ordered_remove (i-1);
659 ops->ordered_remove (i-1);
662 return true;
664 default:
665 break;
668 return false;
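
/* Illustration of the duplicate-pair cases above (hypothetical operand
   lists with the duplicates adjacent after sorting):

     ops = {a, a, b}  with opcode BIT_AND_EXPR  ->  ops = {a, b}
     ops = {a, a, b}  with opcode BIT_XOR_EXPR  ->  ops = {b}
     ops = {a, a}     with opcode BIT_XOR_EXPR  ->  ops = {0}

   i.e. a & a keeps one copy, a ^ a drops both copies, and if nothing is
   left the whole expression collapses to zero of the operand's type.  */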
671 static vec<tree> plus_negates;
673 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
674 expression, look in OPS for a corresponding positive operation to cancel
675 it out. If we find one, remove the other from OPS, replace
676 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
677 return false. */
679 static bool
680 eliminate_plus_minus_pair (enum tree_code opcode,
681 vec<operand_entry_t> *ops,
682 unsigned int currindex,
683 operand_entry_t curr)
685 tree negateop;
686 tree notop;
687 unsigned int i;
688 operand_entry_t oe;
690 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
691 return false;
693 negateop = get_unary_op (curr->op, NEGATE_EXPR);
694 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
695 if (negateop == NULL_TREE && notop == NULL_TREE)
696 return false;
698 /* Any non-negated version will have a rank that is one less than
699 the current rank. So once we hit those ranks, if we don't find
700 one, we can stop. */
702 for (i = currindex + 1;
703 ops->iterate (i, &oe)
704 && oe->rank >= curr->rank - 1 ;
705 i++)
707 if (oe->op == negateop)
710 if (dump_file && (dump_flags & TDF_DETAILS))
712 fprintf (dump_file, "Equivalence: ");
713 print_generic_expr (dump_file, negateop, 0);
714 fprintf (dump_file, " + -");
715 print_generic_expr (dump_file, oe->op, 0);
716 fprintf (dump_file, " -> 0\n");
719 ops->ordered_remove (i);
720 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
721 ops->ordered_remove (currindex);
722 reassociate_stats.ops_eliminated ++;
724 return true;
726 else if (oe->op == notop)
728 tree op_type = TREE_TYPE (oe->op);
730 if (dump_file && (dump_flags & TDF_DETAILS))
732 fprintf (dump_file, "Equivalence: ");
733 print_generic_expr (dump_file, notop, 0);
734 fprintf (dump_file, " + ~");
735 print_generic_expr (dump_file, oe->op, 0);
736 fprintf (dump_file, " -> -1\n");
739 ops->ordered_remove (i);
740 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
741 ops->ordered_remove (currindex);
742 reassociate_stats.ops_eliminated ++;
744 return true;
748 /* CURR->OP is a negate expr in a plus expr: save it for later
749 inspection in repropagate_negates(). */
750 if (negateop != NULL_TREE)
751 plus_negates.safe_push (curr->op);
753 return false;
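
/* Illustration (hypothetical SSA names; t_2 defined as -x_1 and n_3 as
   ~x_1):

     ops = {t_2, x_1, y}  ->  ops = {y, 0}     since -x_1 + x_1 == 0
     ops = {n_3, x_1, y}  ->  ops = {y, -1}    since ~x_1 + x_1 == -1

   When no matching positive operand is found, a negate is merely
   recorded in plus_negates for repropagate_negates to inspect later.  */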
756 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
757 bitwise not expression, look in OPS for a corresponding operand to
758 cancel it out. If we find one, remove the other from OPS, replace
759 OPS[CURRINDEX] with 0, and return true. Otherwise, return
760 false. */
762 static bool
763 eliminate_not_pairs (enum tree_code opcode,
764 vec<operand_entry_t> *ops,
765 unsigned int currindex,
766 operand_entry_t curr)
768 tree notop;
769 unsigned int i;
770 operand_entry_t oe;
772 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
773 || TREE_CODE (curr->op) != SSA_NAME)
774 return false;
776 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
777 if (notop == NULL_TREE)
778 return false;
780 /* Any non-not version will have a rank that is one less than
781 the current rank. So once we hit those ranks, if we don't find
782 one, we can stop. */
784 for (i = currindex + 1;
785 ops->iterate (i, &oe)
786 && oe->rank >= curr->rank - 1;
787 i++)
789 if (oe->op == notop)
791 if (dump_file && (dump_flags & TDF_DETAILS))
793 fprintf (dump_file, "Equivalence: ");
794 print_generic_expr (dump_file, notop, 0);
795 if (opcode == BIT_AND_EXPR)
796 fprintf (dump_file, " & ~");
797 else if (opcode == BIT_IOR_EXPR)
798 fprintf (dump_file, " | ~");
799 print_generic_expr (dump_file, oe->op, 0);
800 if (opcode == BIT_AND_EXPR)
801 fprintf (dump_file, " -> 0\n");
802 else if (opcode == BIT_IOR_EXPR)
803 fprintf (dump_file, " -> -1\n");
806 if (opcode == BIT_AND_EXPR)
807 oe->op = build_zero_cst (TREE_TYPE (oe->op));
808 else if (opcode == BIT_IOR_EXPR)
809 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
810 TYPE_PRECISION (TREE_TYPE (oe->op)));
812 reassociate_stats.ops_eliminated += ops->length () - 1;
813 ops->truncate (0);
814 ops->quick_push (oe);
815 return true;
819 return false;
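
/* Illustration (hypothetical operands; n_2 defined as ~x_1).  Unlike the
   pairwise eliminations above, finding x & ~x or x | ~x collapses the
   whole operand list:

     ops = {n_2, x_1, y, z}  with BIT_AND_EXPR  ->  ops = {0}
     ops = {n_2, x_1, y, z}  with BIT_IOR_EXPR  ->  ops = {all-ones mask}

   because anything ANDed with zero is zero and anything ORed with all
   ones is all ones.  */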
822 /* Use constant value that may be present in OPS to try to eliminate
823 operands. Note that this function is only really used when we've
824 eliminated ops for other reasons, or merged constants. Across
825 single statements, fold already does all of this, plus more. There
826 is little point in duplicating logic, so I've only included the
827 identities that I could ever construct testcases to trigger. */
829 static void
830 eliminate_using_constants (enum tree_code opcode,
831 vec<operand_entry_t> *ops)
833 operand_entry_t oelast = ops->last ();
834 tree type = TREE_TYPE (oelast->op);
836 if (oelast->rank == 0
837 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
839 switch (opcode)
841 case BIT_AND_EXPR:
842 if (integer_zerop (oelast->op))
844 if (ops->length () != 1)
846 if (dump_file && (dump_flags & TDF_DETAILS))
847 fprintf (dump_file, "Found & 0, removing all other ops\n");
849 reassociate_stats.ops_eliminated += ops->length () - 1;
851 ops->truncate (0);
852 ops->quick_push (oelast);
853 return;
856 else if (integer_all_onesp (oelast->op))
858 if (ops->length () != 1)
860 if (dump_file && (dump_flags & TDF_DETAILS))
861 fprintf (dump_file, "Found & -1, removing\n");
862 ops->pop ();
863 reassociate_stats.ops_eliminated++;
866 break;
867 case BIT_IOR_EXPR:
868 if (integer_all_onesp (oelast->op))
870 if (ops->length () != 1)
872 if (dump_file && (dump_flags & TDF_DETAILS))
873 fprintf (dump_file, "Found | -1, removing all other ops\n");
875 reassociate_stats.ops_eliminated += ops->length () - 1;
877 ops->truncate (0);
878 ops->quick_push (oelast);
879 return;
882 else if (integer_zerop (oelast->op))
884 if (ops->length () != 1)
886 if (dump_file && (dump_flags & TDF_DETAILS))
887 fprintf (dump_file, "Found | 0, removing\n");
888 ops->pop ();
889 reassociate_stats.ops_eliminated++;
892 break;
893 case MULT_EXPR:
894 if (integer_zerop (oelast->op)
895 || (FLOAT_TYPE_P (type)
896 && !HONOR_NANS (TYPE_MODE (type))
897 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
898 && real_zerop (oelast->op)))
900 if (ops->length () != 1)
902 if (dump_file && (dump_flags & TDF_DETAILS))
903 fprintf (dump_file, "Found * 0, removing all other ops\n");
905 reassociate_stats.ops_eliminated += ops->length () - 1;
906 ops->truncate (1);
907 ops->quick_push (oelast);
908 return;
911 else if (integer_onep (oelast->op)
912 || (FLOAT_TYPE_P (type)
913 && !HONOR_SNANS (TYPE_MODE (type))
914 && real_onep (oelast->op)))
916 if (ops->length () != 1)
918 if (dump_file && (dump_flags & TDF_DETAILS))
919 fprintf (dump_file, "Found * 1, removing\n");
920 ops->pop ();
921 reassociate_stats.ops_eliminated++;
922 return;
925 break;
926 case BIT_XOR_EXPR:
927 case PLUS_EXPR:
928 case MINUS_EXPR:
929 if (integer_zerop (oelast->op)
930 || (FLOAT_TYPE_P (type)
931 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
932 && fold_real_zero_addition_p (type, oelast->op,
933 opcode == MINUS_EXPR)))
935 if (ops->length () != 1)
937 if (dump_file && (dump_flags & TDF_DETAILS))
938 fprintf (dump_file, "Found [|^+] 0, removing\n");
939 ops->pop ();
940 reassociate_stats.ops_eliminated++;
941 return;
944 break;
945 default:
946 break;
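
/* Illustration of the constant identities handled above (hypothetical
   expressions, with the trailing constant being oelast):

     a & b & 0   ->  0          a & b & -1  ->  a & b
     a | b | -1  ->  -1         a | b | 0   ->  a | b
     a * b * 0   ->  0          a * b * 1   ->  a * b
     a + b + 0   ->  a + b      a ^ b ^ 0   ->  a ^ b

   The floating-point forms of these identities are only used when the
   HONOR_* checks above say they are safe.  */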
952 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
953 bool, bool);
955 /* Structure for tracking and counting operands. */
956 typedef struct oecount_s {
957 int cnt;
958 int id;
959 enum tree_code oecode;
960 tree op;
961 } oecount;
964 /* The heap for the oecount hashtable and the sorted list of operands. */
965 static vec<oecount> cvec;
968 /* Oecount hashtable helpers. */
970 struct oecount_hasher : typed_noop_remove <void>
972 /* Note that this hash table stores integers, not pointers.
973 So, observe the casting in the member functions. */
974 typedef void value_type;
975 typedef void compare_type;
976 static inline hashval_t hash (const value_type *);
977 static inline bool equal (const value_type *, const compare_type *);
980 /* Hash function for oecount. */
982 inline hashval_t
983 oecount_hasher::hash (const value_type *p)
985 const oecount *c = &cvec[(size_t)p - 42];
986 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
989 /* Comparison function for oecount. */
991 inline bool
992 oecount_hasher::equal (const value_type *p1, const compare_type *p2)
994 const oecount *c1 = &cvec[(size_t)p1 - 42];
995 const oecount *c2 = &cvec[(size_t)p2 - 42];
996 return (c1->oecode == c2->oecode
997 && c1->op == c2->op);
1000 /* Comparison function for qsort sorting oecount elements by count. */
1002 static int
1003 oecount_cmp (const void *p1, const void *p2)
1005 const oecount *c1 = (const oecount *)p1;
1006 const oecount *c2 = (const oecount *)p2;
1007 if (c1->cnt != c2->cnt)
1008 return c1->cnt - c2->cnt;
1009 else
1010 /* If counts are identical, use unique IDs to stabilize qsort. */
1011 return c1->id - c2->id;
1014 /* Return TRUE iff STMT represents a builtin call that raises OP
1015 to some exponent. */
1017 static bool
1018 stmt_is_power_of_op (gimple stmt, tree op)
1020 tree fndecl;
1022 if (!is_gimple_call (stmt))
1023 return false;
1025 fndecl = gimple_call_fndecl (stmt);
1027 if (!fndecl
1028 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1029 return false;
1031 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1033 CASE_FLT_FN (BUILT_IN_POW):
1034 CASE_FLT_FN (BUILT_IN_POWI):
1035 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1037 default:
1038 return false;
1042 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1043 in place and return the result. Assumes that stmt_is_power_of_op
1044 was previously called for STMT and returned TRUE. */
1046 static HOST_WIDE_INT
1047 decrement_power (gimple stmt)
1049 REAL_VALUE_TYPE c, cint;
1050 HOST_WIDE_INT power;
1051 tree arg1;
1053 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1055 CASE_FLT_FN (BUILT_IN_POW):
1056 arg1 = gimple_call_arg (stmt, 1);
1057 c = TREE_REAL_CST (arg1);
1058 power = real_to_integer (&c) - 1;
1059 real_from_integer (&cint, VOIDmode, power, 0, 0);
1060 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1061 return power;
1063 CASE_FLT_FN (BUILT_IN_POWI):
1064 arg1 = gimple_call_arg (stmt, 1);
1065 power = TREE_INT_CST_LOW (arg1) - 1;
1066 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1067 return power;
1069 default:
1070 gcc_unreachable ();
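
/* Illustration (hypothetical call): for a statement

     t_1 = __builtin_powi (x_2, 4);

   decrement_power rewrites the call to __builtin_powi (x_2, 3) in place
   and returns 3; zero_one_operation below uses that to peel one factor
   of x_2 out of a multiplication chain.  */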
1074 /* Find the single immediate use of STMT's LHS, and replace it
1075 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1076 replace *DEF with OP as well. */
1078 static void
1079 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1081 tree lhs;
1082 gimple use_stmt;
1083 use_operand_p use;
1084 gimple_stmt_iterator gsi;
1086 if (is_gimple_call (stmt))
1087 lhs = gimple_call_lhs (stmt);
1088 else
1089 lhs = gimple_assign_lhs (stmt);
1091 gcc_assert (has_single_use (lhs));
1092 single_imm_use (lhs, &use, &use_stmt);
1093 if (lhs == *def)
1094 *def = op;
1095 SET_USE (use, op);
1096 if (TREE_CODE (op) != SSA_NAME)
1097 update_stmt (use_stmt);
1098 gsi = gsi_for_stmt (stmt);
1099 unlink_stmt_vdef (stmt);
1100 gsi_remove (&gsi, true);
1101 release_defs (stmt);
1104 /* Walks the linear chain with result *DEF searching for an operation
1105 with operand OP and code OPCODE removing that from the chain. *DEF
1106 is updated if there is only one operand but no operation left. */
1108 static void
1109 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1111 gimple stmt = SSA_NAME_DEF_STMT (*def);
1115 tree name;
1117 if (opcode == MULT_EXPR
1118 && stmt_is_power_of_op (stmt, op))
1120 if (decrement_power (stmt) == 1)
1121 propagate_op_to_single_use (op, stmt, def);
1122 return;
1125 name = gimple_assign_rhs1 (stmt);
1127 /* If this is the operation we look for and one of the operands
1128 is ours simply propagate the other operand into the stmts
1129 single use. */
1130 if (gimple_assign_rhs_code (stmt) == opcode
1131 && (name == op
1132 || gimple_assign_rhs2 (stmt) == op))
1134 if (name == op)
1135 name = gimple_assign_rhs2 (stmt);
1136 propagate_op_to_single_use (name, stmt, def);
1137 return;
1140 /* We might have a multiply of two __builtin_pow* calls, and
1141 the operand might be hiding in the rightmost one. */
1142 if (opcode == MULT_EXPR
1143 && gimple_assign_rhs_code (stmt) == opcode
1144 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1145 && has_single_use (gimple_assign_rhs2 (stmt)))
1147 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1148 if (stmt_is_power_of_op (stmt2, op))
1150 if (decrement_power (stmt2) == 1)
1151 propagate_op_to_single_use (op, stmt2, def);
1152 return;
1156 /* Continue walking the chain. */
1157 gcc_assert (name != op
1158 && TREE_CODE (name) == SSA_NAME);
1159 stmt = SSA_NAME_DEF_STMT (name);
1161 while (1);
1164 /* Returns true if statement S1 dominates statement S2. Like
1165 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1167 static bool
1168 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1170 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1172 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1173 SSA_NAME. Assume it lives at the beginning of function and
1174 thus dominates everything. */
1175 if (!bb1 || s1 == s2)
1176 return true;
1178 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1179 if (!bb2)
1180 return false;
1182 if (bb1 == bb2)
1184 /* PHIs in the same basic block are assumed to be
1185 executed all in parallel, if only one stmt is a PHI,
1186 it dominates the other stmt in the same basic block. */
1187 if (gimple_code (s1) == GIMPLE_PHI)
1188 return true;
1190 if (gimple_code (s2) == GIMPLE_PHI)
1191 return false;
1193 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1195 if (gimple_uid (s1) < gimple_uid (s2))
1196 return true;
1198 if (gimple_uid (s1) > gimple_uid (s2))
1199 return false;
1201 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1202 unsigned int uid = gimple_uid (s1);
1203 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1205 gimple s = gsi_stmt (gsi);
1206 if (gimple_uid (s) != uid)
1207 break;
1208 if (s == s2)
1209 return true;
1212 return false;
1215 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1218 /* Insert STMT after INSERT_POINT. */
1220 static void
1221 insert_stmt_after (gimple stmt, gimple insert_point)
1223 gimple_stmt_iterator gsi;
1224 basic_block bb;
1226 if (gimple_code (insert_point) == GIMPLE_PHI)
1227 bb = gimple_bb (insert_point);
1228 else if (!stmt_ends_bb_p (insert_point))
1230 gsi = gsi_for_stmt (insert_point);
1231 gimple_set_uid (stmt, gimple_uid (insert_point));
1232 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1233 return;
1235 else
1236 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1237 thus if it must end a basic block, it should be a call that can
1238 throw, or some assignment that can throw. If it throws, the LHS
1239 of it will not be initialized though, so only valid places using
1240 the SSA_NAME should be dominated by the fallthru edge. */
1241 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1242 gsi = gsi_after_labels (bb);
1243 if (gsi_end_p (gsi))
1245 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1246 gimple_set_uid (stmt,
1247 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1249 else
1250 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1251 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1254 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1255 the result. Places the statement after the definition of either
1256 OP1 or OP2. Returns the new statement. */
1258 static gimple
1259 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1261 gimple op1def = NULL, op2def = NULL;
1262 gimple_stmt_iterator gsi;
1263 tree op;
1264 gimple sum;
1266 /* Create the addition statement. */
1267 op = make_ssa_name (type, NULL);
1268 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1270 /* Find an insertion place and insert. */
1271 if (TREE_CODE (op1) == SSA_NAME)
1272 op1def = SSA_NAME_DEF_STMT (op1);
1273 if (TREE_CODE (op2) == SSA_NAME)
1274 op2def = SSA_NAME_DEF_STMT (op2);
1275 if ((!op1def || gimple_nop_p (op1def))
1276 && (!op2def || gimple_nop_p (op2def)))
1278 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1279 if (gsi_end_p (gsi))
1281 gimple_stmt_iterator gsi2
1282 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1283 gimple_set_uid (sum,
1284 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1286 else
1287 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1288 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1290 else
1292 gimple insert_point;
1293 if ((!op1def || gimple_nop_p (op1def))
1294 || (op2def && !gimple_nop_p (op2def)
1295 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1296 insert_point = op2def;
1297 else
1298 insert_point = op1def;
1299 insert_stmt_after (sum, insert_point);
1301 update_stmt (sum);
1303 return sum;
1306 /* Perform un-distribution of divisions and multiplications.
1307 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1308 to (A + B) / X for real X.
1310 The algorithm is organized as follows.
1312 - First we walk the addition chain *OPS looking for summands that
1313 are defined by a multiplication or a real division. This results
1314 in the candidates bitmap with relevant indices into *OPS.
1316 - Second we build the chains of multiplications or divisions for
1317 these candidates, counting the number of occurrences of (operand, code)
1318 pairs in all of the candidates chains.
1320 - Third we sort the (operand, code) pairs by number of occurrence and
1321 process them starting with the pair with the most uses.
1323 * For each such pair we walk the candidates again to build a
1324 second candidate bitmap noting all multiplication/division chains
1325 that have at least one occurrence of (operand, code).
1327 * We build an alternate addition chain only covering these
1328 candidates with one (operand, code) operation removed from their
1329 multiplication/division chain.
1331 * The first candidate gets replaced by the alternate addition chain
1332 multiplied/divided by the operand.
1334 * All candidate chains get disabled for further processing and
1335 processing of (operand, code) pairs continues.
1337 The alternate addition chains built are re-processed by the main
1338 reassociation algorithm which allows optimizing a * x * y + b * y * x
1339 to (a + b ) * x * y in one invocation of the reassociation pass. */
1341 static bool
1342 undistribute_ops_list (enum tree_code opcode,
1343 vec<operand_entry_t> *ops, struct loop *loop)
1345 unsigned int length = ops->length ();
1346 operand_entry_t oe1;
1347 unsigned i, j;
1348 sbitmap candidates, candidates2;
1349 unsigned nr_candidates, nr_candidates2;
1350 sbitmap_iterator sbi0;
1351 vec<operand_entry_t> *subops;
1352 hash_table <oecount_hasher> ctable;
1353 bool changed = false;
1354 int next_oecount_id = 0;
1356 if (length <= 1
1357 || opcode != PLUS_EXPR)
1358 return false;
1360 /* Build a list of candidates to process. */
1361 candidates = sbitmap_alloc (length);
1362 bitmap_clear (candidates);
1363 nr_candidates = 0;
1364 FOR_EACH_VEC_ELT (*ops, i, oe1)
1366 enum tree_code dcode;
1367 gimple oe1def;
1369 if (TREE_CODE (oe1->op) != SSA_NAME)
1370 continue;
1371 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1372 if (!is_gimple_assign (oe1def))
1373 continue;
1374 dcode = gimple_assign_rhs_code (oe1def);
1375 if ((dcode != MULT_EXPR
1376 && dcode != RDIV_EXPR)
1377 || !is_reassociable_op (oe1def, dcode, loop))
1378 continue;
1380 bitmap_set_bit (candidates, i);
1381 nr_candidates++;
1384 if (nr_candidates < 2)
1386 sbitmap_free (candidates);
1387 return false;
1390 if (dump_file && (dump_flags & TDF_DETAILS))
1392 fprintf (dump_file, "searching for un-distribute opportunities ");
1393 print_generic_expr (dump_file,
1394 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1395 fprintf (dump_file, " %d\n", nr_candidates);
1398 /* Build linearized sub-operand lists and the counting table. */
1399 cvec.create (0);
1400 ctable.create (15);
1401 /* ??? Macro arguments cannot have multi-argument template types in
1402 them. This typedef is needed to workaround that limitation. */
1403 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1404 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1405 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1407 gimple oedef;
1408 enum tree_code oecode;
1409 unsigned j;
1411 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1412 oecode = gimple_assign_rhs_code (oedef);
1413 linearize_expr_tree (&subops[i], oedef,
1414 associative_tree_code (oecode), false);
1416 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1418 oecount c;
1419 void **slot;
1420 size_t idx;
1421 c.oecode = oecode;
1422 c.cnt = 1;
1423 c.id = next_oecount_id++;
1424 c.op = oe1->op;
1425 cvec.safe_push (c);
1426 idx = cvec.length () + 41;
1427 slot = ctable.find_slot ((void *)idx, INSERT);
1428 if (!*slot)
1430 *slot = (void *)idx;
1432 else
1434 cvec.pop ();
1435 cvec[(size_t)*slot - 42].cnt++;
1439 ctable.dispose ();
1441 /* Sort the counting table. */
1442 cvec.qsort (oecount_cmp);
1444 if (dump_file && (dump_flags & TDF_DETAILS))
1446 oecount *c;
1447 fprintf (dump_file, "Candidates:\n");
1448 FOR_EACH_VEC_ELT (cvec, j, c)
1450 fprintf (dump_file, " %u %s: ", c->cnt,
1451 c->oecode == MULT_EXPR
1452 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1453 print_generic_expr (dump_file, c->op, 0);
1454 fprintf (dump_file, "\n");
1458 /* Process the (operand, code) pairs in order of most occurrence. */
1459 candidates2 = sbitmap_alloc (length);
1460 while (!cvec.is_empty ())
1462 oecount *c = &cvec.last ();
1463 if (c->cnt < 2)
1464 break;
1466 /* Now collect the operands in the outer chain that contain
1467 the common operand in their inner chain. */
1468 bitmap_clear (candidates2);
1469 nr_candidates2 = 0;
1470 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1472 gimple oedef;
1473 enum tree_code oecode;
1474 unsigned j;
1475 tree op = (*ops)[i]->op;
1477 /* If we undistributed in this chain already this may be
1478 a constant. */
1479 if (TREE_CODE (op) != SSA_NAME)
1480 continue;
1482 oedef = SSA_NAME_DEF_STMT (op);
1483 oecode = gimple_assign_rhs_code (oedef);
1484 if (oecode != c->oecode)
1485 continue;
1487 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1489 if (oe1->op == c->op)
1491 bitmap_set_bit (candidates2, i);
1492 ++nr_candidates2;
1493 break;
1498 if (nr_candidates2 >= 2)
1500 operand_entry_t oe1, oe2;
1501 gimple prod;
1502 int first = bitmap_first_set_bit (candidates2);
1504 /* Build the new addition chain. */
1505 oe1 = (*ops)[first];
1506 if (dump_file && (dump_flags & TDF_DETAILS))
1508 fprintf (dump_file, "Building (");
1509 print_generic_expr (dump_file, oe1->op, 0);
1511 zero_one_operation (&oe1->op, c->oecode, c->op);
1512 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1514 gimple sum;
1515 oe2 = (*ops)[i];
1516 if (dump_file && (dump_flags & TDF_DETAILS))
1518 fprintf (dump_file, " + ");
1519 print_generic_expr (dump_file, oe2->op, 0);
1521 zero_one_operation (&oe2->op, c->oecode, c->op);
1522 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1523 oe1->op, oe2->op, opcode);
1524 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1525 oe2->rank = 0;
1526 oe1->op = gimple_get_lhs (sum);
1529 /* Apply the multiplication/division. */
1530 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1531 oe1->op, c->op, c->oecode);
1532 if (dump_file && (dump_flags & TDF_DETAILS))
1534 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1535 print_generic_expr (dump_file, c->op, 0);
1536 fprintf (dump_file, "\n");
1539 /* Record it in the addition chain and disable further
1540 undistribution with this op. */
1541 oe1->op = gimple_assign_lhs (prod);
1542 oe1->rank = get_rank (oe1->op);
1543 subops[first].release ();
1545 changed = true;
1548 cvec.pop ();
1551 for (i = 0; i < ops->length (); ++i)
1552 subops[i].release ();
1553 free (subops);
1554 cvec.release ();
1555 sbitmap_free (candidates);
1556 sbitmap_free (candidates2);
1558 return changed;
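
/* Worked sketch of the above (hypothetical operands and temporaries).
   For the addition chain *OPS = {a*x, b*x, c}, the pair (x, MULT_EXPR)
   occurs in two candidate chains, so the pass builds

     t  = a + b;
     t2 = t * x;

   and rewrites the chain to *OPS = {t2, 0, c}, which the caller then
   re-optimizes; the net effect is a*x + b*x + c -> (a + b)*x + c.  */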
1561 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1562 expression, examine the other OPS to see if any of them are comparisons
1563 of the same values, which we may be able to combine or eliminate.
1564 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1566 static bool
1567 eliminate_redundant_comparison (enum tree_code opcode,
1568 vec<operand_entry_t> *ops,
1569 unsigned int currindex,
1570 operand_entry_t curr)
1572 tree op1, op2;
1573 enum tree_code lcode, rcode;
1574 gimple def1, def2;
1575 int i;
1576 operand_entry_t oe;
1578 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1579 return false;
1581 /* Check that CURR is a comparison. */
1582 if (TREE_CODE (curr->op) != SSA_NAME)
1583 return false;
1584 def1 = SSA_NAME_DEF_STMT (curr->op);
1585 if (!is_gimple_assign (def1))
1586 return false;
1587 lcode = gimple_assign_rhs_code (def1);
1588 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1589 return false;
1590 op1 = gimple_assign_rhs1 (def1);
1591 op2 = gimple_assign_rhs2 (def1);
1593 /* Now look for a similar comparison in the remaining OPS. */
1594 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1596 tree t;
1598 if (TREE_CODE (oe->op) != SSA_NAME)
1599 continue;
1600 def2 = SSA_NAME_DEF_STMT (oe->op);
1601 if (!is_gimple_assign (def2))
1602 continue;
1603 rcode = gimple_assign_rhs_code (def2);
1604 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1605 continue;
1607 /* If we got here, we have a match. See if we can combine the
1608 two comparisons. */
1609 if (opcode == BIT_IOR_EXPR)
1610 t = maybe_fold_or_comparisons (lcode, op1, op2,
1611 rcode, gimple_assign_rhs1 (def2),
1612 gimple_assign_rhs2 (def2));
1613 else
1614 t = maybe_fold_and_comparisons (lcode, op1, op2,
1615 rcode, gimple_assign_rhs1 (def2),
1616 gimple_assign_rhs2 (def2));
1617 if (!t)
1618 continue;
1620 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1621 always give us a boolean_type_node value back. If the original
1622 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1623 we need to convert. */
1624 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1625 t = fold_convert (TREE_TYPE (curr->op), t);
1627 if (TREE_CODE (t) != INTEGER_CST
1628 && !operand_equal_p (t, curr->op, 0))
1630 enum tree_code subcode;
1631 tree newop1, newop2;
1632 if (!COMPARISON_CLASS_P (t))
1633 continue;
1634 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1635 STRIP_USELESS_TYPE_CONVERSION (newop1);
1636 STRIP_USELESS_TYPE_CONVERSION (newop2);
1637 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1638 continue;
1641 if (dump_file && (dump_flags & TDF_DETAILS))
1643 fprintf (dump_file, "Equivalence: ");
1644 print_generic_expr (dump_file, curr->op, 0);
1645 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1646 print_generic_expr (dump_file, oe->op, 0);
1647 fprintf (dump_file, " -> ");
1648 print_generic_expr (dump_file, t, 0);
1649 fprintf (dump_file, "\n");
1652 /* Now we can delete oe, as it has been subsumed by the new combined
1653 expression t. */
1654 ops->ordered_remove (i);
1655 reassociate_stats.ops_eliminated ++;
1657 /* If t is the same as curr->op, we're done. Otherwise we must
1658 replace curr->op with t. Special case is if we got a constant
1659 back, in which case we add it to the end instead of in place of
1660 the current entry. */
1661 if (TREE_CODE (t) == INTEGER_CST)
1663 ops->ordered_remove (currindex);
1664 add_to_ops_vec (ops, t);
1666 else if (!operand_equal_p (t, curr->op, 0))
1668 gimple sum;
1669 enum tree_code subcode;
1670 tree newop1;
1671 tree newop2;
1672 gcc_assert (COMPARISON_CLASS_P (t));
1673 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1674 STRIP_USELESS_TYPE_CONVERSION (newop1);
1675 STRIP_USELESS_TYPE_CONVERSION (newop2);
1676 gcc_checking_assert (is_gimple_val (newop1)
1677 && is_gimple_val (newop2));
1678 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1679 curr->op = gimple_get_lhs (sum);
1681 return true;
1684 return false;
1687 /* Perform various identities and other optimizations on the list of
1688 operand entries, stored in OPS. The tree code for the binary
1689 operation between all the operands is OPCODE. */
1691 static void
1692 optimize_ops_list (enum tree_code opcode,
1693 vec<operand_entry_t> *ops)
1695 unsigned int length = ops->length ();
1696 unsigned int i;
1697 operand_entry_t oe;
1698 operand_entry_t oelast = NULL;
1699 bool iterate = false;
1701 if (length == 1)
1702 return;
1704 oelast = ops->last ();
1706 /* If the last two are constants, pop the constants off, merge them
1707 and try the next two. */
1708 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1710 operand_entry_t oelm1 = (*ops)[length - 2];
1712 if (oelm1->rank == 0
1713 && is_gimple_min_invariant (oelm1->op)
1714 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1715 TREE_TYPE (oelast->op)))
1717 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1718 oelm1->op, oelast->op);
1720 if (folded && is_gimple_min_invariant (folded))
1722 if (dump_file && (dump_flags & TDF_DETAILS))
1723 fprintf (dump_file, "Merging constants\n");
1725 ops->pop ();
1726 ops->pop ();
1728 add_to_ops_vec (ops, folded);
1729 reassociate_stats.constants_eliminated++;
1731 optimize_ops_list (opcode, ops);
1732 return;
1737 eliminate_using_constants (opcode, ops);
1738 oelast = NULL;
1740 for (i = 0; ops->iterate (i, &oe);)
1742 bool done = false;
1744 if (eliminate_not_pairs (opcode, ops, i, oe))
1745 return;
1746 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1747 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1748 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1750 if (done)
1751 return;
1752 iterate = true;
1753 oelast = NULL;
1754 continue;
1756 oelast = oe;
1757 i++;
1760 length = ops->length ();
1761 oelast = ops->last ();
1763 if (iterate)
1764 optimize_ops_list (opcode, ops);
1767 /* The following functions are subroutines to optimize_range_tests and allow
1768 it to try to change a logical combination of comparisons into a range
1769 test.
1771 For example, both
1772      X == 2 || X == 5 || X == 3 || X == 4
1773    and
1774      X >= 2 && X <= 5
1775 are converted to
1776 (unsigned) (X - 2) <= 3
1778 For more information see comments above fold_test_range in fold-const.c,
1779 this implementation is for GIMPLE. */
1781 struct range_entry
1783 tree exp;
1784 tree low;
1785 tree high;
1786 bool in_p;
1787 bool strict_overflow_p;
1788 unsigned int idx, next;
1791 /* This is similar to make_range in fold-const.c, but on top of
1792 GIMPLE instead of trees. If EXP is non-NULL, it should be
1793 an SSA_NAME and STMT argument is ignored, otherwise STMT
1794 argument should be a GIMPLE_COND. */
1796 static void
1797 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1799 int in_p;
1800 tree low, high;
1801 bool is_bool, strict_overflow_p;
1803 r->exp = NULL_TREE;
1804 r->in_p = false;
1805 r->strict_overflow_p = false;
1806 r->low = NULL_TREE;
1807 r->high = NULL_TREE;
1808 if (exp != NULL_TREE
1809 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1810 return;
1812 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1813 and see if we can refine the range. Some of the cases below may not
1814 happen, but it doesn't seem worth worrying about this. We "continue"
1815 the outer loop when we've changed something; otherwise we "break"
1816 the switch, which will "break" the while. */
1817 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1818 high = low;
1819 in_p = 0;
1820 strict_overflow_p = false;
1821 is_bool = false;
1822 if (exp == NULL_TREE)
1823 is_bool = true;
1824 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1826 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1827 is_bool = true;
1828 else
1829 return;
1831 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1832 is_bool = true;
1834 while (1)
1836 enum tree_code code;
1837 tree arg0, arg1, exp_type;
1838 tree nexp;
1839 location_t loc;
1841 if (exp != NULL_TREE)
1843 if (TREE_CODE (exp) != SSA_NAME
1844 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1845 break;
1847 stmt = SSA_NAME_DEF_STMT (exp);
1848 if (!is_gimple_assign (stmt))
1849 break;
1851 code = gimple_assign_rhs_code (stmt);
1852 arg0 = gimple_assign_rhs1 (stmt);
1853 arg1 = gimple_assign_rhs2 (stmt);
1854 exp_type = TREE_TYPE (exp);
1856 else
1858 code = gimple_cond_code (stmt);
1859 arg0 = gimple_cond_lhs (stmt);
1860 arg1 = gimple_cond_rhs (stmt);
1861 exp_type = boolean_type_node;
1864 if (TREE_CODE (arg0) != SSA_NAME)
1865 break;
1866 loc = gimple_location (stmt);
1867 switch (code)
1869 case BIT_NOT_EXPR:
1870 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1871 /* Ensure the range is either +[-,0], +[0,0],
1872 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1873 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1874 or similar expression of unconditional true or
1875 false, it should not be negated. */
1876 && ((high && integer_zerop (high))
1877 || (low && integer_onep (low))))
1879 in_p = !in_p;
1880 exp = arg0;
1881 continue;
1883 break;
1884 case SSA_NAME:
1885 exp = arg0;
1886 continue;
1887 CASE_CONVERT:
1888 if (is_bool)
1889 goto do_default;
1890 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1892 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1893 is_bool = true;
1894 else
1895 return;
1897 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1898 is_bool = true;
1899 goto do_default;
1900 case EQ_EXPR:
1901 case NE_EXPR:
1902 case LT_EXPR:
1903 case LE_EXPR:
1904 case GE_EXPR:
1905 case GT_EXPR:
1906 is_bool = true;
1907 /* FALLTHRU */
1908 default:
1909 if (!is_bool)
1910 return;
1911 do_default:
1912 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1913 &low, &high, &in_p,
1914 &strict_overflow_p);
1915 if (nexp != NULL_TREE)
1917 exp = nexp;
1918 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1919 continue;
1921 break;
1923 break;
1925 if (is_bool)
1927 r->exp = exp;
1928 r->in_p = in_p;
1929 r->low = low;
1930 r->high = high;
1931 r->strict_overflow_p = strict_overflow_p;
1935 /* Comparison function for qsort. Sort entries
1936 without SSA_NAME exp first, then with SSA_NAMEs sorted
1937 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1938 by increasing ->low and if ->low is the same, by increasing
1939 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1940 maximum. */
1942 static int
1943 range_entry_cmp (const void *a, const void *b)
1945 const struct range_entry *p = (const struct range_entry *) a;
1946 const struct range_entry *q = (const struct range_entry *) b;
1948 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1950 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1952 /* Group range_entries for the same SSA_NAME together. */
1953 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1954 return -1;
1955 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1956 return 1;
1957 /* If ->low is different, NULL low goes first, then by
1958 ascending low. */
1959 if (p->low != NULL_TREE)
1961 if (q->low != NULL_TREE)
1963 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1964 p->low, q->low);
1965 if (tem && integer_onep (tem))
1966 return -1;
1967 tem = fold_binary (GT_EXPR, boolean_type_node,
1968 p->low, q->low);
1969 if (tem && integer_onep (tem))
1970 return 1;
1972 else
1973 return 1;
1975 else if (q->low != NULL_TREE)
1976 return -1;
1977 /* If ->high is different, NULL high goes last, before that by
1978 ascending high. */
1979 if (p->high != NULL_TREE)
1981 if (q->high != NULL_TREE)
1983 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1984 p->high, q->high);
1985 if (tem && integer_onep (tem))
1986 return -1;
1987 tem = fold_binary (GT_EXPR, boolean_type_node,
1988 p->high, q->high);
1989 if (tem && integer_onep (tem))
1990 return 1;
1992 else
1993 return -1;
1995 else if (p->high != NULL_TREE)
1996 return 1;
1997 /* If both ranges are the same, sort below by ascending idx. */
1999 else
2000 return 1;
2002 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2003 return -1;
2005 if (p->idx < q->idx)
2006 return -1;
2007 else
2009 gcc_checking_assert (p->idx > q->idx);
2010 return 1;
2014 /* Helper routine of optimize_range_test.
2015 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2016 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2017 OPCODE and OPS are arguments of optimize_range_tests. Return
2018 true if the range merge has been successful.
2019 If OPCODE is ERROR_MARK, this is called from within
2020 maybe_optimize_range_tests and is performing inter-bb range optimization.
2021 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2022 oe->rank. */
2024 static bool
2025 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2026 unsigned int count, enum tree_code opcode,
2027 vec<operand_entry_t> *ops, tree exp, bool in_p,
2028 tree low, tree high, bool strict_overflow_p)
2030 operand_entry_t oe = (*ops)[range->idx];
2031 tree op = oe->op;
2032 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2033 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2034 location_t loc = gimple_location (stmt);
2035 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2036 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
2037 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2038 gimple_stmt_iterator gsi;
2040 if (tem == NULL_TREE)
2041 return false;
2043 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2044 warning_at (loc, OPT_Wstrict_overflow,
2045 "assuming signed overflow does not occur "
2046 "when simplifying range test");
2048 if (dump_file && (dump_flags & TDF_DETAILS))
2050 struct range_entry *r;
2051 fprintf (dump_file, "Optimizing range tests ");
2052 print_generic_expr (dump_file, range->exp, 0);
2053 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2054 print_generic_expr (dump_file, range->low, 0);
2055 fprintf (dump_file, ", ");
2056 print_generic_expr (dump_file, range->high, 0);
2057 fprintf (dump_file, "]");
2058 for (r = otherrange; r < otherrange + count; r++)
2060 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2061 print_generic_expr (dump_file, r->low, 0);
2062 fprintf (dump_file, ", ");
2063 print_generic_expr (dump_file, r->high, 0);
2064 fprintf (dump_file, "]");
2066 fprintf (dump_file, "\n into ");
2067 print_generic_expr (dump_file, tem, 0);
2068 fprintf (dump_file, "\n");
2071 if (opcode == BIT_IOR_EXPR
2072 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2073 tem = invert_truthvalue_loc (loc, tem);
2075 tem = fold_convert_loc (loc, optype, tem);
2076 gsi = gsi_for_stmt (stmt);
2077 /* In rare cases range->exp can be equal to lhs of stmt.
2078 In that case we have to insert after the stmt rather than before
2079 it. */
2080 if (op == range->exp)
2081 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2082 GSI_CONTINUE_LINKING);
2083 else
2085 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2086 GSI_SAME_STMT);
2087 gsi_prev (&gsi);
2089 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2090 if (gimple_uid (gsi_stmt (gsi)))
2091 break;
2092 else
2093 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2095 oe->op = tem;
2096 range->exp = exp;
2097 range->low = low;
2098 range->high = high;
2099 range->in_p = in_p;
2100 range->strict_overflow_p = false;
2102 for (range = otherrange; range < otherrange + count; range++)
2104 oe = (*ops)[range->idx];
2105 /* Now change all the other range test immediate uses, so that
2106 those tests will be optimized away. */
2107 if (opcode == ERROR_MARK)
2109 if (oe->op)
2110 oe->op = build_int_cst (TREE_TYPE (oe->op),
2111 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2112 else
2113 oe->op = (oe->rank == BIT_IOR_EXPR
2114 ? boolean_false_node : boolean_true_node);
2116 else
2117 oe->op = error_mark_node;
2118 range->exp = NULL_TREE;
2120 return true;
2123 /* Optimize X == CST1 || X == CST2
2124 if popcount (CST1 ^ CST2) == 1 into
2125 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2126 Similarly for ranges. E.g.
2127 X != 2 && X != 3 && X != 10 && X != 11
2128 will be transformed by the previous optimization into
2129 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2130 and this loop can transform that into
2131 !(((X & ~8) - 2U) <= 1U). */
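/* For the example above the two ranges are [2, 3] and [10, 11]:
   lowi ^ lowj == 2 ^ 10 == 8 == 3 ^ 11 == highi ^ highj and 8 has a
   single bit set, so EXP becomes X & ~8 and the merged range is
   [2 & ~8, 3 & ~8] == [2, 3], i.e. ((X & ~8) - 2U) <= 1U.  */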
2133 static bool
2134 optimize_range_tests_xor (enum tree_code opcode, tree type,
2135 tree lowi, tree lowj, tree highi, tree highj,
2136 vec<operand_entry_t> *ops,
2137 struct range_entry *rangei,
2138 struct range_entry *rangej)
2140 tree lowxor, highxor, tem, exp;
2141 /* Check highi ^ lowi == highj ^ lowj and
2142 popcount (highi ^ lowi) == 1. */
2143 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2144 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2145 return false;
2146 if (tree_log2 (lowxor) < 0)
2147 return false;
2148 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2149 if (!tree_int_cst_equal (lowxor, highxor))
2150 return false;
2152 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2153 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2154 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2155 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2156 if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2157 rangei->in_p, lowj, highj,
2158 rangei->strict_overflow_p
2159 || rangej->strict_overflow_p))
2160 return true;
2161 return false;
2164 /* Optimize X == CST1 || X == CST2
2165 if popcount (CST2 - CST1) == 1 into
2166 ((X - CST1) & ~(CST2 - CST1)) == 0.
2167 Similarly for ranges. E.g.
2168 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2169 || X == 75 || X == 45
2170 will be transformed by the previous optimization into
2171 (X - 43U) <= 3U || (X - 75U) <= 3U
2172 and this loop can transform that into
2173 ((X - 43U) & ~(75U - 43U)) <= 3U. */
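/* For the example above the two ranges are [43, 46] and [75, 78]:
   highi - lowi == highj - lowj == 3 and lowj - lowi == 75 - 43 == 32
   has a single bit set, so the merged test checks
   (X - 43U) & ~32U against [0, 3], which is the form shown above.  */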
2174 static bool
2175 optimize_range_tests_diff (enum tree_code opcode, tree type,
2176 tree lowi, tree lowj, tree highi, tree highj,
2177 vec<operand_entry_t> *ops,
2178 struct range_entry *rangei,
2179 struct range_entry *rangej)
2181 tree tem1, tem2, mask;
2182 /* Check highi - lowi == highj - lowj. */
2183 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2184 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2185 return false;
2186 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2187 if (!tree_int_cst_equal (tem1, tem2))
2188 return false;
2189 /* Check popcount (lowj - lowi) == 1. */
2190 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2191 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2192 return false;
2193 if (tree_log2 (tem1) < 0)
2194 return false;
2196 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2197 tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
2198 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2199 lowj = build_int_cst (type, 0);
2200 if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2201 rangei->in_p, lowj, tem2,
2202 rangei->strict_overflow_p
2203 || rangej->strict_overflow_p))
2204 return true;
2205 return false;
2208 /* Helper function that performs the common checks for optimize_range_tests_xor
2209 and optimize_range_tests_diff.
2210 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2211 Else it calls optimize_range_tests_diff. */
2213 static bool
2214 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2215 bool optimize_xor, vec<operand_entry_t> *ops,
2216 struct range_entry *ranges)
2218 int i, j;
2219 bool any_changes = false;
2220 for (i = first; i < length; i++)
2222 tree lowi, highi, lowj, highj, type, tem;
2224 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2225 continue;
2226 type = TREE_TYPE (ranges[i].exp);
2227 if (!INTEGRAL_TYPE_P (type))
2228 continue;
2229 lowi = ranges[i].low;
2230 if (lowi == NULL_TREE)
2231 lowi = TYPE_MIN_VALUE (type);
2232 highi = ranges[i].high;
2233 if (highi == NULL_TREE)
2234 continue;
2235 for (j = i + 1; j < length && j < i + 64; j++)
2237 bool changes;
2238 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2239 continue;
2240 lowj = ranges[j].low;
2241 if (lowj == NULL_TREE)
2242 continue;
2243 highj = ranges[j].high;
2244 if (highj == NULL_TREE)
2245 highj = TYPE_MAX_VALUE (type);
2246 /* Check lowj > highi. */
2247 tem = fold_binary (GT_EXPR, boolean_type_node,
2248 lowj, highi);
2249 if (tem == NULL_TREE || !integer_onep (tem))
2250 continue;
2251 if (optimize_xor)
2252 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2253 highi, highj, ops,
2254 ranges + i, ranges + j);
2255 else
2256 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2257 highi, highj, ops,
2258 ranges + i, ranges + j);
2259 if (changes)
2261 any_changes = true;
2262 break;
2266 return any_changes;
2269 /* Optimize range tests, similarly to how fold_range_test optimizes
2270 them on trees. The tree code for the binary
2271 operation between all the operands is OPCODE.
2272 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2273 maybe_optimize_range_tests for inter-bb range optimization.
2274 In that case if oe->op is NULL, oe->id is bb->index whose
2275 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2276 the actual opcode. */
2278 static bool
2279 optimize_range_tests (enum tree_code opcode,
2280 vec<operand_entry_t> *ops)
2282 unsigned int length = ops->length (), i, j, first;
2283 operand_entry_t oe;
2284 struct range_entry *ranges;
2285 bool any_changes = false;
2287 if (length == 1)
2288 return false;
2290 ranges = XNEWVEC (struct range_entry, length);
2291 for (i = 0; i < length; i++)
2293 oe = (*ops)[i];
2294 ranges[i].idx = i;
2295 init_range_entry (ranges + i, oe->op,
2296 oe->op ? NULL :
2297 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2298 /* For | invert it now, we will invert it again before emitting
2299 the optimized expression. */
2300 if (opcode == BIT_IOR_EXPR
2301 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2302 ranges[i].in_p = !ranges[i].in_p;
2305 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2306 for (i = 0; i < length; i++)
2307 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2308 break;
2310 /* Try to merge ranges. */
2311 for (first = i; i < length; i++)
2313 tree low = ranges[i].low;
2314 tree high = ranges[i].high;
2315 int in_p = ranges[i].in_p;
2316 bool strict_overflow_p = ranges[i].strict_overflow_p;
2317 int update_fail_count = 0;
2319 for (j = i + 1; j < length; j++)
2321 if (ranges[i].exp != ranges[j].exp)
2322 break;
2323 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2324 ranges[j].in_p, ranges[j].low, ranges[j].high))
2325 break;
2326 strict_overflow_p |= ranges[j].strict_overflow_p;
2329 if (j == i + 1)
2330 continue;
2332 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2333 ops, ranges[i].exp, in_p, low, high,
2334 strict_overflow_p))
2336 i = j - 1;
2337 any_changes = true;
2339 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2340 while update_range_test would fail. */
2341 else if (update_fail_count == 64)
2342 i = j - 1;
2343 else
2344 ++update_fail_count;
2347 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2348 ops, ranges);
2350 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2351 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2352 ops, ranges);
2354 if (any_changes && opcode != ERROR_MARK)
2356 j = 0;
2357 FOR_EACH_VEC_ELT (*ops, i, oe)
2359 if (oe->op == error_mark_node)
2360 continue;
2361 else if (i != j)
2362 (*ops)[j] = oe;
2363 j++;
2365 ops->truncate (j);
2368 XDELETEVEC (ranges);
2369 return any_changes;
2372 /* Return true if STMT is a cast like:
2373 <bb N>:
2375 _123 = (int) _234;
2377 <bb M>:
2378 # _345 = PHI <_123(N), 1(...), 1(...)>
2379 where _234 has bool type, _123 has single use and
2380 bb N has a single successor M. This is commonly used in
2381 the last block of a range test. */
2383 static bool
2384 final_range_test_p (gimple stmt)
2386 basic_block bb, rhs_bb;
2387 edge e;
2388 tree lhs, rhs;
2389 use_operand_p use_p;
2390 gimple use_stmt;
2392 if (!gimple_assign_cast_p (stmt))
2393 return false;
2394 bb = gimple_bb (stmt);
2395 if (!single_succ_p (bb))
2396 return false;
2397 e = single_succ_edge (bb);
2398 if (e->flags & EDGE_COMPLEX)
2399 return false;
2401 lhs = gimple_assign_lhs (stmt);
2402 rhs = gimple_assign_rhs1 (stmt);
2403 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2404 || TREE_CODE (rhs) != SSA_NAME
2405 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2406 return false;
2408 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2409 if (!single_imm_use (lhs, &use_p, &use_stmt))
2410 return false;
2412 if (gimple_code (use_stmt) != GIMPLE_PHI
2413 || gimple_bb (use_stmt) != e->dest)
2414 return false;
2416 /* And that the rhs is defined in the same loop. */
2417 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2418 if (rhs_bb == NULL
2419 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2420 return false;
2422 return true;
2425 /* Return true if BB is a suitable basic block for inter-bb range test
2426 optimization. If BACKWARD is true, BB should be the only predecessor
2427 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2428 or compared against to find the common basic block to which all conditions
2429 branch if true resp. false. If BACKWARD is false, TEST_BB should
2430 be the only predecessor of BB. */
2432 static bool
2433 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2434 bool backward)
2436 edge_iterator ei, ei2;
2437 edge e, e2;
2438 gimple stmt;
2439 gimple_stmt_iterator gsi;
2440 bool other_edge_seen = false;
2441 bool is_cond;
2443 if (test_bb == bb)
2444 return false;
2445 /* Check last stmt first. */
2446 stmt = last_stmt (bb);
2447 if (stmt == NULL
2448 || (gimple_code (stmt) != GIMPLE_COND
2449 && (backward || !final_range_test_p (stmt)))
2450 || gimple_visited_p (stmt)
2451 || stmt_could_throw_p (stmt)
2452 || *other_bb == bb)
2453 return false;
2454 is_cond = gimple_code (stmt) == GIMPLE_COND;
2455 if (is_cond)
2457 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2458 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2459 to *OTHER_BB (if not set yet, try to find it out). */
2460 if (EDGE_COUNT (bb->succs) != 2)
2461 return false;
2462 FOR_EACH_EDGE (e, ei, bb->succs)
2464 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2465 return false;
2466 if (e->dest == test_bb)
2468 if (backward)
2469 continue;
2470 else
2471 return false;
2473 if (e->dest == bb)
2474 return false;
2475 if (*other_bb == NULL)
2477 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2478 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2479 return false;
2480 else if (e->dest == e2->dest)
2481 *other_bb = e->dest;
2482 if (*other_bb == NULL)
2483 return false;
2485 if (e->dest == *other_bb)
2486 other_edge_seen = true;
2487 else if (backward)
2488 return false;
2490 if (*other_bb == NULL || !other_edge_seen)
2491 return false;
2493 else if (single_succ (bb) != *other_bb)
2494 return false;
2496 /* Now check all PHIs of *OTHER_BB. */
2497 e = find_edge (bb, *other_bb);
2498 e2 = find_edge (test_bb, *other_bb);
2499 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2501 gimple phi = gsi_stmt (gsi);
2502 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2503 corresponding to BB and TEST_BB predecessor must be the same. */
2504 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2505 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2507 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2508 one of the PHIs should have the lhs of the last stmt in
2509 that block as PHI arg and that PHI should have 0 or 1
2510 corresponding to it in all other range test basic blocks
2511 considered. */
2512 if (!is_cond)
2514 if (gimple_phi_arg_def (phi, e->dest_idx)
2515 == gimple_assign_lhs (stmt)
2516 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2517 || integer_onep (gimple_phi_arg_def (phi,
2518 e2->dest_idx))))
2519 continue;
2521 else
2523 gimple test_last = last_stmt (test_bb);
2524 if (gimple_code (test_last) != GIMPLE_COND
2525 && gimple_phi_arg_def (phi, e2->dest_idx)
2526 == gimple_assign_lhs (test_last)
2527 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2528 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2529 continue;
2532 return false;
2535 return true;
2538 /* Return true if BB doesn't have side-effects that would disallow
2539 range test optimization, all SSA_NAMEs set in the bb are consumed
2540 in the bb and there are no PHIs. */
2542 static bool
2543 no_side_effect_bb (basic_block bb)
2545 gimple_stmt_iterator gsi;
2546 gimple last;
2548 if (!gimple_seq_empty_p (phi_nodes (bb)))
2549 return false;
2550 last = last_stmt (bb);
2551 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2553 gimple stmt = gsi_stmt (gsi);
2554 tree lhs;
2555 imm_use_iterator imm_iter;
2556 use_operand_p use_p;
2558 if (is_gimple_debug (stmt))
2559 continue;
2560 if (gimple_has_side_effects (stmt))
2561 return false;
2562 if (stmt == last)
2563 return true;
2564 if (!is_gimple_assign (stmt))
2565 return false;
2566 lhs = gimple_assign_lhs (stmt);
2567 if (TREE_CODE (lhs) != SSA_NAME)
2568 return false;
2569 if (gimple_assign_rhs_could_trap_p (stmt))
2570 return false;
2571 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2573 gimple use_stmt = USE_STMT (use_p);
2574 if (is_gimple_debug (use_stmt))
2575 continue;
2576 if (gimple_bb (use_stmt) != bb)
2577 return false;
2580 return false;
2583 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2584 return true and fill in *OPS recursively. */
2586 static bool
2587 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2588 struct loop *loop)
2590 gimple stmt = SSA_NAME_DEF_STMT (var);
2591 tree rhs[2];
2592 int i;
2594 if (!is_reassociable_op (stmt, code, loop))
2595 return false;
2597 rhs[0] = gimple_assign_rhs1 (stmt);
2598 rhs[1] = gimple_assign_rhs2 (stmt);
2599 gimple_set_visited (stmt, true);
2600 for (i = 0; i < 2; i++)
2601 if (TREE_CODE (rhs[i]) == SSA_NAME
2602 && !get_ops (rhs[i], code, ops, loop)
2603 && has_single_use (rhs[i]))
2605 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2607 oe->op = rhs[i];
2608 oe->rank = code;
2609 oe->id = 0;
2610 oe->count = 1;
2611 ops->safe_push (oe);
2613 return true;
2616 /* Find the ops that were added by get_ops starting from VAR, see if
2617 they were changed during update_range_test and if yes, create new
2618 stmts. */
2620 static tree
2621 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2622 unsigned int *pidx, struct loop *loop)
2624 gimple stmt = SSA_NAME_DEF_STMT (var);
2625 tree rhs[4];
2626 int i;
2628 if (!is_reassociable_op (stmt, code, loop))
2629 return NULL;
2631 rhs[0] = gimple_assign_rhs1 (stmt);
2632 rhs[1] = gimple_assign_rhs2 (stmt);
2633 rhs[2] = rhs[0];
2634 rhs[3] = rhs[1];
2635 for (i = 0; i < 2; i++)
2636 if (TREE_CODE (rhs[i]) == SSA_NAME)
2638 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2639 if (rhs[2 + i] == NULL_TREE)
2641 if (has_single_use (rhs[i]))
2642 rhs[2 + i] = ops[(*pidx)++]->op;
2643 else
2644 rhs[2 + i] = rhs[i];
2647 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2648 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2650 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2651 var = make_ssa_name (TREE_TYPE (var), NULL);
2652 gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2653 var, rhs[2], rhs[3]);
2654 gimple_set_uid (g, gimple_uid (stmt));
2655 gimple_set_visited (g, true);
2656 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2658 return var;
2661 /* Structure to track the initial value passed to get_ops and
2662 the range in the ops vector for each basic block. */
2664 struct inter_bb_range_test_entry
2666 tree op;
2667 unsigned int first_idx, last_idx;
2670 /* Inter-bb range test optimization. */
2672 static void
2673 maybe_optimize_range_tests (gimple stmt)
2675 basic_block first_bb = gimple_bb (stmt);
2676 basic_block last_bb = first_bb;
2677 basic_block other_bb = NULL;
2678 basic_block bb;
2679 edge_iterator ei;
2680 edge e;
2681 auto_vec<operand_entry_t> ops;
2682 auto_vec<inter_bb_range_test_entry> bbinfo;
2683 bool any_changes = false;
2685 /* Consider only basic blocks that end with GIMPLE_COND or
2686 a cast statement satisfying final_range_test_p. All
2687 but the last bb in the first_bb .. last_bb range
2688 should end with GIMPLE_COND. */
2689 if (gimple_code (stmt) == GIMPLE_COND)
2691 if (EDGE_COUNT (first_bb->succs) != 2)
2692 return;
2694 else if (final_range_test_p (stmt))
2695 other_bb = single_succ (first_bb);
2696 else
2697 return;
2699 if (stmt_could_throw_p (stmt))
2700 return;
2702 /* As relative ordering of post-dominator sons isn't fixed,
2703 maybe_optimize_range_tests can be called first on any
2704 bb in the range we want to optimize. So, start searching
2705 backwards, if first_bb can be set to a predecessor. */
2706 while (single_pred_p (first_bb))
2708 basic_block pred_bb = single_pred (first_bb);
2709 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2710 break;
2711 if (!no_side_effect_bb (first_bb))
2712 break;
2713 first_bb = pred_bb;
2715 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2716 Before starting forward search in last_bb successors, find
2717 out the other_bb. */
2718 if (first_bb == last_bb)
2720 other_bb = NULL;
2721 /* As non-GIMPLE_COND last stmt always terminates the range,
2722 if the backward search didn't discover anything, just give up.  */
2723 if (gimple_code (stmt) != GIMPLE_COND)
2724 return;
2725 /* Look at both successors. Either it ends with a GIMPLE_COND
2726 and satisfies suitable_cond_bb, or ends with a cast and
2727 other_bb is that cast's successor. */
2728 FOR_EACH_EDGE (e, ei, first_bb->succs)
2729 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2730 || e->dest == first_bb)
2731 return;
2732 else if (single_pred_p (e->dest))
2734 stmt = last_stmt (e->dest);
2735 if (stmt
2736 && gimple_code (stmt) == GIMPLE_COND
2737 && EDGE_COUNT (e->dest->succs) == 2)
2739 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2740 break;
2741 else
2742 other_bb = NULL;
2744 else if (stmt
2745 && final_range_test_p (stmt)
2746 && find_edge (first_bb, single_succ (e->dest)))
2748 other_bb = single_succ (e->dest);
2749 if (other_bb == first_bb)
2750 other_bb = NULL;
2753 if (other_bb == NULL)
2754 return;
2756 /* Now do the forward search, moving last_bb to successor bbs
2757 that aren't other_bb. */
2758 while (EDGE_COUNT (last_bb->succs) == 2)
2760 FOR_EACH_EDGE (e, ei, last_bb->succs)
2761 if (e->dest != other_bb)
2762 break;
2763 if (e == NULL)
2764 break;
2765 if (!single_pred_p (e->dest))
2766 break;
2767 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2768 break;
2769 if (!no_side_effect_bb (e->dest))
2770 break;
2771 last_bb = e->dest;
2773 if (first_bb == last_bb)
2774 return;
2775 /* Here basic blocks first_bb through last_bb's predecessor
2776 end with GIMPLE_COND, all of them have one of the edges to
2777 other_bb and another to another block in the range,
2778 all blocks except first_bb don't have side-effects and
2779 last_bb ends with either GIMPLE_COND, or cast satisfying
2780 final_range_test_p. */
2781 for (bb = last_bb; ; bb = single_pred (bb))
2783 enum tree_code code;
2784 tree lhs, rhs;
2785 inter_bb_range_test_entry bb_ent;
2787 bb_ent.op = NULL_TREE;
2788 bb_ent.first_idx = ops.length ();
2789 bb_ent.last_idx = bb_ent.first_idx;
2790 e = find_edge (bb, other_bb);
2791 stmt = last_stmt (bb);
2792 gimple_set_visited (stmt, true);
2793 if (gimple_code (stmt) != GIMPLE_COND)
2795 use_operand_p use_p;
2796 gimple phi;
2797 edge e2;
2798 unsigned int d;
2800 lhs = gimple_assign_lhs (stmt);
2801 rhs = gimple_assign_rhs1 (stmt);
2802 gcc_assert (bb == last_bb);
2804 /* stmt is
2805 _123 = (int) _234;
2807 followed by:
2808 <bb M>:
2809 # _345 = PHI <_123(N), 1(...), 1(...)>
2811 or 0 instead of 1. If it is 0, the _234
2812 range test is anded together with all the
2813 other range tests, if it is 1, it is ored with
2814 them. */
2815 single_imm_use (lhs, &use_p, &phi);
2816 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2817 e2 = find_edge (first_bb, other_bb);
2818 d = e2->dest_idx;
2819 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2820 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2821 code = BIT_AND_EXPR;
2822 else
2824 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2825 code = BIT_IOR_EXPR;
2828 /* If _234 SSA_NAME_DEF_STMT is
2829 _234 = _567 | _789;
2830 (or &, corresponding to 1/0 in the phi arguments,
2831 push into ops the individual range test arguments
2832 of the bitwise or resp. and, recursively. */
2833 if (!get_ops (rhs, code, &ops,
2834 loop_containing_stmt (stmt))
2835 && has_single_use (rhs))
2837 /* Otherwise, push the _234 range test itself. */
2838 operand_entry_t oe
2839 = (operand_entry_t) pool_alloc (operand_entry_pool);
2841 oe->op = rhs;
2842 oe->rank = code;
2843 oe->id = 0;
2844 oe->count = 1;
2845 ops.safe_push (oe);
2846 bb_ent.last_idx++;
2848 else
2849 bb_ent.last_idx = ops.length ();
2850 bb_ent.op = rhs;
2851 bbinfo.safe_push (bb_ent);
2852 continue;
2854 /* Otherwise stmt is GIMPLE_COND. */
2855 code = gimple_cond_code (stmt);
2856 lhs = gimple_cond_lhs (stmt);
2857 rhs = gimple_cond_rhs (stmt);
2858 if (TREE_CODE (lhs) == SSA_NAME
2859 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2860 && ((code != EQ_EXPR && code != NE_EXPR)
2861 || rhs != boolean_false_node
2862 /* Either push into ops the individual bitwise
2863 or resp. and operands, depending on which
2864 edge is other_bb. */
2865 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2866 ^ (code == EQ_EXPR))
2867 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2868 loop_containing_stmt (stmt))))
2870 /* Or push the GIMPLE_COND stmt itself. */
2871 operand_entry_t oe
2872 = (operand_entry_t) pool_alloc (operand_entry_pool);
2874 oe->op = NULL;
2875 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2876 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2877 /* oe->op = NULL signals that there is no SSA_NAME
2878 for the range test, and oe->id instead is the
2879 number of the basic block at whose end the GIMPLE_COND
2880 is. */
2881 oe->id = bb->index;
2882 oe->count = 1;
2883 ops.safe_push (oe);
2884 bb_ent.op = NULL;
2885 bb_ent.last_idx++;
2887 else if (ops.length () > bb_ent.first_idx)
2889 bb_ent.op = lhs;
2890 bb_ent.last_idx = ops.length ();
2892 bbinfo.safe_push (bb_ent);
2893 if (bb == first_bb)
2894 break;
2896 if (ops.length () > 1)
2897 any_changes = optimize_range_tests (ERROR_MARK, &ops);
2898 if (any_changes)
2900 unsigned int idx;
2901 /* update_ops relies on has_single_use predicates returning the
2902 same values as it did during get_ops earlier. Additionally it
2903 never removes statements, only adds new ones and it should walk
2904 from the single imm use and check the predicate already before
2905 making those changes.
2906 On the other hand, handling GIMPLE_COND directly can turn
2907 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
2908 it needs to be done in a separate loop afterwards. */
2909 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2911 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2912 && bbinfo[idx].op != NULL_TREE)
2914 tree new_op;
2916 stmt = last_stmt (bb);
2917 new_op = update_ops (bbinfo[idx].op,
2918 (enum tree_code)
2919 ops[bbinfo[idx].first_idx]->rank,
2920 ops, &bbinfo[idx].first_idx,
2921 loop_containing_stmt (stmt));
2922 if (new_op == NULL_TREE)
2924 gcc_assert (bb == last_bb);
2925 new_op = ops[bbinfo[idx].first_idx++]->op;
2927 if (bbinfo[idx].op != new_op)
2929 imm_use_iterator iter;
2930 use_operand_p use_p;
2931 gimple use_stmt, cast_stmt = NULL;
2933 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
2934 if (is_gimple_debug (use_stmt))
2935 continue;
2936 else if (gimple_code (use_stmt) == GIMPLE_COND
2937 || gimple_code (use_stmt) == GIMPLE_PHI)
2938 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2939 SET_USE (use_p, new_op);
2940 else if (gimple_assign_cast_p (use_stmt))
2941 cast_stmt = use_stmt;
2942 else
2943 gcc_unreachable ();
2944 if (cast_stmt)
2946 gcc_assert (bb == last_bb);
2947 tree lhs = gimple_assign_lhs (cast_stmt);
2948 tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
2949 enum tree_code rhs_code
2950 = gimple_assign_rhs_code (cast_stmt);
2951 gimple g;
2952 if (is_gimple_min_invariant (new_op))
2954 new_op = fold_convert (TREE_TYPE (lhs), new_op);
2955 g = gimple_build_assign (new_lhs, new_op);
2957 else
2958 g = gimple_build_assign_with_ops (rhs_code, new_lhs,
2959 new_op, NULL_TREE);
2960 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
2961 gimple_set_uid (g, gimple_uid (cast_stmt));
2962 gimple_set_visited (g, true);
2963 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2964 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2965 if (is_gimple_debug (use_stmt))
2966 continue;
2967 else if (gimple_code (use_stmt) == GIMPLE_COND
2968 || gimple_code (use_stmt) == GIMPLE_PHI)
2969 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2970 SET_USE (use_p, new_lhs);
2971 else
2972 gcc_unreachable ();
2976 if (bb == first_bb)
2977 break;
2979 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2981 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2982 && bbinfo[idx].op == NULL_TREE
2983 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
2985 stmt = last_stmt (bb);
2986 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
2987 gimple_cond_make_false (stmt);
2988 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
2989 gimple_cond_make_true (stmt);
2990 else
2992 gimple_cond_set_code (stmt, NE_EXPR);
2993 gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
2994 gimple_cond_set_rhs (stmt, boolean_false_node);
2996 update_stmt (stmt);
2998 if (bb == first_bb)
2999 break;
3004 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3005 of STMT in its operands. This is also known as a "destructive
3006 update" operation. */
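/* E.g. given
     b_1 = PHI <a_2 (latch), ...>
     a_2 = c_3 + b_1;
   calling this for the a_2 assignment with OPERAND b_1 returns true,
   because the PHI defining b_1 uses the assignment's lhs a_2.  */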
3008 static bool
3009 is_phi_for_stmt (gimple stmt, tree operand)
3011 gimple def_stmt;
3012 tree lhs;
3013 use_operand_p arg_p;
3014 ssa_op_iter i;
3016 if (TREE_CODE (operand) != SSA_NAME)
3017 return false;
3019 lhs = gimple_assign_lhs (stmt);
3021 def_stmt = SSA_NAME_DEF_STMT (operand);
3022 if (gimple_code (def_stmt) != GIMPLE_PHI)
3023 return false;
3025 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3026 if (lhs == USE_FROM_PTR (arg_p))
3027 return true;
3028 return false;
3031 /* Remove def stmt of VAR if VAR has zero uses and recurse
3032 on rhs1 operand if so. */
3034 static void
3035 remove_visited_stmt_chain (tree var)
3037 gimple stmt;
3038 gimple_stmt_iterator gsi;
3040 while (1)
3042 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3043 return;
3044 stmt = SSA_NAME_DEF_STMT (var);
3045 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3047 var = gimple_assign_rhs1 (stmt);
3048 gsi = gsi_for_stmt (stmt);
3049 gsi_remove (&gsi, true);
3050 release_defs (stmt);
3052 else
3053 return;
3057 /* This function checks three consecutive operands in
3058 the passed operand vector OPS starting from OPINDEX and
3059 swaps two operands if it is profitable for the binary operation
3060 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3062 We pair ops with the same rank if possible.
3064 The alternative we try is to see if STMT is a destructive
3065 update style statement, which is like:
3066 b = phi (a, ...)
3067 a = c + b;
3068 In that case, we want to use the destructive update form to
3069 expose the possible vectorizer sum reduction opportunity.
3070 In that case, the third operand will be the phi node. This
3071 check is not performed if STMT is null.
3073 We could, of course, try to be better as noted above, and do a
3074 lot of work to try to find these opportunities in >3 operand
3075 cases, but it is unlikely to be worth it. */
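/* E.g. with ranks (5, 5, 3) the first branch below swaps OE1 and OE3,
   giving (3, 5, 5), so the two rank-5 operands sit at OPINDEX + 1 and
   OPINDEX + 2 and are paired in the same binary operation; ranks
   (5, 3, 5) are handled likewise by the second branch.  */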
3077 static void
3078 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3079 unsigned int opindex, gimple stmt)
3081 operand_entry_t oe1, oe2, oe3;
3083 oe1 = ops[opindex];
3084 oe2 = ops[opindex + 1];
3085 oe3 = ops[opindex + 2];
3087 if ((oe1->rank == oe2->rank
3088 && oe2->rank != oe3->rank)
3089 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3090 && !is_phi_for_stmt (stmt, oe1->op)
3091 && !is_phi_for_stmt (stmt, oe2->op)))
3093 struct operand_entry temp = *oe3;
3094 oe3->op = oe1->op;
3095 oe3->rank = oe1->rank;
3096 oe1->op = temp.op;
3097 oe1->rank= temp.rank;
3099 else if ((oe1->rank == oe3->rank
3100 && oe2->rank != oe3->rank)
3101 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3102 && !is_phi_for_stmt (stmt, oe1->op)
3103 && !is_phi_for_stmt (stmt, oe3->op)))
3105 struct operand_entry temp = *oe2;
3106 oe2->op = oe1->op;
3107 oe2->rank = oe1->rank;
3108 oe1->op = temp.op;
3109 oe1->rank = temp.rank;
3113 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3114 two definitions, otherwise return STMT. */
3116 static inline gimple
3117 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3119 if (TREE_CODE (rhs1) == SSA_NAME
3120 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3121 stmt = SSA_NAME_DEF_STMT (rhs1);
3122 if (TREE_CODE (rhs2) == SSA_NAME
3123 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3124 stmt = SSA_NAME_DEF_STMT (rhs2);
3125 return stmt;
3128 /* Recursively rewrite our linearized statements so that the operators
3129 match those in OPS[OPINDEX], putting the computation in rank
3130 order. Return new lhs. */
3132 static tree
3133 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3134 vec<operand_entry_t> ops, bool changed)
3136 tree rhs1 = gimple_assign_rhs1 (stmt);
3137 tree rhs2 = gimple_assign_rhs2 (stmt);
3138 tree lhs = gimple_assign_lhs (stmt);
3139 operand_entry_t oe;
3141 /* The final recursion case for this function is that you have
3142 exactly two operations left.
3143 If we had exactly one op in the entire list to start with, we
3144 would have never called this function, and the tail recursion
3145 rewrites them one at a time. */
3146 if (opindex + 2 == ops.length ())
3148 operand_entry_t oe1, oe2;
3150 oe1 = ops[opindex];
3151 oe2 = ops[opindex + 1];
3153 if (rhs1 != oe1->op || rhs2 != oe2->op)
3155 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3156 unsigned int uid = gimple_uid (stmt);
3158 if (dump_file && (dump_flags & TDF_DETAILS))
3160 fprintf (dump_file, "Transforming ");
3161 print_gimple_stmt (dump_file, stmt, 0, 0);
3164 if (changed)
3166 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3167 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3168 stmt
3169 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3170 lhs, oe1->op, oe2->op);
3171 gimple_set_uid (stmt, uid);
3172 gimple_set_visited (stmt, true);
3173 if (insert_point == gsi_stmt (gsi))
3174 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3175 else
3176 insert_stmt_after (stmt, insert_point);
3178 else
3180 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3181 == stmt);
3182 gimple_assign_set_rhs1 (stmt, oe1->op);
3183 gimple_assign_set_rhs2 (stmt, oe2->op);
3184 update_stmt (stmt);
3187 if (rhs1 != oe1->op && rhs1 != oe2->op)
3188 remove_visited_stmt_chain (rhs1);
3190 if (dump_file && (dump_flags & TDF_DETAILS))
3192 fprintf (dump_file, " into ");
3193 print_gimple_stmt (dump_file, stmt, 0, 0);
3196 return lhs;
3199 /* If we hit here, we should have 3 or more ops left. */
3200 gcc_assert (opindex + 2 < ops.length ());
3202 /* Rewrite the next operator. */
3203 oe = ops[opindex];
3205 /* Recurse on the LHS of the binary operator, which is guaranteed to
3206 be the non-leaf side. */
3207 tree new_rhs1
3208 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3209 changed || oe->op != rhs2);
3211 if (oe->op != rhs2 || new_rhs1 != rhs1)
3213 if (dump_file && (dump_flags & TDF_DETAILS))
3215 fprintf (dump_file, "Transforming ");
3216 print_gimple_stmt (dump_file, stmt, 0, 0);
3219 /* If changed is false, this is either opindex == 0
3220 or all outer rhs2's were equal to corresponding oe->op,
3221 and powi_result is NULL.
3222 That means lhs is equivalent before and after reassociation.
3223 Otherwise ensure the old lhs SSA_NAME is not reused and
3224 create a new stmt as well, so that any debug stmts will be
3225 properly adjusted. */
3226 if (changed)
3228 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3229 unsigned int uid = gimple_uid (stmt);
3230 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3232 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3233 stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3234 lhs, new_rhs1, oe->op);
3235 gimple_set_uid (stmt, uid);
3236 gimple_set_visited (stmt, true);
3237 if (insert_point == gsi_stmt (gsi))
3238 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3239 else
3240 insert_stmt_after (stmt, insert_point);
3242 else
3244 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3245 == stmt);
3246 gimple_assign_set_rhs1 (stmt, new_rhs1);
3247 gimple_assign_set_rhs2 (stmt, oe->op);
3248 update_stmt (stmt);
3251 if (dump_file && (dump_flags & TDF_DETAILS))
3253 fprintf (dump_file, " into ");
3254 print_gimple_stmt (dump_file, stmt, 0, 0);
3257 return lhs;
3260 /* Find out how many cycles we need to compute a statement chain.
3261 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3262 maximum number of independent statements we may execute per cycle. */
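/* For instance, with OPS_NUM == 10 and CPU_WIDTH == 2 we get
   10 / 4 == 2 cycles of width-limited reduction, leaving
   10 - 2 * 2 == 6 operands; 6 is not a power of two, so another
   floor_log2 (6) + 1 == 3 cycles are needed, 5 cycles in total.  */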
3264 static int
3265 get_required_cycles (int ops_num, int cpu_width)
3267 int res;
3268 int elog;
3269 unsigned int rest;
3271 /* While we have more than 2 * cpu_width operands
3272 we may reduce number of operands by cpu_width
3273 per cycle. */
3274 res = ops_num / (2 * cpu_width);
3276 /* The remaining operand count may be halved each cycle
3277 until we have only one operand. */
3278 rest = (unsigned)(ops_num - res * cpu_width);
3279 elog = exact_log2 (rest);
3280 if (elog >= 0)
3281 res += elog;
3282 else
3283 res += floor_log2 (rest) + 1;
3285 return res;
3288 /* Returns an optimal number of registers to use for computation of
3289 given statements. */
3291 static int
3292 get_reassociation_width (int ops_num, enum tree_code opc,
3293 enum machine_mode mode)
3295 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3296 int width;
3297 int width_min;
3298 int cycles_best;
3300 if (param_width > 0)
3301 width = param_width;
3302 else
3303 width = targetm.sched.reassociation_width (opc, mode);
3305 if (width == 1)
3306 return width;
3308 /* Get the minimal time required for sequence computation. */
3309 cycles_best = get_required_cycles (ops_num, width);
3311 /* Check if we may use less width and still compute sequence for
3312 the same time. It will allow us to reduce registers usage.
3313 get_required_cycles is monotonically increasing with lower width
3314 so we can perform a binary search for the minimal width that still
3315 results in the optimal cycle count. */
3316 width_min = 1;
3317 while (width > width_min)
3319 int width_mid = (width + width_min) / 2;
3321 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3322 width = width_mid;
3323 else if (width_min < width_mid)
3324 width_min = width_mid;
3325 else
3326 break;
3329 return width;
3332 /* Recursively rewrite our linearized statements so that the operators
3333 match those in OPS[OPINDEX], putting the computation in rank
3334 order and trying to allow operations to be executed in
3335 parallel. */
3337 static void
3338 rewrite_expr_tree_parallel (gimple stmt, int width,
3339 vec<operand_entry_t> ops)
3341 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3342 int op_num = ops.length ();
3343 int stmt_num = op_num - 1;
3344 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3345 int op_index = op_num - 1;
3346 int stmt_index = 0;
3347 int ready_stmts_end = 0;
3348 int i = 0;
3349 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3351 /* We start expression rewriting from the top statements.
3352 So, in this loop we create a full list of statements
3353 we will work with. */
3354 stmts[stmt_num - 1] = stmt;
3355 for (i = stmt_num - 2; i >= 0; i--)
3356 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3358 for (i = 0; i < stmt_num; i++)
3360 tree op1, op2;
3362 /* Determine whether we should use results of
3363 already handled statements or not. */
3364 if (ready_stmts_end == 0
3365 && (i - stmt_index >= width || op_index < 1))
3366 ready_stmts_end = i;
3368 /* Now we choose operands for the next statement. A nonzero
3369 value in ready_stmts_end means here that we should use
3370 the result of already generated statements as new operand. */
3371 if (ready_stmts_end > 0)
3373 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3374 if (ready_stmts_end > stmt_index)
3375 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3376 else if (op_index >= 0)
3377 op2 = ops[op_index--]->op;
3378 else
3380 gcc_assert (stmt_index < i);
3381 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3384 if (stmt_index >= ready_stmts_end)
3385 ready_stmts_end = 0;
3387 else
3389 if (op_index > 1)
3390 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3391 op2 = ops[op_index--]->op;
3392 op1 = ops[op_index--]->op;
3395 /* If we emit the last statement then we should put
3396 operands into the last statement. It will also
3397 break the loop. */
3398 if (op_index < 0 && stmt_index == i)
3399 i = stmt_num - 1;
3401 if (dump_file && (dump_flags & TDF_DETAILS))
3403 fprintf (dump_file, "Transforming ");
3404 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3407 /* We keep the original statement only for the last one. All
3408 others are recreated. */
3409 if (i == stmt_num - 1)
3411 gimple_assign_set_rhs1 (stmts[i], op1);
3412 gimple_assign_set_rhs2 (stmts[i], op2);
3413 update_stmt (stmts[i]);
3415 else
3416 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3418 if (dump_file && (dump_flags & TDF_DETAILS))
3420 fprintf (dump_file, " into ");
3421 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3425 remove_visited_stmt_chain (last_rhs1);
3428 /* Transform STMT, which is really (A + B) + (C + D), into the left
3429 linear form, ((A+B)+C)+D.
3430 Recurse on D if necessary. */
3432 static void
3433 linearize_expr (gimple stmt)
3435 gimple_stmt_iterator gsi;
3436 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3437 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3438 gimple oldbinrhs = binrhs;
3439 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3440 gimple newbinrhs = NULL;
3441 struct loop *loop = loop_containing_stmt (stmt);
3442 tree lhs = gimple_assign_lhs (stmt);
3444 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3445 && is_reassociable_op (binrhs, rhscode, loop));
3447 gsi = gsi_for_stmt (stmt);
3449 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3450 binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3451 make_ssa_name (TREE_TYPE (lhs), NULL),
3452 gimple_assign_lhs (binlhs),
3453 gimple_assign_rhs2 (binrhs));
3454 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3455 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3456 gimple_set_uid (binrhs, gimple_uid (stmt));
3458 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3459 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3461 if (dump_file && (dump_flags & TDF_DETAILS))
3463 fprintf (dump_file, "Linearized: ");
3464 print_gimple_stmt (dump_file, stmt, 0, 0);
3467 reassociate_stats.linearized++;
3468 update_stmt (stmt);
3470 gsi = gsi_for_stmt (oldbinrhs);
3471 gsi_remove (&gsi, true);
3472 release_defs (oldbinrhs);
3474 gimple_set_visited (stmt, true);
3475 gimple_set_visited (binlhs, true);
3476 gimple_set_visited (binrhs, true);
3478 /* Tail recurse on the new rhs if it still needs reassociation. */
3479 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3480 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3481 want to change the algorithm while converting to tuples. */
3482 linearize_expr (stmt);
3485 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3486 it. Otherwise, return NULL. */
3488 static gimple
3489 get_single_immediate_use (tree lhs)
3491 use_operand_p immuse;
3492 gimple immusestmt;
3494 if (TREE_CODE (lhs) == SSA_NAME
3495 && single_imm_use (lhs, &immuse, &immusestmt)
3496 && is_gimple_assign (immusestmt))
3497 return immusestmt;
3499 return NULL;
3502 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3503 representing the negated value. Insertions of any necessary
3504 instructions go before GSI.
3505 This function is recursive in that, if you hand it "a_5" as the
3506 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3507 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3509 static tree
3510 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3512 gimple negatedefstmt = NULL;
3513 tree resultofnegate;
3514 gimple_stmt_iterator gsi;
3515 unsigned int uid;
3517 /* If we are trying to negate a name, defined by an add, negate the
3518 add operands instead. */
3519 if (TREE_CODE (tonegate) == SSA_NAME)
3520 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3521 if (TREE_CODE (tonegate) == SSA_NAME
3522 && is_gimple_assign (negatedefstmt)
3523 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3524 && has_single_use (gimple_assign_lhs (negatedefstmt))
3525 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3527 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3528 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3529 tree lhs = gimple_assign_lhs (negatedefstmt);
3530 gimple g;
3532 gsi = gsi_for_stmt (negatedefstmt);
3533 rhs1 = negate_value (rhs1, &gsi);
3535 gsi = gsi_for_stmt (negatedefstmt);
3536 rhs2 = negate_value (rhs2, &gsi);
3538 gsi = gsi_for_stmt (negatedefstmt);
3539 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3540 gimple_set_visited (negatedefstmt, true);
3541 g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3542 gimple_set_uid (g, gimple_uid (negatedefstmt));
3543 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3544 return lhs;
3547 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3548 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3549 NULL_TREE, true, GSI_SAME_STMT);
3550 gsi = *gsip;
3551 uid = gimple_uid (gsi_stmt (gsi));
3552 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3554 gimple stmt = gsi_stmt (gsi);
3555 if (gimple_uid (stmt) != 0)
3556 break;
3557 gimple_set_uid (stmt, uid);
3559 return resultofnegate;
3562 /* Return true if we should break up the subtract in STMT into an add
3563 with negate. This is true when the subtract operands are really
3564 adds, or the subtract itself is used in an add expression. In
3565 either case, breaking up the subtract into an add with negate
3566 exposes the adds to reassociation. */
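/* E.g.
     t_1 = a_2 - b_3;
     u_4 = t_1 + c_5;
   qualifies because the subtract result only feeds an addition;
   breaking it up into t_1 = a_2 + -b_3 exposes a_2, -b_3 and c_5
   to reassociation as a single chain.  */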
3568 static bool
3569 should_break_up_subtract (gimple stmt)
3571 tree lhs = gimple_assign_lhs (stmt);
3572 tree binlhs = gimple_assign_rhs1 (stmt);
3573 tree binrhs = gimple_assign_rhs2 (stmt);
3574 gimple immusestmt;
3575 struct loop *loop = loop_containing_stmt (stmt);
3577 if (TREE_CODE (binlhs) == SSA_NAME
3578 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3579 return true;
3581 if (TREE_CODE (binrhs) == SSA_NAME
3582 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3583 return true;
3585 if (TREE_CODE (lhs) == SSA_NAME
3586 && (immusestmt = get_single_immediate_use (lhs))
3587 && is_gimple_assign (immusestmt)
3588 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3589 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3590 return true;
3591 return false;
3594 /* Transform STMT from A - B into A + -B. */
3596 static void
3597 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3599 tree rhs1 = gimple_assign_rhs1 (stmt);
3600 tree rhs2 = gimple_assign_rhs2 (stmt);
3602 if (dump_file && (dump_flags & TDF_DETAILS))
3604 fprintf (dump_file, "Breaking up subtract ");
3605 print_gimple_stmt (dump_file, stmt, 0, 0);
3608 rhs2 = negate_value (rhs2, gsip);
3609 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3610 update_stmt (stmt);
3613 /* Determine whether STMT is a builtin call that raises an SSA name
3614 to an integer power and has only one use. If so, and this is early
3615 reassociation and unsafe math optimizations are permitted, place
3616 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3617 If any of these conditions does not hold, return FALSE. */
3619 static bool
3620 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3622 tree fndecl, arg1;
3623 REAL_VALUE_TYPE c, cint;
3625 if (!first_pass_instance
3626 || !flag_unsafe_math_optimizations
3627 || !is_gimple_call (stmt)
3628 || !has_single_use (gimple_call_lhs (stmt)))
3629 return false;
3631 fndecl = gimple_call_fndecl (stmt);
3633 if (!fndecl
3634 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3635 return false;
3637 switch (DECL_FUNCTION_CODE (fndecl))
3639 CASE_FLT_FN (BUILT_IN_POW):
3640 *base = gimple_call_arg (stmt, 0);
3641 arg1 = gimple_call_arg (stmt, 1);
3643 if (TREE_CODE (arg1) != REAL_CST)
3644 return false;
3646 c = TREE_REAL_CST (arg1);
3648 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3649 return false;
3651 *exponent = real_to_integer (&c);
3652 real_from_integer (&cint, VOIDmode, *exponent,
3653 *exponent < 0 ? -1 : 0, 0);
3654 if (!real_identical (&c, &cint))
3655 return false;
3657 break;
3659 CASE_FLT_FN (BUILT_IN_POWI):
3660 *base = gimple_call_arg (stmt, 0);
3661 arg1 = gimple_call_arg (stmt, 1);
3663 if (!tree_fits_shwi_p (arg1))
3664 return false;
3666 *exponent = tree_to_shwi (arg1);
3667 break;
3669 default:
3670 return false;
3673 /* Expanding negative exponents is generally unproductive, so we don't
3674 complicate matters with those. Exponents of zero and one should
3675 have been handled by expression folding. */
3676 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3677 return false;
3679 return true;
3682 /* Recursively linearize a binary expression that is the RHS of STMT.
3683 Place the operands of the expression tree in the vector named OPS. */
3685 static void
3686 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3687 bool is_associative, bool set_visited)
3689 tree binlhs = gimple_assign_rhs1 (stmt);
3690 tree binrhs = gimple_assign_rhs2 (stmt);
3691 gimple binlhsdef = NULL, binrhsdef = NULL;
3692 bool binlhsisreassoc = false;
3693 bool binrhsisreassoc = false;
3694 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3695 struct loop *loop = loop_containing_stmt (stmt);
3696 tree base = NULL_TREE;
3697 HOST_WIDE_INT exponent = 0;
3699 if (set_visited)
3700 gimple_set_visited (stmt, true);
3702 if (TREE_CODE (binlhs) == SSA_NAME)
3704 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3705 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3706 && !stmt_could_throw_p (binlhsdef));
3709 if (TREE_CODE (binrhs) == SSA_NAME)
3711 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3712 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3713 && !stmt_could_throw_p (binrhsdef));
3716 /* If the LHS is not reassociable, but the RHS is, we need to swap
3717 them. If neither is reassociable, there is nothing we can do, so
3718 just put them in the ops vector. If the LHS is reassociable,
3719 linearize it. If both are reassociable, then linearize the RHS
3720 and the LHS. */
3722 if (!binlhsisreassoc)
3724 tree temp;
3726 /* If this is not an associative operation, like division, give up. */
3727 if (!is_associative)
3729 add_to_ops_vec (ops, binrhs);
3730 return;
3733 if (!binrhsisreassoc)
3735 if (rhscode == MULT_EXPR
3736 && TREE_CODE (binrhs) == SSA_NAME
3737 && acceptable_pow_call (binrhsdef, &base, &exponent))
3739 add_repeat_to_ops_vec (ops, base, exponent);
3740 gimple_set_visited (binrhsdef, true);
3742 else
3743 add_to_ops_vec (ops, binrhs);
3745 if (rhscode == MULT_EXPR
3746 && TREE_CODE (binlhs) == SSA_NAME
3747 && acceptable_pow_call (binlhsdef, &base, &exponent))
3749 add_repeat_to_ops_vec (ops, base, exponent);
3750 gimple_set_visited (binlhsdef, true);
3752 else
3753 add_to_ops_vec (ops, binlhs);
3755 return;
3758 if (dump_file && (dump_flags & TDF_DETAILS))
3760 fprintf (dump_file, "swapping operands of ");
3761 print_gimple_stmt (dump_file, stmt, 0, 0);
3764 swap_ssa_operands (stmt,
3765 gimple_assign_rhs1_ptr (stmt),
3766 gimple_assign_rhs2_ptr (stmt));
3767 update_stmt (stmt);
3769 if (dump_file && (dump_flags & TDF_DETAILS))
3771 fprintf (dump_file, " is now ");
3772 print_gimple_stmt (dump_file, stmt, 0, 0);
3775 /* We want to make it so the lhs is always the reassociative op,
3776 so swap. */
3777 temp = binlhs;
3778 binlhs = binrhs;
3779 binrhs = temp;
3781 else if (binrhsisreassoc)
3783 linearize_expr (stmt);
3784 binlhs = gimple_assign_rhs1 (stmt);
3785 binrhs = gimple_assign_rhs2 (stmt);
3788 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3789 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3790 rhscode, loop));
3791 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3792 is_associative, set_visited);
3794 if (rhscode == MULT_EXPR
3795 && TREE_CODE (binrhs) == SSA_NAME
3796 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3798 add_repeat_to_ops_vec (ops, base, exponent);
3799 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3801 else
3802 add_to_ops_vec (ops, binrhs);
3805 /* Repropagate the negates back into subtracts, since no other pass
3806 currently does it. */
3808 static void
3809 repropagate_negates (void)
3811 unsigned int i = 0;
3812 tree negate;
3814 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3816 gimple user = get_single_immediate_use (negate);
3818 if (!user || !is_gimple_assign (user))
3819 continue;
3821 /* The negate operand can be either operand of a PLUS_EXPR
3822 (it can be the LHS if the RHS is a constant for example).
3824 Force the negate operand to the RHS of the PLUS_EXPR, then
3825 transform the PLUS_EXPR into a MINUS_EXPR. */
3826 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3828 /* If the negated operand appears on the LHS of the
3829 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3830 to force the negated operand to the RHS of the PLUS_EXPR. */
3831 if (gimple_assign_rhs1 (user) == negate)
3833 swap_ssa_operands (user,
3834 gimple_assign_rhs1_ptr (user),
3835 gimple_assign_rhs2_ptr (user));
3838 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3839 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3840 if (gimple_assign_rhs2 (user) == negate)
3842 tree rhs1 = gimple_assign_rhs1 (user);
3843 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3844 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3845 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3846 update_stmt (user);
3849 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3851 if (gimple_assign_rhs1 (user) == negate)
3853 /* We have
3854 x = -a
3855 y = x - b
3856 which we transform into
3857 x = a + b
3858 y = -x .
3859 This pushes down the negate which we possibly can merge
3860 into some other operation, hence insert it into the
3861 plus_negates vector. */
3862 gimple feed = SSA_NAME_DEF_STMT (negate);
3863 tree a = gimple_assign_rhs1 (feed);
3864 tree b = gimple_assign_rhs2 (user);
3865 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
3866 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
3867 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
3868 gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
3869 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3870 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
3871 user = gsi_stmt (gsi2);
3872 update_stmt (user);
3873 gsi_remove (&gsi, true);
3874 release_defs (feed);
3875 plus_negates.safe_push (gimple_assign_lhs (user));
3877 else
3879 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3880 rid of one operation. */
3881 gimple feed = SSA_NAME_DEF_STMT (negate);
3882 tree a = gimple_assign_rhs1 (feed);
3883 tree rhs1 = gimple_assign_rhs1 (user);
3884 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3885 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3886 update_stmt (gsi_stmt (gsi));
3892 /* Returns true if OP is of a type for which we can do reassociation.
3893 That is for integral or non-saturating fixed-point types, and for
3894 floating point types when associative-math is enabled.  */
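/* E.g. an unsigned int, a signed int with -fwrapv, or a double when
   -fassociative-math (e.g. via -ffast-math) is in effect qualify;
   a plain signed int does not, since its overflow is undefined and
   reassociation could introduce new overflows.  */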
3896 static bool
3897 can_reassociate_p (tree op)
3899 tree type = TREE_TYPE (op);
3900 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3901 || NON_SAT_FIXED_POINT_TYPE_P (type)
3902 || (flag_associative_math && FLOAT_TYPE_P (type)))
3903 return true;
3904 return false;
3907 /* Break up subtract operations in block BB.
3909 We do this top down because we don't know whether the subtract is
3910 part of a possible chain of reassociation except at the top.
3912 IE given
3913 d = f + g
3914 c = a + e
3915 b = c - d
3916 q = b - r
3917 k = t - q
3919 we want to break up k = t - q, but we won't until we've transformed q
3920 = b - r, which won't be broken up until we transform b = c - d.
3922 En passant, clear the GIMPLE visited flag on every statement
3923 and set UIDs within each basic block. */
3925 static void
3926 break_up_subtract_bb (basic_block bb)
3928 gimple_stmt_iterator gsi;
3929 basic_block son;
3930 unsigned int uid = 1;
3932 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3934 gimple stmt = gsi_stmt (gsi);
3935 gimple_set_visited (stmt, false);
3936 gimple_set_uid (stmt, uid++);
3938 if (!is_gimple_assign (stmt)
3939 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3940 continue;
3942 /* Look for simple gimple subtract operations. */
3943 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3945 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3946 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3947 continue;
3949 /* Check for a subtract used only in an addition. If this
3950 is the case, transform it into add of a negate for better
3951 reassociation. IE transform C = A-B into C = A + -B if C
3952 is only used in an addition. */
3953 if (should_break_up_subtract (stmt))
3954 break_up_subtract (stmt, &gsi);
3956 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3957 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3958 plus_negates.safe_push (gimple_assign_lhs (stmt));
3960 for (son = first_dom_son (CDI_DOMINATORS, bb);
3961 son;
3962 son = next_dom_son (CDI_DOMINATORS, son))
3963 break_up_subtract_bb (son);
3966 /* Used for repeated factor analysis. */
3967 struct repeat_factor_d
3969 /* An SSA name that occurs in a multiply chain. */
3970 tree factor;
3972 /* Cached rank of the factor. */
3973 unsigned rank;
3975 /* Number of occurrences of the factor in the chain. */
3976 HOST_WIDE_INT count;
3978 /* An SSA name representing the product of this factor and
3979 all factors appearing later in the repeated factor vector. */
3980 tree repr;
3983 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3984 typedef const struct repeat_factor_d *const_repeat_factor_t;
3987 static vec<repeat_factor> repeat_factor_vec;
3989 /* Used for sorting the repeat factor vector. Sort primarily by
3990 ascending occurrence count, secondarily by descending rank. */
3992 static int
3993 compare_repeat_factors (const void *x1, const void *x2)
3995 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3996 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3998 if (rf1->count != rf2->count)
3999 return rf1->count < rf2->count ? -1 : 1;
4001 return rf1->rank == rf2->rank ? 0 : (rf1->rank > rf2->rank ? -1 : 1);
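/* For instance, given factors { y: count 2, rank 7 }, { z: count 2, rank 5 }
   and { x: count 4, rank 9 }, the sorted order is y, z, x: counts ascend,
   and among equal counts the higher-ranked factor comes first.  */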
4004 /* Look for repeated operands in OPS in the multiply tree rooted at
4005 STMT. Replace them with an optimal sequence of multiplies and powi
4006 builtin calls, and remove the used operands from OPS. Return an
4007 SSA name representing the value of the replacement sequence. */
4009 static tree
4010 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4012 unsigned i, j, vec_len;
4013 int ii;
4014 operand_entry_t oe;
4015 repeat_factor_t rf1, rf2;
4016 repeat_factor rfnew;
4017 tree result = NULL_TREE;
4018 tree target_ssa, iter_result;
4019 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4020 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4021 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4022 gimple mul_stmt, pow_stmt;
4024 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4025 target. */
4026 if (!powi_fndecl)
4027 return NULL_TREE;
4029 /* Allocate the repeated factor vector. */
4030 repeat_factor_vec.create (10);
4032 /* Scan the OPS vector for all SSA names in the product and build
4033 up a vector of occurrence counts for each factor. */
4034 FOR_EACH_VEC_ELT (*ops, i, oe)
4036 if (TREE_CODE (oe->op) == SSA_NAME)
4038 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4040 if (rf1->factor == oe->op)
4042 rf1->count += oe->count;
4043 break;
4047 if (j >= repeat_factor_vec.length ())
4049 rfnew.factor = oe->op;
4050 rfnew.rank = oe->rank;
4051 rfnew.count = oe->count;
4052 rfnew.repr = NULL_TREE;
4053 repeat_factor_vec.safe_push (rfnew);
4058 /* Sort the repeated factor vector by (a) increasing occurrence count,
4059 and (b) decreasing rank. */
4060 repeat_factor_vec.qsort (compare_repeat_factors);
4062 /* It is generally best to combine as many base factors as possible
4063 into a product before applying __builtin_powi to the result.
4064 However, the sort order chosen for the repeated factor vector
4065 allows us to cache partial results for the product of the base
4066 factors for subsequent use. When we already have a cached partial
4067 result from a previous iteration, it is best to make use of it
4068 before looking for another __builtin_powi opportunity.
4070 As an example, consider x * x * y * y * y * z * z * z * z.
4071 We want to first compose the product x * y * z, raise it to the
4072 second power, then multiply this by y * z, and finally multiply
4073 by z. This can be done in 5 multiplies provided we cache y * z
4074 for use in both expressions:
4076 t1 = y * z
4077 t2 = t1 * x
4078 t3 = t2 * t2
4079 t4 = t1 * t3
4080 result = t4 * z
4082 If we instead ignored the cached y * z and first multiplied by
4083 the __builtin_powi opportunity z * z, we would get the inferior:
4085 t1 = y * z
4086 t2 = t1 * x
4087 t3 = t2 * t2
4088 t4 = z * z
4089 t5 = t3 * t4
4090 result = t5 * y */
4092 vec_len = repeat_factor_vec.length ();
4094 /* Repeatedly look for opportunities to create a builtin_powi call. */
4095 while (true)
4097 HOST_WIDE_INT power;
4099 /* First look for the largest cached product of factors from
4100 preceding iterations. If found, create a builtin_powi for
4101 it if the minimum occurrence count for its factors is at
4102 least 2, or just use this cached product as our next
4103 multiplicand if the minimum occurrence count is 1. */
4104 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4106 if (rf1->repr && rf1->count > 0)
4107 break;
4110 if (j < vec_len)
4112 power = rf1->count;
4114 if (power == 1)
4116 iter_result = rf1->repr;
4118 if (dump_file && (dump_flags & TDF_DETAILS))
4120 unsigned elt;
4121 repeat_factor_t rf;
4122 fputs ("Multiplying by cached product ", dump_file);
4123 for (elt = j; elt < vec_len; elt++)
4125 rf = &repeat_factor_vec[elt];
4126 print_generic_expr (dump_file, rf->factor, 0);
4127 if (elt < vec_len - 1)
4128 fputs (" * ", dump_file);
4130 fputs ("\n", dump_file);
4133 else
4135 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4136 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4137 build_int_cst (integer_type_node,
4138 power));
4139 gimple_call_set_lhs (pow_stmt, iter_result);
4140 gimple_set_location (pow_stmt, gimple_location (stmt));
4141 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4143 if (dump_file && (dump_flags & TDF_DETAILS))
4145 unsigned elt;
4146 repeat_factor_t rf;
4147 fputs ("Building __builtin_pow call for cached product (",
4148 dump_file);
4149 for (elt = j; elt < vec_len; elt++)
4151 rf = &repeat_factor_vec[elt];
4152 print_generic_expr (dump_file, rf->factor, 0);
4153 if (elt < vec_len - 1)
4154 fputs (" * ", dump_file);
4156 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4157 power);
4161 else
4163 /* Otherwise, find the first factor in the repeated factor
4164 vector whose occurrence count is at least 2. If no such
4165 factor exists, there are no builtin_powi opportunities
4166 remaining. */
4167 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4169 if (rf1->count >= 2)
4170 break;
4173 if (j >= vec_len)
4174 break;
4176 power = rf1->count;
4178 if (dump_file && (dump_flags & TDF_DETAILS))
4180 unsigned elt;
4181 repeat_factor_t rf;
4182 fputs ("Building __builtin_pow call for (", dump_file);
4183 for (elt = j; elt < vec_len; elt++)
4185 rf = &repeat_factor_vec[elt];
4186 print_generic_expr (dump_file, rf->factor, 0);
4187 if (elt < vec_len - 1)
4188 fputs (" * ", dump_file);
4190 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4193 reassociate_stats.pows_created++;
4195 /* Visit each element of the vector in reverse order (so that
4196 high-occurrence elements are visited first, and within the
4197 same occurrence count, lower-ranked elements are visited
4198 first). Form a linear product of all elements in this order
4199 whose occurrence count is at least that of element J.
4200 Record the SSA name representing the product of each element
4201 with all subsequent elements in the vector. */
4202 if (j == vec_len - 1)
4203 rf1->repr = rf1->factor;
4204 else
4206 for (ii = vec_len - 2; ii >= (int)j; ii--)
4208 tree op1, op2;
4210 rf1 = &repeat_factor_vec[ii];
4211 rf2 = &repeat_factor_vec[ii + 1];
4213 /* Init the last factor's representative to be itself. */
4214 if (!rf2->repr)
4215 rf2->repr = rf2->factor;
4217 op1 = rf1->factor;
4218 op2 = rf2->repr;
4220 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4221 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4222 target_ssa,
4223 op1, op2);
4224 gimple_set_location (mul_stmt, gimple_location (stmt));
4225 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4226 rf1->repr = target_ssa;
4228 /* Don't reprocess the multiply we just introduced. */
4229 gimple_set_visited (mul_stmt, true);
4233 /* Form a call to __builtin_powi for the maximum product
4234 just formed, raised to the power obtained earlier. */
4235 rf1 = &repeat_factor_vec[j];
4236 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4237 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4238 build_int_cst (integer_type_node,
4239 power));
4240 gimple_call_set_lhs (pow_stmt, iter_result);
4241 gimple_set_location (pow_stmt, gimple_location (stmt));
4242 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4245 /* If we previously formed at least one other builtin_powi call,
4246 form the product of this one and those others. */
4247 if (result)
4249 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4250 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4251 result, iter_result);
4252 gimple_set_location (mul_stmt, gimple_location (stmt));
4253 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4254 gimple_set_visited (mul_stmt, true);
4255 result = new_result;
4257 else
4258 result = iter_result;
4260 /* Decrement the occurrence count of each element in the product
4261 by the count found above, and remove this many copies of each
4262 factor from OPS. */
4263 for (i = j; i < vec_len; i++)
4265 unsigned k = power;
4266 unsigned n;
4268 rf1 = &repeat_factor_vec[i];
4269 rf1->count -= power;
4271 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4273 if (oe->op == rf1->factor)
4275 if (oe->count <= k)
4277 ops->ordered_remove (n);
4278 k -= oe->count;
4280 if (k == 0)
4281 break;
4283 else
4285 oe->count -= k;
4286 break;
4293 /* At this point all elements in the repeated factor vector have a
4294 remaining occurrence count of 0 or 1, and those with a count of 1
4295 don't have cached representatives. Re-sort the ops vector and
4296 clean up. */
4297 ops->qsort (sort_by_operand_rank);
4298 repeat_factor_vec.release ();
4300 /* Return the final product computed herein. Note that there may
4301 still be some elements with single occurrence count left in OPS;
4302 those will be handled by the normal reassociation logic. */
4303 return result;
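/* To make this concrete: for a chain x * x * x * y, factor x has an
   occurrence count of 3 and y a count of 1.  The loop above emits a single
   call t = __builtin_powi (x, 3) into a "reassocpow" temporary t, removes
   the three copies of x from OPS, and returns t; the leftover factor y is
   multiplied back in by the caller, reassociate_bb.  */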
4306 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4308 static void
4309 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4311 tree rhs1;
4313 if (dump_file && (dump_flags & TDF_DETAILS))
4315 fprintf (dump_file, "Transforming ");
4316 print_gimple_stmt (dump_file, stmt, 0, 0);
4319 rhs1 = gimple_assign_rhs1 (stmt);
4320 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4321 update_stmt (stmt);
4322 remove_visited_stmt_chain (rhs1);
4324 if (dump_file && (dump_flags & TDF_DETAILS))
4326 fprintf (dump_file, " into ");
4327 print_gimple_stmt (dump_file, stmt, 0, 0);
4331 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4333 static void
4334 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4335 tree rhs1, tree rhs2)
4337 if (dump_file && (dump_flags & TDF_DETAILS))
4339 fprintf (dump_file, "Transforming ");
4340 print_gimple_stmt (dump_file, stmt, 0, 0);
4343 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4344 update_stmt (gsi_stmt (*gsi));
4345 remove_visited_stmt_chain (rhs1);
4347 if (dump_file && (dump_flags & TDF_DETAILS))
4349 fprintf (dump_file, " into ");
4350 print_gimple_stmt (dump_file, stmt, 0, 0);
4354 /* Reassociate expressions in basic block BB and in the blocks that are
4355 its children in the post-dominator tree. */
4357 static void
4358 reassociate_bb (basic_block bb)
4360 gimple_stmt_iterator gsi;
4361 basic_block son;
4362 gimple stmt = last_stmt (bb);
4364 if (stmt && !gimple_visited_p (stmt))
4365 maybe_optimize_range_tests (stmt);
4367 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4369 stmt = gsi_stmt (gsi);
4371 if (is_gimple_assign (stmt)
4372 && !stmt_could_throw_p (stmt))
4374 tree lhs, rhs1, rhs2;
4375 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4377 /* If this is not a gimple binary expression, there is
4378 nothing for us to do with it. */
4379 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4380 continue;
4382 /* If this was part of an already processed statement,
4383 we don't need to touch it again. */
4384 if (gimple_visited_p (stmt))
4386 /* This statement might have become dead because of previous
4387 reassociations. */
4388 if (has_zero_uses (gimple_get_lhs (stmt)))
4390 gsi_remove (&gsi, true);
4391 release_defs (stmt);
4392 /* Removing that statement may have left the iterator at
4393 the end of the sequence. In that case, reset it to the
4394 last statement of the block. If the block has become
4395 empty, the reset iterator is also at the end of the
4396 sequence, in which case we need to bail out of the walk
4397 over this block. */
4398 if (gsi_end_p (gsi))
4400 gsi = gsi_last_bb (bb);
4401 if (gsi_end_p (gsi))
4402 break;
4405 continue;
4408 lhs = gimple_assign_lhs (stmt);
4409 rhs1 = gimple_assign_rhs1 (stmt);
4410 rhs2 = gimple_assign_rhs2 (stmt);
4412 /* For operations other than the bitwise and min/max ones
4413 we cannot associate all types. Verify that here. */
4414 if (rhs_code != BIT_IOR_EXPR
4415 && rhs_code != BIT_AND_EXPR
4416 && rhs_code != BIT_XOR_EXPR
4417 && rhs_code != MIN_EXPR
4418 && rhs_code != MAX_EXPR
4419 && (!can_reassociate_p (lhs)
4420 || !can_reassociate_p (rhs1)
4421 || !can_reassociate_p (rhs2)))
4422 continue;
4424 if (associative_tree_code (rhs_code))
4426 auto_vec<operand_entry_t> ops;
4427 tree powi_result = NULL_TREE;
4429 /* There may be no immediate uses left by the time we
4430 get here because we may have eliminated them all. */
4431 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4432 continue;
4434 gimple_set_visited (stmt, true);
4435 linearize_expr_tree (&ops, stmt, true, true);
4436 ops.qsort (sort_by_operand_rank);
4437 optimize_ops_list (rhs_code, &ops);
4438 if (undistribute_ops_list (rhs_code, &ops,
4439 loop_containing_stmt (stmt)))
4441 ops.qsort (sort_by_operand_rank);
4442 optimize_ops_list (rhs_code, &ops);
4445 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4446 optimize_range_tests (rhs_code, &ops);
4448 if (first_pass_instance
4449 && rhs_code == MULT_EXPR
4450 && flag_unsafe_math_optimizations)
4451 powi_result = attempt_builtin_powi (stmt, &ops);
4453 /* If the operand vector is now empty, all operands were
4454 consumed by the __builtin_powi optimization. */
4455 if (ops.length () == 0)
4456 transform_stmt_to_copy (&gsi, stmt, powi_result);
4457 else if (ops.length () == 1)
4459 tree last_op = ops.last ()->op;
4461 if (powi_result)
4462 transform_stmt_to_multiply (&gsi, stmt, last_op,
4463 powi_result);
4464 else
4465 transform_stmt_to_copy (&gsi, stmt, last_op);
4467 else
4469 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4470 int ops_num = ops.length ();
4471 int width = get_reassociation_width (ops_num, rhs_code, mode);
4472 tree new_lhs = lhs;
4474 if (dump_file && (dump_flags & TDF_DETAILS))
4475 fprintf (dump_file,
4476 "Width = %d was chosen for reassociation\n", width);
4478 if (width > 1
4479 && ops.length () > 3)
4480 rewrite_expr_tree_parallel (stmt, width, ops);
4481 else
4483 /* When there are three operands left, we want
4484 to make sure the ones that get the double
4485 binary op are chosen wisely. */
4486 int len = ops.length ();
4487 if (len >= 3)
4488 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4490 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4491 powi_result != NULL);
4494 /* If we combined some repeated factors into a
4495 __builtin_powi call, multiply that result by the
4496 reassociated operands. */
4497 if (powi_result)
4499 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4500 tree type = TREE_TYPE (lhs);
4501 tree target_ssa = make_temp_ssa_name (type, NULL,
4502 "reassocpow");
4503 gimple_set_lhs (lhs_stmt, target_ssa);
4504 update_stmt (lhs_stmt);
4505 if (lhs != new_lhs)
4506 target_ssa = new_lhs;
4507 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4508 powi_result,
4509 target_ssa);
4510 gimple_set_location (mul_stmt, gimple_location (stmt));
4511 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4517 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4518 son;
4519 son = next_dom_son (CDI_POST_DOMINATORS, son))
4520 reassociate_bb (son);
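/* In summary, for each associative statement the walk above performs:
   linearize_expr_tree, a sort by operand rank, optimize_ops_list,
   undistribute_ops_list (re-sorting and re-optimizing if it changed
   anything), optimize_range_tests for BIT_IOR_EXPR/BIT_AND_EXPR,
   attempt_builtin_powi for multiplies under -funsafe-math-optimizations
   on the first pass instance, and finally a rewrite of the statement via
   transform_stmt_to_copy, transform_stmt_to_multiply, rewrite_expr_tree
   or rewrite_expr_tree_parallel.  */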
4523 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4524 void debug_ops_vector (vec<operand_entry_t> ops);
4526 /* Dump the operand entry vector OPS to FILE. */
4528 void
4529 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4531 operand_entry_t oe;
4532 unsigned int i;
4534 FOR_EACH_VEC_ELT (ops, i, oe)
4536 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4537 print_generic_expr (file, oe->op, 0);
4541 /* Dump the operand entry vector OPS to STDERR. */
4543 DEBUG_FUNCTION void
4544 debug_ops_vector (vec<operand_entry_t> ops)
4546 dump_ops_vector (stderr, ops);
4549 static void
4550 do_reassoc (void)
4552 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4553 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
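/* Note that break_up_subtract_bb walks the dominator tree downward from
   the entry block, while reassociate_bb walks the post-dominator tree
   from the exit block, so statements are rewritten roughly from the
   bottom of the function upward.  */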
4556 /* Initialize the reassociation pass. */
4558 static void
4559 init_reassoc (void)
4561 int i;
4562 long rank = 2;
4563 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4565 /* Find the loops, so that we can prevent moving calculations in
4566 them. */
4567 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4569 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4571 operand_entry_pool = create_alloc_pool ("operand entry pool",
4572 sizeof (struct operand_entry), 30);
4573 next_operand_entry_id = 0;
4575 /* Reverse post order (RPO) will give us an ordering where
4576 deeper loops come later. */
4577 pre_and_rev_post_order_compute (NULL, bbs, false);
4578 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4579 operand_rank = pointer_map_create ();
4581 /* Give each default definition a distinct rank. This includes
4582 parameters and the static chain. Walk backwards over all
4583 SSA names so that we get proper rank ordering according
4584 to tree_swap_operands_p. */
4585 for (i = num_ssa_names - 1; i > 0; --i)
4587 tree name = ssa_name (i);
4588 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4589 insert_operand_rank (name, ++rank);
4592 /* Set up rank for each BB */
4593 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
4594 bb_rank[bbs[i]] = ++rank << 16;
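/* The shift by 16 presumably leaves headroom below the next block's base
   rank for the per-statement ranks that get_rank derives from these
   values, so expression ranks from earlier blocks stay below the base
   rank of later blocks.  */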
4596 free (bbs);
4597 calculate_dominance_info (CDI_POST_DOMINATORS);
4598 plus_negates = vNULL;
4601 /* Cleanup after the reassociation pass, and print stats if
4602 requested. */
4604 static void
4605 fini_reassoc (void)
4607 statistics_counter_event (cfun, "Linearized",
4608 reassociate_stats.linearized);
4609 statistics_counter_event (cfun, "Constants eliminated",
4610 reassociate_stats.constants_eliminated);
4611 statistics_counter_event (cfun, "Ops eliminated",
4612 reassociate_stats.ops_eliminated);
4613 statistics_counter_event (cfun, "Statements rewritten",
4614 reassociate_stats.rewritten);
4615 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4616 reassociate_stats.pows_encountered);
4617 statistics_counter_event (cfun, "Built-in powi calls created",
4618 reassociate_stats.pows_created);
4620 pointer_map_destroy (operand_rank);
4621 free_alloc_pool (operand_entry_pool);
4622 free (bb_rank);
4623 plus_negates.release ();
4624 free_dominance_info (CDI_POST_DOMINATORS);
4625 loop_optimizer_finalize ();
4628 /* Gate and execute functions for Reassociation. */
4630 static unsigned int
4631 execute_reassoc (void)
4633 init_reassoc ();
4635 do_reassoc ();
4636 repropagate_negates ();
4638 fini_reassoc ();
4639 return 0;
4642 static bool
4643 gate_tree_ssa_reassoc (void)
4645 return flag_tree_reassoc != 0;
4648 namespace {
4650 const pass_data pass_data_reassoc =
4652 GIMPLE_PASS, /* type */
4653 "reassoc", /* name */
4654 OPTGROUP_NONE, /* optinfo_flags */
4655 true, /* has_gate */
4656 true, /* has_execute */
4657 TV_TREE_REASSOC, /* tv_id */
4658 ( PROP_cfg | PROP_ssa ), /* properties_required */
4659 0, /* properties_provided */
4660 0, /* properties_destroyed */
4661 0, /* todo_flags_start */
4662 ( TODO_verify_ssa
4663 | TODO_update_ssa_only_virtuals
4664 | TODO_verify_flow ), /* todo_flags_finish */
4667 class pass_reassoc : public gimple_opt_pass
4669 public:
4670 pass_reassoc (gcc::context *ctxt)
4671 : gimple_opt_pass (pass_data_reassoc, ctxt)
4674 /* opt_pass methods: */
4675 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4676 bool gate () { return gate_tree_ssa_reassoc (); }
4677 unsigned int execute () { return execute_reassoc (); }
4679 }; // class pass_reassoc
4681 } // anon namespace
4683 gimple_opt_pass *
4684 make_pass_reassoc (gcc::context *ctxt)
4686 return new pass_reassoc (ctxt);