1 /* Reassociation for trees.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "tree.h"
26 #include "basic-block.h"
27 #include "gimple-pretty-print.h"
28 #include "tree-inline.h"
29 #include "tree-flow.h"
30 #include "gimple.h"
31 #include "tree-iterator.h"
32 #include "tree-pass.h"
33 #include "alloc-pool.h"
34 #include "vec.h"
35 #include "langhooks.h"
36 #include "pointer-set.h"
37 #include "cfgloop.h"
38 #include "flags.h"
39 #include "target.h"
40 #include "params.h"
41 #include "diagnostic-core.h"
43 /* This is a simple global reassociation pass. It is, in part, based
44 on the LLVM pass of the same name (They do some things more/less
45 than we do, in different orders, etc).
47 It consists of five steps:
49 1. Breaking up subtract operations into addition + negate, where
50 it would promote the reassociation of adds.
52 2. Left linearization of the expression trees, so that (A+B)+(C+D)
53 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
54 During linearization, we place the operands of the binary
55 expressions into a vector of operand_entry_t
57 3. Optimization of the operand lists, eliminating things like a +
58 -a, a & a, etc.
60 3a. Combine repeated factors with the same occurrence counts
61 into a __builtin_powi call that will later be optimized into
62 an optimal number of multiplies.
64 4. Rewrite the expression trees we linearized and optimized so
65 they are in proper rank order.
67 5. Repropagate negates, as nothing else will clean it up ATM.
69 A bit of theory on #4, since nobody seems to write anything down
70 about why it makes sense to do it the way they do it:
72 We could do this much nicer theoretically, but don't (for reasons
73 explained after how to do it theoretically nice :P).
75 In order to promote the most redundancy elimination, you want
76 binary expressions whose operands are the same rank (or
77 preferably, the same value) exposed to the redundancy eliminator,
78 for possible elimination.
80 So the way to do this if we really cared, is to build the new op
81 tree from the leaves to the roots, merging as you go, and putting the
82 new op on the end of the worklist, until you are left with one
83 thing on the worklist.
85 IE if you have to rewrite the following set of operands (listed with
86 rank in parentheses), with opcode PLUS_EXPR:
88 a (1), b (1), c (1), d (2), e (2)
91 We start with our merge worklist empty, and the ops list with all of
92 those on it.
94 You want to first merge all leaves of the same rank, as much as
95 possible.
97 So first build a binary op of
99 mergetmp = a + b, and put "mergetmp" on the merge worklist.
101 Because there is no three operand form of PLUS_EXPR, c is not going to
102 be exposed to redundancy elimination as a rank 1 operand.
104 So you might as well throw it on the merge worklist (you could also
105 consider it to now be a rank two operand, and merge it with d and e,
106 but in this case, you then have evicted e from a binary op. So at
107 least in this situation, you can't win.)
109 Then build a binary op of d + e
110 mergetmp2 = d + e
112 and put mergetmp2 on the merge worklist.
114 so merge worklist = {mergetmp, c, mergetmp2}
116 Continue building binary ops of these operations until you have only
117 one operation left on the worklist.
119 So we have
121 build binary op
122 mergetmp3 = mergetmp + c
124 worklist = {mergetmp2, mergetmp3}
126 mergetmp4 = mergetmp2 + mergetmp3
128 worklist = {mergetmp4}
130 because we have one operation left, we can now just set the original
131 statement equal to the result of that operation.
133 This will at least expose a + b and d + e to redundancy elimination
134 as binary operations.
136 For extra points, you can reuse the old statements to build the
137 mergetmps, since you shouldn't run out.
139 So why don't we do this?
141 Because it's expensive, and rarely will help. Most trees we are
142 reassociating have 3 or fewer ops. If they have 2 ops, they already
143 will be written into a nice single binary op. If you have 3 ops, a
144 single simple check suffices to tell you whether the first two are of the
145 same rank. If so, you know to order it
147 mergetmp = op1 + op2
148 newstmt = mergetmp + op3
150 instead of
151 mergetmp = op2 + op3
152 newstmt = mergetmp + op1
154 If all three are of the same rank, you can't expose them all in a
155 single binary operator anyway, so the above is *still* the best you
156 can do.
158 Thus, this is what we do. When we have three ops left, we check to see
159 what order to put them in, and call it a day. As a nod to vector sum
160 reduction, we check if any of the ops are really a phi node that is a
161 destructive update for the associating op, and keep the destructive
162 update together for vector sum reduction recognition. */
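/* An illustrative sketch of the whole pipeline (SSA names invented for
   exposition):

     _1 = a + b;
     _2 = c + d;
     _3 = _1 + _2;          (A+B)+(C+D)

   Step 2 linearizes the operands of _3 into the list {a, b, c, d},
   step 3 optimizes and sorts the list by rank, and step 4 rewrites it
   back as the left-linear chain

     _1 = a + b;
     _2 = _1 + c;
     _3 = _2 + d;           ((A+B)+C)+D

   which is the shape described above.  */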
165 /* Statistics */
166 static struct
168 int linearized;
169 int constants_eliminated;
170 int ops_eliminated;
171 int rewritten;
172 int pows_encountered;
173 int pows_created;
174 } reassociate_stats;
176 /* Operator, rank pair. */
177 typedef struct operand_entry
179 unsigned int rank;
180 int id;
181 tree op;
182 unsigned int count;
183 } *operand_entry_t;
185 static alloc_pool operand_entry_pool;
187 /* This is used to assign a unique ID to each struct operand_entry
188 so that qsort results are identical on different hosts. */
189 static int next_operand_entry_id;
191 /* Starting rank number for a given basic block, so that we can rank
192 operations using unmovable instructions in that BB based on the bb
193 depth. */
194 static long *bb_rank;
196 /* Operand->rank hashtable. */
197 static struct pointer_map_t *operand_rank;
199 /* Forward decls. */
200 static long get_rank (tree);
203 /* Bias amount for loop-carried phis. We want this to be larger than
204 the depth of any reassociation tree we can see, but not larger than
205 the rank difference between two blocks. */
206 #define PHI_LOOP_BIAS (1 << 15)
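/* Illustrative note: given the definitions below, an accumulator phi in
   the header of an innermost loop receives rank
   bb_rank[latch] + PHI_LOOP_BIAS rather than the rank of its own block,
   so when operand lists are sorted by rank the accumulator compares
   higher than the loop-body operands and ends up added last.  */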
208 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
209 an innermost loop, and the phi has only a single use which is inside
210 the loop, then the rank is the block rank of the loop latch plus an
211 extra bias for the loop-carried dependence. This causes expressions
212 calculated into an accumulator variable to be independent for each
213 iteration of the loop. If STMT is some other phi, the rank is the
214 block rank of its containing block. */
215 static long
216 phi_rank (gimple stmt)
218 basic_block bb = gimple_bb (stmt);
219 struct loop *father = bb->loop_father;
220 tree res;
221 unsigned i;
222 use_operand_p use;
223 gimple use_stmt;
225 /* We only care about real loops (those with a latch). */
226 if (!father->latch)
227 return bb_rank[bb->index];
229 /* Interesting phis must be in headers of innermost loops. */
230 if (bb != father->header
231 || father->inner)
232 return bb_rank[bb->index];
234 /* Ignore virtual SSA_NAMEs. */
235 res = gimple_phi_result (stmt);
236 if (virtual_operand_p (res))
237 return bb_rank[bb->index];
239 /* The phi definition must have a single use, and that use must be
240 within the loop. Otherwise this isn't an accumulator pattern. */
241 if (!single_imm_use (res, &use, &use_stmt)
242 || gimple_bb (use_stmt)->loop_father != father)
243 return bb_rank[bb->index];
245 /* Look for phi arguments from within the loop. If found, bias this phi. */
246 for (i = 0; i < gimple_phi_num_args (stmt); i++)
248 tree arg = gimple_phi_arg_def (stmt, i);
249 if (TREE_CODE (arg) == SSA_NAME
250 && !SSA_NAME_IS_DEFAULT_DEF (arg))
252 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
253 if (gimple_bb (def_stmt)->loop_father == father)
254 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
258 /* Must be an uninteresting phi. */
259 return bb_rank[bb->index];
262 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
263 loop-carried dependence of an innermost loop, return TRUE; else
264 return FALSE. */
265 static bool
266 loop_carried_phi (tree exp)
268 gimple phi_stmt;
269 long block_rank;
271 if (TREE_CODE (exp) != SSA_NAME
272 || SSA_NAME_IS_DEFAULT_DEF (exp))
273 return false;
275 phi_stmt = SSA_NAME_DEF_STMT (exp);
277 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
278 return false;
280 /* Non-loop-carried phis have block rank. Loop-carried phis have
281 an additional bias added in. If this phi doesn't have block rank,
282 it's biased and should not be propagated. */
283 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
285 if (phi_rank (phi_stmt) != block_rank)
286 return true;
288 return false;
291 /* Return the maximum of RANK and the rank that should be propagated
292 from expression OP. For most operands, this is just the rank of OP.
293 For loop-carried phis, the value is zero to avoid undoing the bias
294 in favor of the phi. */
295 static long
296 propagate_rank (long rank, tree op)
298 long op_rank;
300 if (loop_carried_phi (op))
301 return rank;
303 op_rank = get_rank (op);
305 return MAX (rank, op_rank);
308 /* Look up the operand rank structure for expression E. */
310 static inline long
311 find_operand_rank (tree e)
313 void **slot = pointer_map_contains (operand_rank, e);
314 return slot ? (long) (intptr_t) *slot : -1;
317 /* Insert {E,RANK} into the operand rank hashtable. */
319 static inline void
320 insert_operand_rank (tree e, long rank)
322 void **slot;
323 gcc_assert (rank > 0);
324 slot = pointer_map_insert (operand_rank, e);
325 gcc_assert (!*slot);
326 *slot = (void *) (intptr_t) rank;
329 /* Given an expression E, return the rank of the expression. */
331 static long
332 get_rank (tree e)
334 /* Constants have rank 0. */
335 if (is_gimple_min_invariant (e))
336 return 0;
338 /* SSA_NAME's have the rank of the expression they are the result of.
340 For globals and uninitialized values, the rank is 0.
341 For function arguments, use the pre-setup rank.
342 For PHI nodes, stores, asm statements, etc, we use the rank of
343 the BB.
344 For simple operations, the rank is the maximum rank of any of
345 its operands, or the bb_rank, whichever is less.
346 I make no claims that this is optimal; however, it gives good
347 results. */
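/* A small worked example (ranks invented): if operand a has rank 2 and
   operand b has rank 3, then for

     c = a + b;

   the code below computes MAX (2, 3) + 1 == 4 as the rank of c, one more
   than its highest-ranked operand; a call or a statement with a store
   would instead simply get the rank of its basic block.  */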
349 /* We make an exception to the normal ranking system to break
350 dependences of accumulator variables in loops. Suppose we
351 have a simple one-block loop containing:
353 x_1 = phi(x_0, x_2)
354 b = a + x_1
355 c = b + d
356 x_2 = c + e
358 As shown, each iteration of the calculation into x is fully
359 dependent upon the iteration before it. We would prefer to
360 see this in the form:
362 x_1 = phi(x_0, x_2)
363 b = a + d
364 c = b + e
365 x_2 = c + x_1
367 If the loop is unrolled, the calculations of b and c from
368 different iterations can be interleaved.
370 To obtain this result during reassociation, we bias the rank
371 of the phi definition x_1 upward, when it is recognized as an
372 accumulator pattern. The artificial rank causes it to be
373 added last, providing the desired independence. */
375 if (TREE_CODE (e) == SSA_NAME)
377 gimple stmt;
378 long rank;
379 int i, n;
380 tree op;
382 if (SSA_NAME_IS_DEFAULT_DEF (e))
383 return find_operand_rank (e);
385 stmt = SSA_NAME_DEF_STMT (e);
386 if (gimple_code (stmt) == GIMPLE_PHI)
387 return phi_rank (stmt);
389 if (!is_gimple_assign (stmt)
390 || gimple_vdef (stmt))
391 return bb_rank[gimple_bb (stmt)->index];
393 /* If we already have a rank for this expression, use that. */
394 rank = find_operand_rank (e);
395 if (rank != -1)
396 return rank;
398 /* Otherwise, find the maximum rank for the operands. As an
399 exception, remove the bias from loop-carried phis when propagating
400 the rank so that dependent operations are not also biased. */
401 rank = 0;
402 if (gimple_assign_single_p (stmt))
404 tree rhs = gimple_assign_rhs1 (stmt);
405 n = TREE_OPERAND_LENGTH (rhs);
406 if (n == 0)
407 rank = propagate_rank (rank, rhs);
408 else
410 for (i = 0; i < n; i++)
412 op = TREE_OPERAND (rhs, i);
414 if (op != NULL_TREE)
415 rank = propagate_rank (rank, op);
419 else
421 n = gimple_num_ops (stmt);
422 for (i = 1; i < n; i++)
424 op = gimple_op (stmt, i);
425 gcc_assert (op);
426 rank = propagate_rank (rank, op);
430 if (dump_file && (dump_flags & TDF_DETAILS))
432 fprintf (dump_file, "Rank for ");
433 print_generic_expr (dump_file, e, 0);
434 fprintf (dump_file, " is %ld\n", (rank + 1));
437 /* Note the rank in the hashtable so we don't recompute it. */
438 insert_operand_rank (e, (rank + 1));
439 return (rank + 1);
442 /* Globals, etc, are rank 0 */
443 return 0;
447 /* We want integer ones to end up last no matter what, since they are
448 the ones we can do the most with. */
449 #define INTEGER_CONST_TYPE 1 << 3
450 #define FLOAT_CONST_TYPE 1 << 2
451 #define OTHER_CONST_TYPE 1 << 1
453 /* Classify an invariant tree into integer, float, or other, so that
454 we can sort them to be near other constants of the same type. */
455 static inline int
456 constant_type (tree t)
458 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
459 return INTEGER_CONST_TYPE;
460 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
461 return FLOAT_CONST_TYPE;
462 else
463 return OTHER_CONST_TYPE;
466 /* qsort comparison function to sort operand entries PA and PB by rank
467 so that the sorted array is ordered by rank in decreasing order. */
468 static int
469 sort_by_operand_rank (const void *pa, const void *pb)
471 const operand_entry_t oea = *(const operand_entry_t *)pa;
472 const operand_entry_t oeb = *(const operand_entry_t *)pb;
474 /* It's nicer for optimize_expression if constants that are likely
475 to fold when added/multiplied/whatever are put next to each
476 other. Since all constants have rank 0, order them by type. */
477 if (oeb->rank == 0 && oea->rank == 0)
479 if (constant_type (oeb->op) != constant_type (oea->op))
480 return constant_type (oeb->op) - constant_type (oea->op);
481 else
482 /* To make sorting result stable, we use unique IDs to determine
483 order. */
484 return oeb->id - oea->id;
487 /* Lastly, make sure the versions that are the same go next to each
488 other. We use SSA_NAME_VERSION because it's stable. */
489 if ((oeb->rank - oea->rank == 0)
490 && TREE_CODE (oea->op) == SSA_NAME
491 && TREE_CODE (oeb->op) == SSA_NAME)
493 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
494 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
495 else
496 return oeb->id - oea->id;
499 if (oeb->rank != oea->rank)
500 return oeb->rank - oea->rank;
501 else
502 return oeb->id - oea->id;
505 /* Add an operand entry to *OPS for the tree operand OP. */
507 static void
508 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
510 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
512 oe->op = op;
513 oe->rank = get_rank (op);
514 oe->id = next_operand_entry_id++;
515 oe->count = 1;
516 ops->safe_push (oe);
519 /* Add an operand entry to *OPS for the tree operand OP with repeat
520 count REPEAT. */
522 static void
523 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
524 HOST_WIDE_INT repeat)
526 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
528 oe->op = op;
529 oe->rank = get_rank (op);
530 oe->id = next_operand_entry_id++;
531 oe->count = repeat;
532 ops->safe_push (oe);
534 reassociate_stats.pows_encountered++;
537 /* Return true if STMT is a reassociable operation containing a binary
538 operation with tree code CODE, and is inside LOOP. */
540 static bool
541 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
543 basic_block bb = gimple_bb (stmt);
545 if (gimple_bb (stmt) == NULL)
546 return false;
548 if (!flow_bb_inside_loop_p (loop, bb))
549 return false;
551 if (is_gimple_assign (stmt)
552 && gimple_assign_rhs_code (stmt) == code
553 && has_single_use (gimple_assign_lhs (stmt)))
554 return true;
556 return false;
560 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
561 operand of that operation. Otherwise, return NULL. */
563 static tree
564 get_unary_op (tree name, enum tree_code opcode)
566 gimple stmt = SSA_NAME_DEF_STMT (name);
568 if (!is_gimple_assign (stmt))
569 return NULL_TREE;
571 if (gimple_assign_rhs_code (stmt) == opcode)
572 return gimple_assign_rhs1 (stmt);
573 return NULL_TREE;
576 /* If CURR and LAST are a pair of ops that OPCODE allows us to
577 eliminate through equivalences, do so, remove them from OPS, and
578 return true. Otherwise, return false. */
580 static bool
581 eliminate_duplicate_pair (enum tree_code opcode,
582 vec<operand_entry_t> *ops,
583 bool *all_done,
584 unsigned int i,
585 operand_entry_t curr,
586 operand_entry_t last)
589 /* If we have two of the same op, and the opcode is &, |, min, or max,
590 we can eliminate one of them.
591 If we have two of the same op, and the opcode is ^, we can
592 eliminate both of them. */
594 if (last && last->op == curr->op)
596 switch (opcode)
598 case MAX_EXPR:
599 case MIN_EXPR:
600 case BIT_IOR_EXPR:
601 case BIT_AND_EXPR:
602 if (dump_file && (dump_flags & TDF_DETAILS))
604 fprintf (dump_file, "Equivalence: ");
605 print_generic_expr (dump_file, curr->op, 0);
606 fprintf (dump_file, " [&|minmax] ");
607 print_generic_expr (dump_file, last->op, 0);
608 fprintf (dump_file, " -> ");
609 print_generic_stmt (dump_file, last->op, 0);
612 ops->ordered_remove (i);
613 reassociate_stats.ops_eliminated ++;
615 return true;
617 case BIT_XOR_EXPR:
618 if (dump_file && (dump_flags & TDF_DETAILS))
620 fprintf (dump_file, "Equivalence: ");
621 print_generic_expr (dump_file, curr->op, 0);
622 fprintf (dump_file, " ^ ");
623 print_generic_expr (dump_file, last->op, 0);
624 fprintf (dump_file, " -> nothing\n");
627 reassociate_stats.ops_eliminated += 2;
629 if (ops->length () == 2)
631 ops->create (0);
632 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
633 *all_done = true;
635 else
637 ops->ordered_remove (i-1);
638 ops->ordered_remove (i-1);
641 return true;
643 default:
644 break;
647 return false;
650 static vec<tree> plus_negates;
652 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
653 expression, look in OPS for a corresponding positive operation to cancel
654 it out. If we find one, remove the other from OPS, replace
655 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
656 return false. */
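/* Concrete cases of the above (operands invented): within a PLUS_EXPR
   operand list,

     a + (-a) + b   ->   0 + b
     a + ~a + b     ->   -1 + b

   i.e. the cancelling pair is dropped and replaced by the constant 0 or
   -1 respectively.  */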
658 static bool
659 eliminate_plus_minus_pair (enum tree_code opcode,
660 vec<operand_entry_t> *ops,
661 unsigned int currindex,
662 operand_entry_t curr)
664 tree negateop;
665 tree notop;
666 unsigned int i;
667 operand_entry_t oe;
669 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
670 return false;
672 negateop = get_unary_op (curr->op, NEGATE_EXPR);
673 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
674 if (negateop == NULL_TREE && notop == NULL_TREE)
675 return false;
677 /* Any non-negated version will have a rank that is one less than
678 the current rank. So once we hit those ranks, if we don't find
679 one, we can stop. */
681 for (i = currindex + 1;
682 ops->iterate (i, &oe)
683 && oe->rank >= curr->rank - 1 ;
684 i++)
686 if (oe->op == negateop)
689 if (dump_file && (dump_flags & TDF_DETAILS))
691 fprintf (dump_file, "Equivalence: ");
692 print_generic_expr (dump_file, negateop, 0);
693 fprintf (dump_file, " + -");
694 print_generic_expr (dump_file, oe->op, 0);
695 fprintf (dump_file, " -> 0\n");
698 ops->ordered_remove (i);
699 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
700 ops->ordered_remove (currindex);
701 reassociate_stats.ops_eliminated ++;
703 return true;
705 else if (oe->op == notop)
707 tree op_type = TREE_TYPE (oe->op);
709 if (dump_file && (dump_flags & TDF_DETAILS))
711 fprintf (dump_file, "Equivalence: ");
712 print_generic_expr (dump_file, notop, 0);
713 fprintf (dump_file, " + ~");
714 print_generic_expr (dump_file, oe->op, 0);
715 fprintf (dump_file, " -> -1\n");
718 ops->ordered_remove (i);
719 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
720 ops->ordered_remove (currindex);
721 reassociate_stats.ops_eliminated ++;
723 return true;
727 /* CURR->OP is a negate expr in a plus expr: save it for later
728 inspection in repropagate_negates(). */
729 if (negateop != NULL_TREE)
730 plus_negates.safe_push (curr->op);
732 return false;
735 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
736 bitwise not expression, look in OPS for a corresponding operand to
737 cancel it out. If we find one, remove the other from OPS, replace
738 OPS[CURRINDEX] with 0, and return true. Otherwise, return
739 false. */
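/* Concrete cases of the above (operands invented):

     x & ~x   ->   0
     x | ~x   ->   all-ones mask

   matching the build_zero_cst / build_low_bits_mask replacements in the
   body below.  */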
741 static bool
742 eliminate_not_pairs (enum tree_code opcode,
743 vec<operand_entry_t> *ops,
744 unsigned int currindex,
745 operand_entry_t curr)
747 tree notop;
748 unsigned int i;
749 operand_entry_t oe;
751 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
752 || TREE_CODE (curr->op) != SSA_NAME)
753 return false;
755 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
756 if (notop == NULL_TREE)
757 return false;
759 /* Any non-not version will have a rank that is one less than
760 the current rank. So once we hit those ranks, if we don't find
761 one, we can stop. */
763 for (i = currindex + 1;
764 ops->iterate (i, &oe)
765 && oe->rank >= curr->rank - 1;
766 i++)
768 if (oe->op == notop)
770 if (dump_file && (dump_flags & TDF_DETAILS))
772 fprintf (dump_file, "Equivalence: ");
773 print_generic_expr (dump_file, notop, 0);
774 if (opcode == BIT_AND_EXPR)
775 fprintf (dump_file, " & ~");
776 else if (opcode == BIT_IOR_EXPR)
777 fprintf (dump_file, " | ~");
778 print_generic_expr (dump_file, oe->op, 0);
779 if (opcode == BIT_AND_EXPR)
780 fprintf (dump_file, " -> 0\n");
781 else if (opcode == BIT_IOR_EXPR)
782 fprintf (dump_file, " -> -1\n");
785 if (opcode == BIT_AND_EXPR)
786 oe->op = build_zero_cst (TREE_TYPE (oe->op));
787 else if (opcode == BIT_IOR_EXPR)
788 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
789 TYPE_PRECISION (TREE_TYPE (oe->op)));
791 reassociate_stats.ops_eliminated += ops->length () - 1;
792 ops->truncate (0);
793 ops->quick_push (oe);
794 return true;
798 return false;
801 /* Use constant value that may be present in OPS to try to eliminate
802 operands. Note that this function is only really used when we've
803 eliminated ops for other reasons, or merged constants. Across
804 single statements, fold already does all of this, plus more. There
805 is little point in duplicating logic, so I've only included the
806 identities that I could ever construct testcases to trigger. */
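/* The identities handled below, shown on invented operands:

     a & 0  -> 0        a & -1 -> a
     a | -1 -> -1       a | 0  -> a
     a * 0  -> 0        a * 1  -> a
     a + 0  -> a        a ^ 0  -> a

   with the floating-point cases guarded by the HONOR_* and
   fold_real_zero_addition_p checks in the code.  */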
808 static void
809 eliminate_using_constants (enum tree_code opcode,
810 vec<operand_entry_t> *ops)
812 operand_entry_t oelast = ops->last ();
813 tree type = TREE_TYPE (oelast->op);
815 if (oelast->rank == 0
816 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
818 switch (opcode)
820 case BIT_AND_EXPR:
821 if (integer_zerop (oelast->op))
823 if (ops->length () != 1)
825 if (dump_file && (dump_flags & TDF_DETAILS))
826 fprintf (dump_file, "Found & 0, removing all other ops\n");
828 reassociate_stats.ops_eliminated += ops->length () - 1;
830 ops->truncate (0);
831 ops->quick_push (oelast);
832 return;
835 else if (integer_all_onesp (oelast->op))
837 if (ops->length () != 1)
839 if (dump_file && (dump_flags & TDF_DETAILS))
840 fprintf (dump_file, "Found & -1, removing\n");
841 ops->pop ();
842 reassociate_stats.ops_eliminated++;
845 break;
846 case BIT_IOR_EXPR:
847 if (integer_all_onesp (oelast->op))
849 if (ops->length () != 1)
851 if (dump_file && (dump_flags & TDF_DETAILS))
852 fprintf (dump_file, "Found | -1, removing all other ops\n");
854 reassociate_stats.ops_eliminated += ops->length () - 1;
856 ops->truncate (0);
857 ops->quick_push (oelast);
858 return;
861 else if (integer_zerop (oelast->op))
863 if (ops->length () != 1)
865 if (dump_file && (dump_flags & TDF_DETAILS))
866 fprintf (dump_file, "Found | 0, removing\n");
867 ops->pop ();
868 reassociate_stats.ops_eliminated++;
871 break;
872 case MULT_EXPR:
873 if (integer_zerop (oelast->op)
874 || (FLOAT_TYPE_P (type)
875 && !HONOR_NANS (TYPE_MODE (type))
876 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
877 && real_zerop (oelast->op)))
879 if (ops->length () != 1)
881 if (dump_file && (dump_flags & TDF_DETAILS))
882 fprintf (dump_file, "Found * 0, removing all other ops\n");
884 reassociate_stats.ops_eliminated += ops->length () - 1;
885 ops->truncate (1);
886 ops->quick_push (oelast);
887 return;
890 else if (integer_onep (oelast->op)
891 || (FLOAT_TYPE_P (type)
892 && !HONOR_SNANS (TYPE_MODE (type))
893 && real_onep (oelast->op)))
895 if (ops->length () != 1)
897 if (dump_file && (dump_flags & TDF_DETAILS))
898 fprintf (dump_file, "Found * 1, removing\n");
899 ops->pop ();
900 reassociate_stats.ops_eliminated++;
901 return;
904 break;
905 case BIT_XOR_EXPR:
906 case PLUS_EXPR:
907 case MINUS_EXPR:
908 if (integer_zerop (oelast->op)
909 || (FLOAT_TYPE_P (type)
910 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
911 && fold_real_zero_addition_p (type, oelast->op,
912 opcode == MINUS_EXPR)))
914 if (ops->length () != 1)
916 if (dump_file && (dump_flags & TDF_DETAILS))
917 fprintf (dump_file, "Found [|^+] 0, removing\n");
918 ops->pop ();
919 reassociate_stats.ops_eliminated++;
920 return;
923 break;
924 default:
925 break;
931 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
932 bool, bool);
934 /* Structure for tracking and counting operands. */
935 typedef struct oecount_s {
936 int cnt;
937 int id;
938 enum tree_code oecode;
939 tree op;
940 } oecount;
943 /* The heap for the oecount hashtable and the sorted list of operands. */
944 static vec<oecount> cvec;
946 /* Hash function for oecount. */
948 static hashval_t
949 oecount_hash (const void *p)
951 const oecount *c = &cvec[(size_t)p - 42];
952 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
955 /* Comparison function for oecount. */
957 static int
958 oecount_eq (const void *p1, const void *p2)
960 const oecount *c1 = &cvec[(size_t)p1 - 42];
961 const oecount *c2 = &cvec[(size_t)p2 - 42];
962 return (c1->oecode == c2->oecode
963 && c1->op == c2->op);
966 /* Comparison function for qsort sorting oecount elements by count. */
968 static int
969 oecount_cmp (const void *p1, const void *p2)
971 const oecount *c1 = (const oecount *)p1;
972 const oecount *c2 = (const oecount *)p2;
973 if (c1->cnt != c2->cnt)
974 return c1->cnt - c2->cnt;
975 else
976 /* If counts are identical, use unique IDs to stabilize qsort. */
977 return c1->id - c2->id;
980 /* Return TRUE iff STMT represents a builtin call that raises OP
981 to some exponent. */
983 static bool
984 stmt_is_power_of_op (gimple stmt, tree op)
986 tree fndecl;
988 if (!is_gimple_call (stmt))
989 return false;
991 fndecl = gimple_call_fndecl (stmt);
993 if (!fndecl
994 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
995 return false;
997 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
999 CASE_FLT_FN (BUILT_IN_POW):
1000 CASE_FLT_FN (BUILT_IN_POWI):
1001 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1003 default:
1004 return false;
1008 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1009 in place and return the result. Assumes that stmt_is_power_of_op
1010 was previously called for STMT and returned TRUE. */
1012 static HOST_WIDE_INT
1013 decrement_power (gimple stmt)
1015 REAL_VALUE_TYPE c, cint;
1016 HOST_WIDE_INT power;
1017 tree arg1;
1019 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1021 CASE_FLT_FN (BUILT_IN_POW):
1022 arg1 = gimple_call_arg (stmt, 1);
1023 c = TREE_REAL_CST (arg1);
1024 power = real_to_integer (&c) - 1;
1025 real_from_integer (&cint, VOIDmode, power, 0, 0);
1026 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1027 return power;
1029 CASE_FLT_FN (BUILT_IN_POWI):
1030 arg1 = gimple_call_arg (stmt, 1);
1031 power = TREE_INT_CST_LOW (arg1) - 1;
1032 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1033 return power;
1035 default:
1036 gcc_unreachable ();
1040 /* Find the single immediate use of STMT's LHS, and replace it
1041 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1042 replace *DEF with OP as well. */
1044 static void
1045 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1047 tree lhs;
1048 gimple use_stmt;
1049 use_operand_p use;
1050 gimple_stmt_iterator gsi;
1052 if (is_gimple_call (stmt))
1053 lhs = gimple_call_lhs (stmt);
1054 else
1055 lhs = gimple_assign_lhs (stmt);
1057 gcc_assert (has_single_use (lhs));
1058 single_imm_use (lhs, &use, &use_stmt);
1059 if (lhs == *def)
1060 *def = op;
1061 SET_USE (use, op);
1062 if (TREE_CODE (op) != SSA_NAME)
1063 update_stmt (use_stmt);
1064 gsi = gsi_for_stmt (stmt);
1065 unlink_stmt_vdef (stmt);
1066 gsi_remove (&gsi, true);
1067 release_defs (stmt);
1070 /* Walks the linear chain with result *DEF searching for an operation
1071 with operand OP and code OPCODE removing that from the chain. *DEF
1072 is updated if there is only one operand but no operation left. */
1074 static void
1075 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1077 gimple stmt = SSA_NAME_DEF_STMT (*def);
1081 tree name;
1083 if (opcode == MULT_EXPR
1084 && stmt_is_power_of_op (stmt, op))
1086 if (decrement_power (stmt) == 1)
1087 propagate_op_to_single_use (op, stmt, def);
1088 return;
1091 name = gimple_assign_rhs1 (stmt);
1093 /* If this is the operation we look for and one of the operands
1094 is ours simply propagate the other operand into the stmts
1095 single use. */
1096 if (gimple_assign_rhs_code (stmt) == opcode
1097 && (name == op
1098 || gimple_assign_rhs2 (stmt) == op))
1100 if (name == op)
1101 name = gimple_assign_rhs2 (stmt);
1102 propagate_op_to_single_use (name, stmt, def);
1103 return;
1106 /* We might have a multiply of two __builtin_pow* calls, and
1107 the operand might be hiding in the rightmost one. */
1108 if (opcode == MULT_EXPR
1109 && gimple_assign_rhs_code (stmt) == opcode
1110 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
1112 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1113 if (stmt_is_power_of_op (stmt2, op))
1115 if (decrement_power (stmt2) == 1)
1116 propagate_op_to_single_use (op, stmt2, def);
1117 return;
1121 /* Continue walking the chain. */
1122 gcc_assert (name != op
1123 && TREE_CODE (name) == SSA_NAME);
1124 stmt = SSA_NAME_DEF_STMT (name);
1126 while (1);
1129 /* Builds one statement performing OP1 OPCODE OP2, making a new SSA name of type TYPE for
1130 the result. Places the statement after the definition of either
1131 OP1 or OP2. Returns the new statement. */
1133 static gimple
1134 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1136 gimple op1def = NULL, op2def = NULL;
1137 gimple_stmt_iterator gsi;
1138 tree op;
1139 gimple sum;
1141 /* Create the addition statement. */
1142 op = make_ssa_name (type, NULL);
1143 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1145 /* Find an insertion place and insert. */
1146 if (TREE_CODE (op1) == SSA_NAME)
1147 op1def = SSA_NAME_DEF_STMT (op1);
1148 if (TREE_CODE (op2) == SSA_NAME)
1149 op2def = SSA_NAME_DEF_STMT (op2);
1150 if ((!op1def || gimple_nop_p (op1def))
1151 && (!op2def || gimple_nop_p (op2def)))
1153 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1154 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1156 else if ((!op1def || gimple_nop_p (op1def))
1157 || (op2def && !gimple_nop_p (op2def)
1158 && stmt_dominates_stmt_p (op1def, op2def)))
1160 if (gimple_code (op2def) == GIMPLE_PHI)
1162 gsi = gsi_after_labels (gimple_bb (op2def));
1163 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1165 else
1167 if (!stmt_ends_bb_p (op2def))
1169 gsi = gsi_for_stmt (op2def);
1170 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1172 else
1174 edge e;
1175 edge_iterator ei;
1177 FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1178 if (e->flags & EDGE_FALLTHRU)
1179 gsi_insert_on_edge_immediate (e, sum);
1183 else
1185 if (gimple_code (op1def) == GIMPLE_PHI)
1187 gsi = gsi_after_labels (gimple_bb (op1def));
1188 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1190 else
1192 if (!stmt_ends_bb_p (op1def))
1194 gsi = gsi_for_stmt (op1def);
1195 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1197 else
1199 edge e;
1200 edge_iterator ei;
1202 FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1203 if (e->flags & EDGE_FALLTHRU)
1204 gsi_insert_on_edge_immediate (e, sum);
1208 update_stmt (sum);
1210 return sum;
1213 /* Perform un-distribution of divisions and multiplications.
1214 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1215 to (A + B) / X for real X.
1217 The algorithm is organized as follows.
1219 - First we walk the addition chain *OPS looking for summands that
1220 are defined by a multiplication or a real division. This results
1221 in the candidates bitmap with relevant indices into *OPS.
1223 - Second we build the chains of multiplications or divisions for
1224 these candidates, counting the number of occurrences of (operand, code)
1225 pairs in all of the candidates chains.
1227 - Third we sort the (operand, code) pairs by number of occurrence and
1228 process them starting with the pair with the most uses.
1230 * For each such pair we walk the candidates again to build a
1231 second candidate bitmap noting all multiplication/division chains
1232 that have at least one occurrence of (operand, code).
1234 * We build an alternate addition chain only covering these
1235 candidates with one (operand, code) operation removed from their
1236 multiplication/division chain.
1238 * The first candidate gets replaced by the alternate addition chain
1239 multiplied/divided by the operand.
1241 * All candidate chains get disabled for further processing and
1242 processing of (operand, code) pairs continues.
1244 The alternate addition chains built are re-processed by the main
1245 reassociation algorithm which allows optimizing a * x * y + b * y * x
1246 to (a + b ) * x * y in one invocation of the reassociation pass. */
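/* A worked example (SSA names invented): for the addition chain

     _5 = a * x;
     _6 = b * x;
     _7 = _5 + _6;

   the pair (x, MULT_EXPR) is counted twice, so x is removed from both
   multiplication chains, an alternate addition a + b is built, the first
   candidate is replaced by (a + b) * x, and the second candidate becomes
   the constant 0 in the operand list.  */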
1248 static bool
1249 undistribute_ops_list (enum tree_code opcode,
1250 vec<operand_entry_t> *ops, struct loop *loop)
1252 unsigned int length = ops->length ();
1253 operand_entry_t oe1;
1254 unsigned i, j;
1255 sbitmap candidates, candidates2;
1256 unsigned nr_candidates, nr_candidates2;
1257 sbitmap_iterator sbi0;
1258 vec<operand_entry_t> *subops;
1259 htab_t ctable;
1260 bool changed = false;
1261 int next_oecount_id = 0;
1263 if (length <= 1
1264 || opcode != PLUS_EXPR)
1265 return false;
1267 /* Build a list of candidates to process. */
1268 candidates = sbitmap_alloc (length);
1269 bitmap_clear (candidates);
1270 nr_candidates = 0;
1271 FOR_EACH_VEC_ELT (*ops, i, oe1)
1273 enum tree_code dcode;
1274 gimple oe1def;
1276 if (TREE_CODE (oe1->op) != SSA_NAME)
1277 continue;
1278 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1279 if (!is_gimple_assign (oe1def))
1280 continue;
1281 dcode = gimple_assign_rhs_code (oe1def);
1282 if ((dcode != MULT_EXPR
1283 && dcode != RDIV_EXPR)
1284 || !is_reassociable_op (oe1def, dcode, loop))
1285 continue;
1287 bitmap_set_bit (candidates, i);
1288 nr_candidates++;
1291 if (nr_candidates < 2)
1293 sbitmap_free (candidates);
1294 return false;
1297 if (dump_file && (dump_flags & TDF_DETAILS))
1299 fprintf (dump_file, "searching for un-distribute opportunities ");
1300 print_generic_expr (dump_file,
1301 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1302 fprintf (dump_file, " %d\n", nr_candidates);
1305 /* Build linearized sub-operand lists and the counting table. */
1306 cvec.create (0);
1307 ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
1308 /* ??? Macro arguments cannot have multi-argument template types in
1309 them. This typedef is needed to workaround that limitation. */
1310 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1311 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1312 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1314 gimple oedef;
1315 enum tree_code oecode;
1316 unsigned j;
1318 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1319 oecode = gimple_assign_rhs_code (oedef);
1320 linearize_expr_tree (&subops[i], oedef,
1321 associative_tree_code (oecode), false);
1323 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1325 oecount c;
1326 void **slot;
1327 size_t idx;
1328 c.oecode = oecode;
1329 c.cnt = 1;
1330 c.id = next_oecount_id++;
1331 c.op = oe1->op;
1332 cvec.safe_push (c);
1333 idx = cvec.length () + 41;
1334 slot = htab_find_slot (ctable, (void *)idx, INSERT);
1335 if (!*slot)
1337 *slot = (void *)idx;
1339 else
1341 cvec.pop ();
1342 cvec[(size_t)*slot - 42].cnt++;
1346 htab_delete (ctable);
1348 /* Sort the counting table. */
1349 cvec.qsort (oecount_cmp);
1351 if (dump_file && (dump_flags & TDF_DETAILS))
1353 oecount *c;
1354 fprintf (dump_file, "Candidates:\n");
1355 FOR_EACH_VEC_ELT (cvec, j, c)
1357 fprintf (dump_file, " %u %s: ", c->cnt,
1358 c->oecode == MULT_EXPR
1359 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1360 print_generic_expr (dump_file, c->op, 0);
1361 fprintf (dump_file, "\n");
1365 /* Process the (operand, code) pairs in order of most occurrence. */
1366 candidates2 = sbitmap_alloc (length);
1367 while (!cvec.is_empty ())
1369 oecount *c = &cvec.last ();
1370 if (c->cnt < 2)
1371 break;
1373 /* Now collect the operands in the outer chain that contain
1374 the common operand in their inner chain. */
1375 bitmap_clear (candidates2);
1376 nr_candidates2 = 0;
1377 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1379 gimple oedef;
1380 enum tree_code oecode;
1381 unsigned j;
1382 tree op = (*ops)[i]->op;
1384 /* If we undistributed in this chain already this may be
1385 a constant. */
1386 if (TREE_CODE (op) != SSA_NAME)
1387 continue;
1389 oedef = SSA_NAME_DEF_STMT (op);
1390 oecode = gimple_assign_rhs_code (oedef);
1391 if (oecode != c->oecode)
1392 continue;
1394 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1396 if (oe1->op == c->op)
1398 bitmap_set_bit (candidates2, i);
1399 ++nr_candidates2;
1400 break;
1405 if (nr_candidates2 >= 2)
1407 operand_entry_t oe1, oe2;
1408 gimple prod;
1409 int first = bitmap_first_set_bit (candidates2);
1411 /* Build the new addition chain. */
1412 oe1 = (*ops)[first];
1413 if (dump_file && (dump_flags & TDF_DETAILS))
1415 fprintf (dump_file, "Building (");
1416 print_generic_expr (dump_file, oe1->op, 0);
1418 zero_one_operation (&oe1->op, c->oecode, c->op);
1419 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1421 gimple sum;
1422 oe2 = (*ops)[i];
1423 if (dump_file && (dump_flags & TDF_DETAILS))
1425 fprintf (dump_file, " + ");
1426 print_generic_expr (dump_file, oe2->op, 0);
1428 zero_one_operation (&oe2->op, c->oecode, c->op);
1429 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1430 oe1->op, oe2->op, opcode);
1431 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1432 oe2->rank = 0;
1433 oe1->op = gimple_get_lhs (sum);
1436 /* Apply the multiplication/division. */
1437 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1438 oe1->op, c->op, c->oecode);
1439 if (dump_file && (dump_flags & TDF_DETAILS))
1441 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1442 print_generic_expr (dump_file, c->op, 0);
1443 fprintf (dump_file, "\n");
1446 /* Record it in the addition chain and disable further
1447 undistribution with this op. */
1448 oe1->op = gimple_assign_lhs (prod);
1449 oe1->rank = get_rank (oe1->op);
1450 subops[first].release ();
1452 changed = true;
1455 cvec.pop ();
1458 for (i = 0; i < ops->length (); ++i)
1459 subops[i].release ();
1460 free (subops);
1461 cvec.release ();
1462 sbitmap_free (candidates);
1463 sbitmap_free (candidates2);
1465 return changed;
1468 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1469 expression, examine the other OPS to see if any of them are comparisons
1470 of the same values, which we may be able to combine or eliminate.
1471 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
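/* A further illustrative case, this time for the BIT_AND_EXPR direction:
   when maybe_fold_and_comparisons recognizes a pair such as
   (a <= b) & (a < b), the combined result (a < b) replaces one operand
   entry and the other entry is removed, just as in the BIT_IOR_EXPR
   example above.  */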
1473 static bool
1474 eliminate_redundant_comparison (enum tree_code opcode,
1475 vec<operand_entry_t> *ops,
1476 unsigned int currindex,
1477 operand_entry_t curr)
1479 tree op1, op2;
1480 enum tree_code lcode, rcode;
1481 gimple def1, def2;
1482 int i;
1483 operand_entry_t oe;
1485 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1486 return false;
1488 /* Check that CURR is a comparison. */
1489 if (TREE_CODE (curr->op) != SSA_NAME)
1490 return false;
1491 def1 = SSA_NAME_DEF_STMT (curr->op);
1492 if (!is_gimple_assign (def1))
1493 return false;
1494 lcode = gimple_assign_rhs_code (def1);
1495 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1496 return false;
1497 op1 = gimple_assign_rhs1 (def1);
1498 op2 = gimple_assign_rhs2 (def1);
1500 /* Now look for a similar comparison in the remaining OPS. */
1501 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1503 tree t;
1505 if (TREE_CODE (oe->op) != SSA_NAME)
1506 continue;
1507 def2 = SSA_NAME_DEF_STMT (oe->op);
1508 if (!is_gimple_assign (def2))
1509 continue;
1510 rcode = gimple_assign_rhs_code (def2);
1511 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1512 continue;
1514 /* If we got here, we have a match. See if we can combine the
1515 two comparisons. */
1516 if (opcode == BIT_IOR_EXPR)
1517 t = maybe_fold_or_comparisons (lcode, op1, op2,
1518 rcode, gimple_assign_rhs1 (def2),
1519 gimple_assign_rhs2 (def2));
1520 else
1521 t = maybe_fold_and_comparisons (lcode, op1, op2,
1522 rcode, gimple_assign_rhs1 (def2),
1523 gimple_assign_rhs2 (def2));
1524 if (!t)
1525 continue;
1527 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1528 always give us a boolean_type_node value back. If the original
1529 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1530 we need to convert. */
1531 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1532 t = fold_convert (TREE_TYPE (curr->op), t);
1534 if (TREE_CODE (t) != INTEGER_CST
1535 && !operand_equal_p (t, curr->op, 0))
1537 enum tree_code subcode;
1538 tree newop1, newop2;
1539 if (!COMPARISON_CLASS_P (t))
1540 continue;
1541 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1542 STRIP_USELESS_TYPE_CONVERSION (newop1);
1543 STRIP_USELESS_TYPE_CONVERSION (newop2);
1544 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1545 continue;
1548 if (dump_file && (dump_flags & TDF_DETAILS))
1550 fprintf (dump_file, "Equivalence: ");
1551 print_generic_expr (dump_file, curr->op, 0);
1552 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1553 print_generic_expr (dump_file, oe->op, 0);
1554 fprintf (dump_file, " -> ");
1555 print_generic_expr (dump_file, t, 0);
1556 fprintf (dump_file, "\n");
1559 /* Now we can delete oe, as it has been subsumed by the new combined
1560 expression t. */
1561 ops->ordered_remove (i);
1562 reassociate_stats.ops_eliminated ++;
1564 /* If t is the same as curr->op, we're done. Otherwise we must
1565 replace curr->op with t. Special case is if we got a constant
1566 back, in which case we add it to the end instead of in place of
1567 the current entry. */
1568 if (TREE_CODE (t) == INTEGER_CST)
1570 ops->ordered_remove (currindex);
1571 add_to_ops_vec (ops, t);
1573 else if (!operand_equal_p (t, curr->op, 0))
1575 gimple sum;
1576 enum tree_code subcode;
1577 tree newop1;
1578 tree newop2;
1579 gcc_assert (COMPARISON_CLASS_P (t));
1580 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1581 STRIP_USELESS_TYPE_CONVERSION (newop1);
1582 STRIP_USELESS_TYPE_CONVERSION (newop2);
1583 gcc_checking_assert (is_gimple_val (newop1)
1584 && is_gimple_val (newop2));
1585 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1586 curr->op = gimple_get_lhs (sum);
1588 return true;
1591 return false;
1594 /* Perform various identities and other optimizations on the list of
1595 operand entries, stored in OPS. The tree code for the binary
1596 operation between all the operands is OPCODE. */
1598 static void
1599 optimize_ops_list (enum tree_code opcode,
1600 vec<operand_entry_t> *ops)
1602 unsigned int length = ops->length ();
1603 unsigned int i;
1604 operand_entry_t oe;
1605 operand_entry_t oelast = NULL;
1606 bool iterate = false;
1608 if (length == 1)
1609 return;
1611 oelast = ops->last ();
1613 /* If the last two are constants, pop the constants off, merge them
1614 and try the next two. */
1615 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1617 operand_entry_t oelm1 = (*ops)[length - 2];
1619 if (oelm1->rank == 0
1620 && is_gimple_min_invariant (oelm1->op)
1621 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1622 TREE_TYPE (oelast->op)))
1624 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1625 oelm1->op, oelast->op);
1627 if (folded && is_gimple_min_invariant (folded))
1629 if (dump_file && (dump_flags & TDF_DETAILS))
1630 fprintf (dump_file, "Merging constants\n");
1632 ops->pop ();
1633 ops->pop ();
1635 add_to_ops_vec (ops, folded);
1636 reassociate_stats.constants_eliminated++;
1638 optimize_ops_list (opcode, ops);
1639 return;
1644 eliminate_using_constants (opcode, ops);
1645 oelast = NULL;
1647 for (i = 0; ops->iterate (i, &oe);)
1649 bool done = false;
1651 if (eliminate_not_pairs (opcode, ops, i, oe))
1652 return;
1653 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1654 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1655 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1657 if (done)
1658 return;
1659 iterate = true;
1660 oelast = NULL;
1661 continue;
1663 oelast = oe;
1664 i++;
1667 length = ops->length ();
1668 oelast = ops->last ();
1670 if (iterate)
1671 optimize_ops_list (opcode, ops);
1674 /* The following functions are subroutines to optimize_range_tests and allow
1675 it to try to change a logical combination of comparisons into a range
1676 test.
1678 For example, both
1679 X == 2 || X == 5 || X == 3 || X == 4
and
1681 X >= 2 && X <= 5
1682 are converted to
1683 (unsigned) (X - 2) <= 3
1685 For more information see comments above fold_test_range in fold-const.c;
1686 this implementation is for GIMPLE. */
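/* Roughly, the combined check above ends up as straight-line GIMPLE such
   as (illustrative only; the exact sequence is whatever build_range_check
   and force_gimple_operand_gsi produce):

     _1 = (unsigned int) x_2;
     _2 = _1 + 4294967294;     <-- x - 2 computed in the unsigned type
     _3 = _2 <= 3;

   which replaces the original chain of comparisons.  */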
1688 struct range_entry
1690 tree exp;
1691 tree low;
1692 tree high;
1693 bool in_p;
1694 bool strict_overflow_p;
1695 unsigned int idx, next;
1698 /* This is similar to make_range in fold-const.c, but on top of
1699 GIMPLE instead of trees. If EXP is non-NULL, it should be
1700 an SSA_NAME and STMT argument is ignored, otherwise STMT
1701 argument should be a GIMPLE_COND. */
1703 static void
1704 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1706 int in_p;
1707 tree low, high;
1708 bool is_bool, strict_overflow_p;
1710 r->exp = NULL_TREE;
1711 r->in_p = false;
1712 r->strict_overflow_p = false;
1713 r->low = NULL_TREE;
1714 r->high = NULL_TREE;
1715 if (exp != NULL_TREE
1716 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1717 return;
1719 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1720 and see if we can refine the range. Some of the cases below may not
1721 happen, but it doesn't seem worth worrying about this. We "continue"
1722 the outer loop when we've changed something; otherwise we "break"
1723 the switch, which will "break" the while. */
1724 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1725 high = low;
1726 in_p = 0;
1727 strict_overflow_p = false;
1728 is_bool = false;
1729 if (exp == NULL_TREE)
1730 is_bool = true;
1731 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1733 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1734 is_bool = true;
1735 else
1736 return;
1738 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1739 is_bool = true;
1741 while (1)
1743 enum tree_code code;
1744 tree arg0, arg1, exp_type;
1745 tree nexp;
1746 location_t loc;
1748 if (exp != NULL_TREE)
1750 if (TREE_CODE (exp) != SSA_NAME)
1751 break;
1753 stmt = SSA_NAME_DEF_STMT (exp);
1754 if (!is_gimple_assign (stmt))
1755 break;
1757 code = gimple_assign_rhs_code (stmt);
1758 arg0 = gimple_assign_rhs1 (stmt);
1759 arg1 = gimple_assign_rhs2 (stmt);
1760 exp_type = TREE_TYPE (exp);
1762 else
1764 code = gimple_cond_code (stmt);
1765 arg0 = gimple_cond_lhs (stmt);
1766 arg1 = gimple_cond_rhs (stmt);
1767 exp_type = boolean_type_node;
1770 if (TREE_CODE (arg0) != SSA_NAME)
1771 break;
1772 loc = gimple_location (stmt);
1773 switch (code)
1775 case BIT_NOT_EXPR:
1776 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1778 in_p = !in_p;
1779 exp = arg0;
1780 continue;
1782 break;
1783 case SSA_NAME:
1784 exp = arg0;
1785 continue;
1786 CASE_CONVERT:
1787 if (is_bool)
1788 goto do_default;
1789 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1791 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1792 is_bool = true;
1793 else
1794 return;
1796 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1797 is_bool = true;
1798 goto do_default;
1799 case EQ_EXPR:
1800 case NE_EXPR:
1801 case LT_EXPR:
1802 case LE_EXPR:
1803 case GE_EXPR:
1804 case GT_EXPR:
1805 is_bool = true;
1806 /* FALLTHRU */
1807 default:
1808 if (!is_bool)
1809 return;
1810 do_default:
1811 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1812 &low, &high, &in_p,
1813 &strict_overflow_p);
1814 if (nexp != NULL_TREE)
1816 exp = nexp;
1817 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1818 continue;
1820 break;
1822 break;
1824 if (is_bool)
1826 r->exp = exp;
1827 r->in_p = in_p;
1828 r->low = low;
1829 r->high = high;
1830 r->strict_overflow_p = strict_overflow_p;
1834 /* Comparison function for qsort. Sort entries
1835 without SSA_NAME exp first, then with SSA_NAMEs sorted
1836 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1837 by increasing ->low and if ->low is the same, by increasing
1838 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1839 maximum. */
1841 static int
1842 range_entry_cmp (const void *a, const void *b)
1844 const struct range_entry *p = (const struct range_entry *) a;
1845 const struct range_entry *q = (const struct range_entry *) b;
1847 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1849 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1851 /* Group range_entries for the same SSA_NAME together. */
1852 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1853 return -1;
1854 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1855 return 1;
1856 /* If ->low is different, NULL low goes first, then by
1857 ascending low. */
1858 if (p->low != NULL_TREE)
1860 if (q->low != NULL_TREE)
1862 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1863 p->low, q->low);
1864 if (tem && integer_onep (tem))
1865 return -1;
1866 tem = fold_binary (GT_EXPR, boolean_type_node,
1867 p->low, q->low);
1868 if (tem && integer_onep (tem))
1869 return 1;
1871 else
1872 return 1;
1874 else if (q->low != NULL_TREE)
1875 return -1;
1876 /* If ->high is different, NULL high goes last, before that by
1877 ascending high. */
1878 if (p->high != NULL_TREE)
1880 if (q->high != NULL_TREE)
1882 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1883 p->high, q->high);
1884 if (tem && integer_onep (tem))
1885 return -1;
1886 tem = fold_binary (GT_EXPR, boolean_type_node,
1887 p->high, q->high);
1888 if (tem && integer_onep (tem))
1889 return 1;
1891 else
1892 return -1;
1894 else if (p->high != NULL_TREE)
1895 return 1;
1896 /* If both ranges are the same, sort below by ascending idx. */
1898 else
1899 return 1;
1901 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1902 return -1;
1904 if (p->idx < q->idx)
1905 return -1;
1906 else
1908 gcc_checking_assert (p->idx > q->idx);
1909 return 1;
1913 /* Helper routine of optimize_range_test.
1914 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1915 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1916 OPCODE and OPS are arguments of optimize_range_tests. Return
1917 true if the range merge has been successful.
1918 If OPCODE is ERROR_MARK, this is called from within
1919 maybe_optimize_range_tests and is performing inter-bb range optimization.
1920 Changes should be then performed right away, and whether an op is
1921 BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank. */
1923 static bool
1924 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1925 unsigned int count, enum tree_code opcode,
1926 vec<operand_entry_t> *ops, tree exp, bool in_p,
1927 tree low, tree high, bool strict_overflow_p)
1929 operand_entry_t oe = (*ops)[range->idx];
1930 tree op = oe->op;
1931 gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
1932 location_t loc = gimple_location (stmt);
1933 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
1934 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
1935 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1936 gimple_stmt_iterator gsi;
1938 if (tem == NULL_TREE)
1939 return false;
1941 if (strict_overflow_p && issue_strict_overflow_warning (wc))
1942 warning_at (loc, OPT_Wstrict_overflow,
1943 "assuming signed overflow does not occur "
1944 "when simplifying range test");
1946 if (dump_file && (dump_flags & TDF_DETAILS))
1948 struct range_entry *r;
1949 fprintf (dump_file, "Optimizing range tests ");
1950 print_generic_expr (dump_file, range->exp, 0);
1951 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1952 print_generic_expr (dump_file, range->low, 0);
1953 fprintf (dump_file, ", ");
1954 print_generic_expr (dump_file, range->high, 0);
1955 fprintf (dump_file, "]");
1956 for (r = otherrange; r < otherrange + count; r++)
1958 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1959 print_generic_expr (dump_file, r->low, 0);
1960 fprintf (dump_file, ", ");
1961 print_generic_expr (dump_file, r->high, 0);
1962 fprintf (dump_file, "]");
1964 fprintf (dump_file, "\n into ");
1965 print_generic_expr (dump_file, tem, 0);
1966 fprintf (dump_file, "\n");
1969 if (opcode == BIT_IOR_EXPR
1970 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
1971 tem = invert_truthvalue_loc (loc, tem);
1973 tem = fold_convert_loc (loc, optype, tem);
1974 gsi = gsi_for_stmt (stmt);
1975 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
1976 GSI_SAME_STMT);
1978 /* If doing inter-bb range test optimization, update the
1979 stmts immediately. Start with changing the first range test
1980 immediate use to the new value (TEM), or, if the first range
1981 test is a GIMPLE_COND stmt, change that condition. */
1982 if (opcode == ERROR_MARK)
1984 if (op)
1986 imm_use_iterator iter;
1987 use_operand_p use_p;
1988 gimple use_stmt;
1990 FOR_EACH_IMM_USE_STMT (use_stmt, iter, op)
1992 if (is_gimple_debug (use_stmt))
1993 continue;
1994 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
1995 SET_USE (use_p, tem);
1996 update_stmt (use_stmt);
1999 else
2001 gimple_cond_set_code (stmt, NE_EXPR);
2002 gimple_cond_set_lhs (stmt, tem);
2003 gimple_cond_set_rhs (stmt, boolean_false_node);
2004 update_stmt (stmt);
2007 oe->op = tem;
2008 range->exp = exp;
2009 range->low = low;
2010 range->high = high;
2011 range->in_p = in_p;
2012 range->strict_overflow_p = false;
2014 for (range = otherrange; range < otherrange + count; range++)
2016 oe = (*ops)[range->idx];
2017 /* Now change all the other range test immediate uses, so that
2018 those tests will be optimized away. */
2019 if (opcode == ERROR_MARK)
2021 if (oe->op)
2023 imm_use_iterator iter;
2024 use_operand_p use_p;
2025 gimple use_stmt;
2027 FOR_EACH_IMM_USE_STMT (use_stmt, iter, oe->op)
2029 if (is_gimple_debug (use_stmt))
2030 continue;
2031 /* If imm use of _8 is a statement like _7 = _8 | _9;,
2032 adjust it into _7 = _9;. */
2033 if (is_gimple_assign (use_stmt)
2034 && gimple_assign_rhs_code (use_stmt) == oe->rank)
2036 tree expr = NULL_TREE;
2037 if (oe->op == gimple_assign_rhs1 (use_stmt))
2038 expr = gimple_assign_rhs2 (use_stmt);
2039 else if (oe->op == gimple_assign_rhs2 (use_stmt))
2040 expr = gimple_assign_rhs1 (use_stmt);
2041 if (expr
2042 && expr != oe->op
2043 && TREE_CODE (expr) == SSA_NAME)
2045 gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt);
2046 gimple_assign_set_rhs_with_ops (&gsi2, SSA_NAME,
2047 expr, NULL_TREE);
2048 update_stmt (use_stmt);
2049 continue;
2052 /* If imm use of _8 is a statement like _7 = (int) _8;,
2053 adjust it into _7 = 0; or _7 = 1;. */
2054 if (gimple_assign_cast_p (use_stmt)
2055 && oe->op == gimple_assign_rhs1 (use_stmt))
2057 tree lhs = gimple_assign_lhs (use_stmt);
2058 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
2060 gimple_stmt_iterator gsi2
2061 = gsi_for_stmt (use_stmt);
2062 tree expr = build_int_cst (TREE_TYPE (lhs),
2063 oe->rank == BIT_IOR_EXPR
2064 ? 0 : 1);
2065 gimple_assign_set_rhs_with_ops (&gsi2,
2066 INTEGER_CST,
2067 expr, NULL_TREE);
2068 update_stmt (use_stmt);
2069 continue;
2072 /* Otherwise replace the use with 0 or 1. */
2073 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2074 SET_USE (use_p,
2075 build_int_cst (TREE_TYPE (oe->op),
2076 oe->rank == BIT_IOR_EXPR
2077 ? 0 : 1));
2078 update_stmt (use_stmt);
2081 else
2083 /* If range test was a GIMPLE_COND, simply change it
2084 into an always false or always true condition. */
2085 stmt = last_stmt (BASIC_BLOCK (oe->id));
2086 if (oe->rank == BIT_IOR_EXPR)
2087 gimple_cond_make_false (stmt);
2088 else
2089 gimple_cond_make_true (stmt);
2090 update_stmt (stmt);
2093 oe->op = error_mark_node;
2094 range->exp = NULL_TREE;
2096 return true;
2099 /* Optimize range tests, similarly to how fold_range_test optimizes
2100 them on trees. The tree code for the binary
2101 operation between all the operands is OPCODE.
2102 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2103 maybe_optimize_range_tests for inter-bb range optimization.
2104 In that case if oe->op is NULL, oe->id is bb->index whose
2105 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2106 the actual opcode. */
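/* For illustration only (hypothetical SSA names): with OPCODE
   BIT_IOR_EXPR and operands representing x_1 == 1, x_1 == 2 and
   x_1 == 3, the merging loop below can collapse the three tests
   into a single range check roughly equivalent to
   (unsigned) (x_1 - 1) <= 2, i.e. 1 <= x_1 && x_1 <= 3.  */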
2108 static void
2109 optimize_range_tests (enum tree_code opcode,
2110 vec<operand_entry_t> *ops)
2112 unsigned int length = ops->length (), i, j, first;
2113 operand_entry_t oe;
2114 struct range_entry *ranges;
2115 bool any_changes = false;
2117 if (length == 1)
2118 return;
2120 ranges = XNEWVEC (struct range_entry, length);
2121 for (i = 0; i < length; i++)
2123 oe = (*ops)[i];
2124 ranges[i].idx = i;
2125 init_range_entry (ranges + i, oe->op,
2126 oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2127 /* For |, invert it now; we will invert it again before emitting
2128 the optimized expression. */
2129 if (opcode == BIT_IOR_EXPR
2130 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2131 ranges[i].in_p = !ranges[i].in_p;
2134 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2135 for (i = 0; i < length; i++)
2136 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2137 break;
2139 /* Try to merge ranges. */
2140 for (first = i; i < length; i++)
2142 tree low = ranges[i].low;
2143 tree high = ranges[i].high;
2144 int in_p = ranges[i].in_p;
2145 bool strict_overflow_p = ranges[i].strict_overflow_p;
2146 int update_fail_count = 0;
2148 for (j = i + 1; j < length; j++)
2150 if (ranges[i].exp != ranges[j].exp)
2151 break;
2152 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2153 ranges[j].in_p, ranges[j].low, ranges[j].high))
2154 break;
2155 strict_overflow_p |= ranges[j].strict_overflow_p;
2158 if (j == i + 1)
2159 continue;
2161 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2162 ops, ranges[i].exp, in_p, low, high,
2163 strict_overflow_p))
2165 i = j - 1;
2166 any_changes = true;
2168 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2169 while update_range_test would fail. */
2170 else if (update_fail_count == 64)
2171 i = j - 1;
2172 else
2173 ++update_fail_count;
2176 /* Optimize X == CST1 || X == CST2
2177 if popcount (CST1 ^ CST2) == 1 into
2178 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2179 Similarly for ranges. E.g.
2180 X != 2 && X != 3 && X != 10 && X != 11
2181 will be transformed by the above loop into
2182 (X - 2U) <= 1U && (X - 10U) <= 1U
2183 and this loop can transform that into
2184 ((X & ~8) - 2U) <= 1U. */
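/* A worked instance of the above, using hypothetical constants:
   for X == 4 || X == 6 we have CST1 ^ CST2 = 4 ^ 6 = 2, whose
   popcount is 1, so the pair becomes (X & ~2) == 4; both 4 and 6
   map to 4 under the mask while any other value does not.  */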
2185 for (i = first; i < length; i++)
2187 tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
2189 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2190 continue;
2191 type = TREE_TYPE (ranges[i].exp);
2192 if (!INTEGRAL_TYPE_P (type))
2193 continue;
2194 lowi = ranges[i].low;
2195 if (lowi == NULL_TREE)
2196 lowi = TYPE_MIN_VALUE (type);
2197 highi = ranges[i].high;
2198 if (highi == NULL_TREE)
2199 continue;
2200 for (j = i + 1; j < length && j < i + 64; j++)
2202 if (ranges[j].exp == NULL_TREE)
2203 continue;
2204 if (ranges[i].exp != ranges[j].exp)
2205 break;
2206 if (ranges[j].in_p)
2207 continue;
2208 lowj = ranges[j].low;
2209 if (lowj == NULL_TREE)
2210 continue;
2211 highj = ranges[j].high;
2212 if (highj == NULL_TREE)
2213 highj = TYPE_MAX_VALUE (type);
2214 tem = fold_binary (GT_EXPR, boolean_type_node,
2215 lowj, highi);
2216 if (tem == NULL_TREE || !integer_onep (tem))
2217 continue;
2218 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2219 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2220 continue;
2221 gcc_checking_assert (!integer_zerop (lowxor));
2222 tem = fold_binary (MINUS_EXPR, type, lowxor,
2223 build_int_cst (type, 1));
2224 if (tem == NULL_TREE)
2225 continue;
2226 tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
2227 if (tem == NULL_TREE || !integer_zerop (tem))
2228 continue;
2229 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2230 if (!tree_int_cst_equal (lowxor, highxor))
2231 continue;
2232 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2233 exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
2234 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2235 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2236 if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
2237 ranges[i].in_p, lowj, highj,
2238 ranges[i].strict_overflow_p
2239 || ranges[j].strict_overflow_p))
2241 any_changes = true;
2242 break;
2247 if (any_changes && opcode != ERROR_MARK)
2249 j = 0;
2250 FOR_EACH_VEC_ELT (*ops, i, oe)
2252 if (oe->op == error_mark_node)
2253 continue;
2254 else if (i != j)
2255 (*ops)[j] = oe;
2256 j++;
2258 ops->truncate (j);
2261 XDELETEVEC (ranges);
2264 /* Return true if STMT is a cast like:
2265 <bb N>:
2267 _123 = (int) _234;
2269 <bb M>:
2270 # _345 = PHI <_123(N), 1(...), 1(...)>
2271 where _234 has bool type, _123 has single use and
2272 bb N has a single successor M. This is commonly used in
2273 the last block of a range test. */
2275 static bool
2276 final_range_test_p (gimple stmt)
2278 basic_block bb, rhs_bb;
2279 edge e;
2280 tree lhs, rhs;
2281 use_operand_p use_p;
2282 gimple use_stmt;
2284 if (!gimple_assign_cast_p (stmt))
2285 return false;
2286 bb = gimple_bb (stmt);
2287 if (!single_succ_p (bb))
2288 return false;
2289 e = single_succ_edge (bb);
2290 if (e->flags & EDGE_COMPLEX)
2291 return false;
2293 lhs = gimple_assign_lhs (stmt);
2294 rhs = gimple_assign_rhs1 (stmt);
2295 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2296 || TREE_CODE (rhs) != SSA_NAME
2297 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2298 return false;
2300 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2301 if (!single_imm_use (lhs, &use_p, &use_stmt))
2302 return false;
2304 if (gimple_code (use_stmt) != GIMPLE_PHI
2305 || gimple_bb (use_stmt) != e->dest)
2306 return false;
2308 /* And that the rhs is defined in the same loop. */
2309 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2310 if (rhs_bb == NULL
2311 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2312 return false;
2314 return true;
2317 /* Return true if BB is a suitable basic block for inter-bb range test
2318 optimization. If BACKWARD is true, BB should be the only predecessor
2319 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2320 or checked against the common basic block to which all conditions
2321 branch when true resp. false. If BACKWARD is false, TEST_BB should
2322 be the only predecessor of BB. */
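/* A hypothetical shape this routine accepts (BACKWARD case), for
   illustration only:

     bb1:  if (a_1 != 0) goto other_bb; else goto bb2;
     bb2:  if (b_2 != 0) goto other_bb; else goto bb3;
     bb3:  _5 = (int) c_3;                  <- final_range_test_p
     other_bb:  # t_6 = PHI <1(bb1), 1(bb2), _5(bb3)>

   Each condition bb has one edge to other_bb and one to the next
   bb in the chain, and the PHI arguments agree as checked below.  */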
2324 static bool
2325 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2326 bool backward)
2328 edge_iterator ei, ei2;
2329 edge e, e2;
2330 gimple stmt;
2331 gimple_stmt_iterator gsi;
2332 bool other_edge_seen = false;
2333 bool is_cond;
2335 if (test_bb == bb)
2336 return false;
2337 /* Check last stmt first. */
2338 stmt = last_stmt (bb);
2339 if (stmt == NULL
2340 || (gimple_code (stmt) != GIMPLE_COND
2341 && (backward || !final_range_test_p (stmt)))
2342 || gimple_visited_p (stmt)
2343 || stmt_could_throw_p (stmt)
2344 || *other_bb == bb)
2345 return false;
2346 is_cond = gimple_code (stmt) == GIMPLE_COND;
2347 if (is_cond)
2349 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2350 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2351 to *OTHER_BB (if not set yet, try to find it out). */
2352 if (EDGE_COUNT (bb->succs) != 2)
2353 return false;
2354 FOR_EACH_EDGE (e, ei, bb->succs)
2356 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2357 return false;
2358 if (e->dest == test_bb)
2360 if (backward)
2361 continue;
2362 else
2363 return false;
2365 if (e->dest == bb)
2366 return false;
2367 if (*other_bb == NULL)
2369 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2370 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2371 return false;
2372 else if (e->dest == e2->dest)
2373 *other_bb = e->dest;
2374 if (*other_bb == NULL)
2375 return false;
2377 if (e->dest == *other_bb)
2378 other_edge_seen = true;
2379 else if (backward)
2380 return false;
2382 if (*other_bb == NULL || !other_edge_seen)
2383 return false;
2385 else if (single_succ (bb) != *other_bb)
2386 return false;
2388 /* Now check all PHIs of *OTHER_BB. */
2389 e = find_edge (bb, *other_bb);
2390 e2 = find_edge (test_bb, *other_bb);
2391 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2393 gimple phi = gsi_stmt (gsi);
2394 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2395 corresponding to BB and TEST_BB predecessor must be the same. */
2396 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2397 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2399 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2400 one of the PHIs should have the lhs of the last stmt in
2401 that block as PHI arg and that PHI should have 0 or 1
2402 corresponding to it in all other range test basic blocks
2403 considered. */
2404 if (!is_cond)
2406 if (gimple_phi_arg_def (phi, e->dest_idx)
2407 == gimple_assign_lhs (stmt)
2408 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2409 || integer_onep (gimple_phi_arg_def (phi,
2410 e2->dest_idx))))
2411 continue;
2413 else
2415 gimple test_last = last_stmt (test_bb);
2416 if (gimple_code (test_last) != GIMPLE_COND
2417 && gimple_phi_arg_def (phi, e2->dest_idx)
2418 == gimple_assign_lhs (test_last)
2419 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2420 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2421 continue;
2424 return false;
2427 return true;
2430 /* Return true if BB doesn't have side-effects that would disallow
2431 range test optimization, all SSA_NAMEs set in the bb are consumed
2432 in the bb and there are no PHIs. */
2434 static bool
2435 no_side_effect_bb (basic_block bb)
2437 gimple_stmt_iterator gsi;
2438 gimple last;
2440 if (!gimple_seq_empty_p (phi_nodes (bb)))
2441 return false;
2442 last = last_stmt (bb);
2443 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2445 gimple stmt = gsi_stmt (gsi);
2446 tree lhs;
2447 imm_use_iterator imm_iter;
2448 use_operand_p use_p;
2450 if (is_gimple_debug (stmt))
2451 continue;
2452 if (gimple_has_side_effects (stmt))
2453 return false;
2454 if (stmt == last)
2455 return true;
2456 if (!is_gimple_assign (stmt))
2457 return false;
2458 lhs = gimple_assign_lhs (stmt);
2459 if (TREE_CODE (lhs) != SSA_NAME)
2460 return false;
2461 if (gimple_assign_rhs_could_trap_p (stmt))
2462 return false;
2463 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2465 gimple use_stmt = USE_STMT (use_p);
2466 if (is_gimple_debug (use_stmt))
2467 continue;
2468 if (gimple_bb (use_stmt) != bb)
2469 return false;
2472 return false;
2475 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2476 return true and fill in *OPS recursively. */
2478 static bool
2479 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2480 struct loop *loop)
2482 gimple stmt = SSA_NAME_DEF_STMT (var);
2483 tree rhs[2];
2484 int i;
2486 if (!is_reassociable_op (stmt, code, loop))
2487 return false;
2489 rhs[0] = gimple_assign_rhs1 (stmt);
2490 rhs[1] = gimple_assign_rhs2 (stmt);
2491 gimple_set_visited (stmt, true);
2492 for (i = 0; i < 2; i++)
2493 if (TREE_CODE (rhs[i]) == SSA_NAME
2494 && !get_ops (rhs[i], code, ops, loop)
2495 && has_single_use (rhs[i]))
2497 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2499 oe->op = rhs[i];
2500 oe->rank = code;
2501 oe->id = 0;
2502 oe->count = 1;
2503 ops->safe_push (oe);
2505 return true;
2508 /* Inter-bb range test optimization. */
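/* For illustration (hypothetical source): a condition such as

     if (a == 3 || a == 5 || a == 7)
       foo ();

   is usually gimplified into a chain of GIMPLE_CONDs in separate
   basic blocks.  This routine walks that chain, collects the
   individual comparisons into an operand vector and hands them to
   optimize_range_tests above, which may merge them into fewer
   range checks.  */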
2510 static void
2511 maybe_optimize_range_tests (gimple stmt)
2513 basic_block first_bb = gimple_bb (stmt);
2514 basic_block last_bb = first_bb;
2515 basic_block other_bb = NULL;
2516 basic_block bb;
2517 edge_iterator ei;
2518 edge e;
2519 vec<operand_entry_t> ops = vNULL;
2521 /* Consider only basic blocks that end with GIMPLE_COND or
2522 a cast statement satisfying final_range_test_p. All
2523 but the last bb in the first_bb .. last_bb range
2524 should end with GIMPLE_COND. */
2525 if (gimple_code (stmt) == GIMPLE_COND)
2527 if (EDGE_COUNT (first_bb->succs) != 2)
2528 return;
2530 else if (final_range_test_p (stmt))
2531 other_bb = single_succ (first_bb);
2532 else
2533 return;
2535 if (stmt_could_throw_p (stmt))
2536 return;
2538 /* As relative ordering of post-dominator sons isn't fixed,
2539 maybe_optimize_range_tests can be called first on any
2540 bb in the range we want to optimize. So, start searching
2541 backwards, if first_bb can be set to a predecessor. */
2542 while (single_pred_p (first_bb))
2544 basic_block pred_bb = single_pred (first_bb);
2545 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2546 break;
2547 if (!no_side_effect_bb (first_bb))
2548 break;
2549 first_bb = pred_bb;
2551 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2552 Before starting forward search in last_bb successors, find
2553 out the other_bb. */
2554 if (first_bb == last_bb)
2556 other_bb = NULL;
2557 /* As non-GIMPLE_COND last stmt always terminates the range,
2558 if forward search didn't discover anything, just give up. */
2559 if (gimple_code (stmt) != GIMPLE_COND)
2560 return;
2561 /* Look at both successors. Either the successor ends with a GIMPLE_COND
2562 and satisfies suitable_cond_bb, or it ends with a cast and
2563 other_bb is that cast's successor. */
2564 FOR_EACH_EDGE (e, ei, first_bb->succs)
2565 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2566 || e->dest == first_bb)
2567 return;
2568 else if (single_pred_p (e->dest))
2570 stmt = last_stmt (e->dest);
2571 if (stmt
2572 && gimple_code (stmt) == GIMPLE_COND
2573 && EDGE_COUNT (e->dest->succs) == 2)
2575 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2576 break;
2577 else
2578 other_bb = NULL;
2580 else if (stmt
2581 && final_range_test_p (stmt)
2582 && find_edge (first_bb, single_succ (e->dest)))
2584 other_bb = single_succ (e->dest);
2585 if (other_bb == first_bb)
2586 other_bb = NULL;
2589 if (other_bb == NULL)
2590 return;
2592 /* Now do the forward search, moving last_bb to successor bbs
2593 that aren't other_bb. */
2594 while (EDGE_COUNT (last_bb->succs) == 2)
2596 FOR_EACH_EDGE (e, ei, last_bb->succs)
2597 if (e->dest != other_bb)
2598 break;
2599 if (e == NULL)
2600 break;
2601 if (!single_pred_p (e->dest))
2602 break;
2603 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2604 break;
2605 if (!no_side_effect_bb (e->dest))
2606 break;
2607 last_bb = e->dest;
2609 if (first_bb == last_bb)
2610 return;
2611 /* Here basic blocks first_bb through last_bb's predecessor
2612 end with GIMPLE_COND, all of them have one of the edges to
2613 other_bb and another to another block in the range,
2614 all blocks except first_bb don't have side-effects and
2615 last_bb ends with either GIMPLE_COND, or cast satisfying
2616 final_range_test_p. */
2617 for (bb = last_bb; ; bb = single_pred (bb))
2619 enum tree_code code;
2620 tree lhs, rhs;
2622 e = find_edge (bb, other_bb);
2623 stmt = last_stmt (bb);
2624 gimple_set_visited (stmt, true);
2625 if (gimple_code (stmt) != GIMPLE_COND)
2627 use_operand_p use_p;
2628 gimple phi;
2629 edge e2;
2630 unsigned int d;
2632 lhs = gimple_assign_lhs (stmt);
2633 rhs = gimple_assign_rhs1 (stmt);
2634 gcc_assert (bb == last_bb);
2636 /* stmt is
2637 _123 = (int) _234;
2639 followed by:
2640 <bb M>:
2641 # _345 = PHI <_123(N), 1(...), 1(...)>
2643 or 0 instead of 1. If it is 0, the _234
2644 range test is anded together with all the
2645 other range tests, if it is 1, it is ored with
2646 them. */
2647 single_imm_use (lhs, &use_p, &phi);
2648 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2649 e2 = find_edge (first_bb, other_bb);
2650 d = e2->dest_idx;
2651 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2652 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2653 code = BIT_AND_EXPR;
2654 else
2656 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2657 code = BIT_IOR_EXPR;
2660 /* If _234 SSA_NAME_DEF_STMT is
2661 _234 = _567 | _789;
2662 (or &, corresponding to 1/0 in the phi arguments),
2663 push into ops the individual range test arguments
2664 of the bitwise or resp. and, recursively. */
2665 if (!get_ops (rhs, code, &ops,
2666 loop_containing_stmt (stmt))
2667 && has_single_use (rhs))
2669 /* Otherwise, push the _234 range test itself. */
2670 operand_entry_t oe
2671 = (operand_entry_t) pool_alloc (operand_entry_pool);
2673 oe->op = rhs;
2674 oe->rank = code;
2675 oe->id = 0;
2676 oe->count = 1;
2677 ops.safe_push (oe);
2679 continue;
2681 /* Otherwise stmt is GIMPLE_COND. */
2682 code = gimple_cond_code (stmt);
2683 lhs = gimple_cond_lhs (stmt);
2684 rhs = gimple_cond_rhs (stmt);
2685 if (TREE_CODE (lhs) == SSA_NAME
2686 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2687 && ((code != EQ_EXPR && code != NE_EXPR)
2688 || rhs != boolean_false_node
2689 /* Either push into ops the individual bitwise
2690 or resp. and operands, depending on which
2691 edge is other_bb. */
2692 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2693 ^ (code == EQ_EXPR))
2694 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2695 loop_containing_stmt (stmt))))
2697 /* Or push the GIMPLE_COND stmt itself. */
2698 operand_entry_t oe
2699 = (operand_entry_t) pool_alloc (operand_entry_pool);
2701 oe->op = NULL;
2702 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2703 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2704 /* oe->op = NULL signals that there is no SSA_NAME
2705 for the range test, and oe->id instead is the
2706 basic block number at whose end the GIMPLE_COND
2707 is. */
2708 oe->id = bb->index;
2709 oe->count = 1;
2710 ops.safe_push (oe);
2712 if (bb == first_bb)
2713 break;
2715 if (ops.length () > 1)
2716 optimize_range_tests (ERROR_MARK, &ops);
2717 ops.release ();
2720 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2721 of STMT in its operands. This is also known as a "destructive
2722 update" operation. */
2724 static bool
2725 is_phi_for_stmt (gimple stmt, tree operand)
2727 gimple def_stmt;
2728 tree lhs;
2729 use_operand_p arg_p;
2730 ssa_op_iter i;
2732 if (TREE_CODE (operand) != SSA_NAME)
2733 return false;
2735 lhs = gimple_assign_lhs (stmt);
2737 def_stmt = SSA_NAME_DEF_STMT (operand);
2738 if (gimple_code (def_stmt) != GIMPLE_PHI)
2739 return false;
2741 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2742 if (lhs == USE_FROM_PTR (arg_p))
2743 return true;
2744 return false;
2747 /* Remove the def stmt of VAR if VAR has zero uses, and recurse
2748 on its rhs1 operand if so. */
2750 static void
2751 remove_visited_stmt_chain (tree var)
2753 gimple stmt;
2754 gimple_stmt_iterator gsi;
2756 while (1)
2758 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2759 return;
2760 stmt = SSA_NAME_DEF_STMT (var);
2761 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2763 var = gimple_assign_rhs1 (stmt);
2764 gsi = gsi_for_stmt (stmt);
2765 gsi_remove (&gsi, true);
2766 release_defs (stmt);
2768 else
2769 return;
2773 /* This function checks three consecutive operands in
2774 the passed operands vector OPS starting from OPINDEX and
2775 swaps two operands if it is profitable for the binary operation
2776 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
2778 We pair ops with the same rank if possible.
2780 The alternative we try is to see if STMT is a destructive
2781 update style statement, which is like:
2782 b = phi (a, ...)
2783 a = c + b;
2784 In that case, we want to use the destructive update form to
2785 expose the possible vectorizer sum reduction opportunity.
2786 In that case, the third operand will be the phi node. This
2787 check is not performed if STMT is null.
2789 We could, of course, try to be better as noted above, and do a
2790 lot of work to try to find these opportunities in >3 operand
2791 cases, but it is unlikely to be worth it. */
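/* For illustration (hypothetical operands and ranks): with
   OPS[OPINDEX..] = { a_1 (rank 3), b_2 (rank 3), c_3 (rank 2) },
   oe1 and oe2 share a rank while oe3 differs, so a_1 and c_3 are
   swapped and the inner binary op built from OPINDEX + 1 and
   OPINDEX + 2 consumes the two rank-3 operands.  */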
2793 static void
2794 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
2795 unsigned int opindex, gimple stmt)
2797 operand_entry_t oe1, oe2, oe3;
2799 oe1 = ops[opindex];
2800 oe2 = ops[opindex + 1];
2801 oe3 = ops[opindex + 2];
2803 if ((oe1->rank == oe2->rank
2804 && oe2->rank != oe3->rank)
2805 || (stmt && is_phi_for_stmt (stmt, oe3->op)
2806 && !is_phi_for_stmt (stmt, oe1->op)
2807 && !is_phi_for_stmt (stmt, oe2->op)))
2809 struct operand_entry temp = *oe3;
2810 oe3->op = oe1->op;
2811 oe3->rank = oe1->rank;
2812 oe1->op = temp.op;
2813 oe1->rank = temp.rank;
2815 else if ((oe1->rank == oe3->rank
2816 && oe2->rank != oe3->rank)
2817 || (stmt && is_phi_for_stmt (stmt, oe2->op)
2818 && !is_phi_for_stmt (stmt, oe1->op)
2819 && !is_phi_for_stmt (stmt, oe3->op)))
2821 struct operand_entry temp = *oe2;
2822 oe2->op = oe1->op;
2823 oe2->rank = oe1->rank;
2824 oe1->op = temp.op;
2825 oe1->rank = temp.rank;
2829 /* Recursively rewrite our linearized statements so that the operators
2830 match those in OPS[OPINDEX], putting the computation in rank
2831 order. */
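/* For illustration (hypothetical names): given the linearized chain

     _1 = x_4 + y_5;
     _2 = _1 + z_6;

   and OPS sorted by rank as { a_7, b_8, c_9 }, the recursion below
   rewrites the chain into

     _1 = a_7 + b_8;
     _2 = _1 + c_9;

   reusing the existing statements rather than emitting new ones.  */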
2833 static void
2834 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2835 vec<operand_entry_t> ops, bool moved)
2837 tree rhs1 = gimple_assign_rhs1 (stmt);
2838 tree rhs2 = gimple_assign_rhs2 (stmt);
2839 operand_entry_t oe;
2841 /* If we have three operands left, then we want to make sure the ones
2842 that get the double binary op are chosen wisely. */
2843 if (opindex + 3 == ops.length ())
2844 swap_ops_for_binary_stmt (ops, opindex, stmt);
2846 /* The final recursion case for this function is that you have
2847 exactly two operations left.
2848 If we had exactly one op in the entire list to start with, we
2849 would have never called this function, and the tail recursion
2850 rewrites them one at a time. */
2851 if (opindex + 2 == ops.length ())
2853 operand_entry_t oe1, oe2;
2855 oe1 = ops[opindex];
2856 oe2 = ops[opindex + 1];
2858 if (rhs1 != oe1->op || rhs2 != oe2->op)
2860 if (dump_file && (dump_flags & TDF_DETAILS))
2862 fprintf (dump_file, "Transforming ");
2863 print_gimple_stmt (dump_file, stmt, 0, 0);
2866 gimple_assign_set_rhs1 (stmt, oe1->op);
2867 gimple_assign_set_rhs2 (stmt, oe2->op);
2868 update_stmt (stmt);
2869 if (rhs1 != oe1->op && rhs1 != oe2->op)
2870 remove_visited_stmt_chain (rhs1);
2872 if (dump_file && (dump_flags & TDF_DETAILS))
2874 fprintf (dump_file, " into ");
2875 print_gimple_stmt (dump_file, stmt, 0, 0);
2878 return;
2881 /* If we hit here, we should have 3 or more ops left. */
2882 gcc_assert (opindex + 2 < ops.length ());
2884 /* Rewrite the next operator. */
2885 oe = ops[opindex];
2887 if (oe->op != rhs2)
2889 if (!moved)
2891 gimple_stmt_iterator gsinow, gsirhs1;
2892 gimple stmt1 = stmt, stmt2;
2893 unsigned int count;
2895 gsinow = gsi_for_stmt (stmt);
2896 count = ops.length () - opindex - 2;
2897 while (count-- != 0)
2899 stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2900 gsirhs1 = gsi_for_stmt (stmt2);
2901 gsi_move_before (&gsirhs1, &gsinow);
2902 gsi_prev (&gsinow);
2903 stmt1 = stmt2;
2905 moved = true;
2908 if (dump_file && (dump_flags & TDF_DETAILS))
2910 fprintf (dump_file, "Transforming ");
2911 print_gimple_stmt (dump_file, stmt, 0, 0);
2914 gimple_assign_set_rhs2 (stmt, oe->op);
2915 update_stmt (stmt);
2917 if (dump_file && (dump_flags & TDF_DETAILS))
2919 fprintf (dump_file, " into ");
2920 print_gimple_stmt (dump_file, stmt, 0, 0);
2923 /* Recurse on the LHS of the binary operator, which is guaranteed to
2924 be the non-leaf side. */
2925 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2928 /* Find out how many cycles we need to compute the statement chain.
2929 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
2930 maximum number of independent statements we may execute per cycle. */
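/* A worked example of the formula below, for illustration only:
   with OPS_NUM = 10 and CPU_WIDTH = 2 we get res = 10 / 4 = 2
   cycles spent reducing by cpu_width, rest = 10 - 2 * 2 = 6, and
   since 6 is not a power of two, floor_log2 (6) + 1 = 3 more
   cycles, i.e. 5 cycles total.  With OPS_NUM = 8 the remainder 4
   is an exact power of two and the result is 2 + 2 = 4 cycles.  */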
2932 static int
2933 get_required_cycles (int ops_num, int cpu_width)
2935 int res;
2936 int elog;
2937 unsigned int rest;
2939 /* While we have more than 2 * cpu_width operands
2940 we may reduce the number of operands by cpu_width
2941 per cycle. */
2942 res = ops_num / (2 * cpu_width);
2944 /* The remaining operand count may be halved each cycle
2945 until we have only one operand. */
2946 rest = (unsigned)(ops_num - res * cpu_width);
2947 elog = exact_log2 (rest);
2948 if (elog >= 0)
2949 res += elog;
2950 else
2951 res += floor_log2 (rest) + 1;
2953 return res;
2956 /* Returns an optimal width (number of parallel operation chains) to
2957 use for computation of the given statements. */
2959 static int
2960 get_reassociation_width (int ops_num, enum tree_code opc,
2961 enum machine_mode mode)
2963 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2964 int width;
2965 int width_min;
2966 int cycles_best;
2968 if (param_width > 0)
2969 width = param_width;
2970 else
2971 width = targetm.sched.reassociation_width (opc, mode);
2973 if (width == 1)
2974 return width;
2976 /* Get the minimal time required for sequence computation. */
2977 cycles_best = get_required_cycles (ops_num, width);
2979 /* Check if we may use less width and still compute the sequence in
2980 the same time. It will allow us to reduce register usage.
2981 get_required_cycles is monotonically increasing with lower width
2982 so we can perform a binary search for the minimal width that still
2983 results in the optimal cycle count. */
2984 width_min = 1;
2985 while (width > width_min)
2987 int width_mid = (width + width_min) / 2;
2989 if (get_required_cycles (ops_num, width_mid) == cycles_best)
2990 width = width_mid;
2991 else if (width_min < width_mid)
2992 width_min = width_mid;
2993 else
2994 break;
2997 return width;
3000 /* Recursively rewrite our linearized statements so that the operators
3001 match those in OPS[OPINDEX], putting the computation in rank
3002 order and trying to allow operations to be executed in
3003 parallel. */
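/* For illustration (hypothetical names): with width 2 and
   OPS = { a, b, c, d }, instead of the serial chain
   ((a + b) + c) + d this builds

     _1 = a + b;   _2 = c + d;   (independent, may issue together)
     _3 = _1 + _2;

   so two of the additions can execute in the same cycle.  */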
3005 static void
3006 rewrite_expr_tree_parallel (gimple stmt, int width,
3007 vec<operand_entry_t> ops)
3009 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3010 int op_num = ops.length ();
3011 int stmt_num = op_num - 1;
3012 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3013 int op_index = op_num - 1;
3014 int stmt_index = 0;
3015 int ready_stmts_end = 0;
3016 int i = 0;
3017 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3019 /* We start expression rewriting from the top statements.
3020 So, in this loop we create a full list of statements
3021 we will work with. */
3022 stmts[stmt_num - 1] = stmt;
3023 for (i = stmt_num - 2; i >= 0; i--)
3024 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3026 for (i = 0; i < stmt_num; i++)
3028 tree op1, op2;
3030 /* Determine whether we should use results of
3031 already handled statements or not. */
3032 if (ready_stmts_end == 0
3033 && (i - stmt_index >= width || op_index < 1))
3034 ready_stmts_end = i;
3036 /* Now we choose operands for the next statement. A non-zero
3037 value in ready_stmts_end means that we should use
3038 the results of already generated statements as new operands. */
3039 if (ready_stmts_end > 0)
3041 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3042 if (ready_stmts_end > stmt_index)
3043 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3044 else if (op_index >= 0)
3045 op2 = ops[op_index--]->op;
3046 else
3048 gcc_assert (stmt_index < i);
3049 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3052 if (stmt_index >= ready_stmts_end)
3053 ready_stmts_end = 0;
3055 else
3057 if (op_index > 1)
3058 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3059 op2 = ops[op_index--]->op;
3060 op1 = ops[op_index--]->op;
3063 /* If we emit the last statement then we should put
3064 operands into the last statement. It will also
3065 break the loop. */
3066 if (op_index < 0 && stmt_index == i)
3067 i = stmt_num - 1;
3069 if (dump_file && (dump_flags & TDF_DETAILS))
3071 fprintf (dump_file, "Transforming ");
3072 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3075 /* We keep the original statement only for the last one. All
3076 others are recreated. */
3077 if (i == stmt_num - 1)
3079 gimple_assign_set_rhs1 (stmts[i], op1);
3080 gimple_assign_set_rhs2 (stmts[i], op2);
3081 update_stmt (stmts[i]);
3083 else
3084 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3086 if (dump_file && (dump_flags & TDF_DETAILS))
3088 fprintf (dump_file, " into ");
3089 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3093 remove_visited_stmt_chain (last_rhs1);
3096 /* Transform STMT, which is really (A + B) + (C + D), into the left
3097 linear form, ((A+B)+C)+D.
3098 Recurse on D if necessary. */
3100 static void
3101 linearize_expr (gimple stmt)
3103 gimple_stmt_iterator gsinow, gsirhs;
3104 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3105 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3106 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3107 gimple newbinrhs = NULL;
3108 struct loop *loop = loop_containing_stmt (stmt);
3110 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3111 && is_reassociable_op (binrhs, rhscode, loop));
3113 gsinow = gsi_for_stmt (stmt);
3114 gsirhs = gsi_for_stmt (binrhs);
3115 gsi_move_before (&gsirhs, &gsinow);
3117 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3118 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
3119 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3121 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3122 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3124 if (dump_file && (dump_flags & TDF_DETAILS))
3126 fprintf (dump_file, "Linearized: ");
3127 print_gimple_stmt (dump_file, stmt, 0, 0);
3130 reassociate_stats.linearized++;
3131 update_stmt (binrhs);
3132 update_stmt (binlhs);
3133 update_stmt (stmt);
3135 gimple_set_visited (stmt, true);
3136 gimple_set_visited (binlhs, true);
3137 gimple_set_visited (binrhs, true);
3139 /* Tail recurse on the new rhs if it still needs reassociation. */
3140 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3141 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3142 want to change the algorithm while converting to tuples. */
3143 linearize_expr (stmt);
3146 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3147 it. Otherwise, return NULL. */
3149 static gimple
3150 get_single_immediate_use (tree lhs)
3152 use_operand_p immuse;
3153 gimple immusestmt;
3155 if (TREE_CODE (lhs) == SSA_NAME
3156 && single_imm_use (lhs, &immuse, &immusestmt)
3157 && is_gimple_assign (immusestmt))
3158 return immusestmt;
3160 return NULL;
3163 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3164 representing the negated value. Insertions of any necessary
3165 instructions go before GSI.
3166 This function is recursive in that, if you hand it "a_5" as the
3167 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3168 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3170 static tree
3171 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
3173 gimple negatedefstmt = NULL;
3174 tree resultofnegate;
3176 /* If we are trying to negate a name, defined by an add, negate the
3177 add operands instead. */
3178 if (TREE_CODE (tonegate) == SSA_NAME)
3179 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3180 if (TREE_CODE (tonegate) == SSA_NAME
3181 && is_gimple_assign (negatedefstmt)
3182 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3183 && has_single_use (gimple_assign_lhs (negatedefstmt))
3184 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3186 gimple_stmt_iterator gsi;
3187 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3188 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3190 gsi = gsi_for_stmt (negatedefstmt);
3191 rhs1 = negate_value (rhs1, &gsi);
3192 gimple_assign_set_rhs1 (negatedefstmt, rhs1);
3194 gsi = gsi_for_stmt (negatedefstmt);
3195 rhs2 = negate_value (rhs2, &gsi);
3196 gimple_assign_set_rhs2 (negatedefstmt, rhs2);
3198 update_stmt (negatedefstmt);
3199 return gimple_assign_lhs (negatedefstmt);
3202 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3203 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
3204 NULL_TREE, true, GSI_SAME_STMT);
3205 return resultofnegate;
3208 /* Return true if we should break up the subtract in STMT into an add
3209 with negate. This is true when the subtract operands are really
3210 adds, or the subtract itself is used in an add expression. In
3211 either case, breaking up the subtract into an add with negate
3212 exposes the adds to reassociation. */
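/* For illustration (hypothetical names): given

     a_1 = b_2 + c_3;
     d_4 = a_1 - e_5;
     f_6 = d_4 + g_7;

   the subtract both consumes an add and feeds an add, so it is
   rewritten as d_4 = a_1 + (-e_5), letting the whole chain be
   reassociated as one addition list.  */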
3214 static bool
3215 should_break_up_subtract (gimple stmt)
3217 tree lhs = gimple_assign_lhs (stmt);
3218 tree binlhs = gimple_assign_rhs1 (stmt);
3219 tree binrhs = gimple_assign_rhs2 (stmt);
3220 gimple immusestmt;
3221 struct loop *loop = loop_containing_stmt (stmt);
3223 if (TREE_CODE (binlhs) == SSA_NAME
3224 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3225 return true;
3227 if (TREE_CODE (binrhs) == SSA_NAME
3228 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3229 return true;
3231 if (TREE_CODE (lhs) == SSA_NAME
3232 && (immusestmt = get_single_immediate_use (lhs))
3233 && is_gimple_assign (immusestmt)
3234 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3235 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3236 return true;
3237 return false;
3240 /* Transform STMT from A - B into A + -B. */
3242 static void
3243 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3245 tree rhs1 = gimple_assign_rhs1 (stmt);
3246 tree rhs2 = gimple_assign_rhs2 (stmt);
3248 if (dump_file && (dump_flags & TDF_DETAILS))
3250 fprintf (dump_file, "Breaking up subtract ");
3251 print_gimple_stmt (dump_file, stmt, 0, 0);
3254 rhs2 = negate_value (rhs2, gsip);
3255 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3256 update_stmt (stmt);
3259 /* Determine whether STMT is a builtin call that raises an SSA name
3260 to an integer power and has only one use. If so, and this is early
3261 reassociation and unsafe math optimizations are permitted, place
3262 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3263 If any of these conditions does not hold, return FALSE. */
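/* For illustration (hypothetical calls): __builtin_pow (x_1, 3.0)
   yields *BASE = x_1 and *EXPONENT = 3, and __builtin_powi (x_1, 4)
   yields exponent 4, while __builtin_pow (x_1, 2.5) is rejected
   because 2.5 is not exactly an integer, and any call with an
   exponent below 2 is rejected as well.  */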
3265 static bool
3266 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3268 tree fndecl, arg1;
3269 REAL_VALUE_TYPE c, cint;
3271 if (!first_pass_instance
3272 || !flag_unsafe_math_optimizations
3273 || !is_gimple_call (stmt)
3274 || !has_single_use (gimple_call_lhs (stmt)))
3275 return false;
3277 fndecl = gimple_call_fndecl (stmt);
3279 if (!fndecl
3280 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3281 return false;
3283 switch (DECL_FUNCTION_CODE (fndecl))
3285 CASE_FLT_FN (BUILT_IN_POW):
3286 *base = gimple_call_arg (stmt, 0);
3287 arg1 = gimple_call_arg (stmt, 1);
3289 if (TREE_CODE (arg1) != REAL_CST)
3290 return false;
3292 c = TREE_REAL_CST (arg1);
3294 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3295 return false;
3297 *exponent = real_to_integer (&c);
3298 real_from_integer (&cint, VOIDmode, *exponent,
3299 *exponent < 0 ? -1 : 0, 0);
3300 if (!real_identical (&c, &cint))
3301 return false;
3303 break;
3305 CASE_FLT_FN (BUILT_IN_POWI):
3306 *base = gimple_call_arg (stmt, 0);
3307 arg1 = gimple_call_arg (stmt, 1);
3309 if (!host_integerp (arg1, 0))
3310 return false;
3312 *exponent = TREE_INT_CST_LOW (arg1);
3313 break;
3315 default:
3316 return false;
3319 /* Expanding negative exponents is generally unproductive, so we don't
3320 complicate matters with those. Exponents of zero and one should
3321 have been handled by expression folding. */
3322 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3323 return false;
3325 return true;
3328 /* Recursively linearize a binary expression that is the RHS of STMT.
3329 Place the operands of the expression tree in the vector named OPS. */
3331 static void
3332 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3333 bool is_associative, bool set_visited)
3335 tree binlhs = gimple_assign_rhs1 (stmt);
3336 tree binrhs = gimple_assign_rhs2 (stmt);
3337 gimple binlhsdef = NULL, binrhsdef = NULL;
3338 bool binlhsisreassoc = false;
3339 bool binrhsisreassoc = false;
3340 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3341 struct loop *loop = loop_containing_stmt (stmt);
3342 tree base = NULL_TREE;
3343 HOST_WIDE_INT exponent = 0;
3345 if (set_visited)
3346 gimple_set_visited (stmt, true);
3348 if (TREE_CODE (binlhs) == SSA_NAME)
3350 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3351 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3352 && !stmt_could_throw_p (binlhsdef));
3355 if (TREE_CODE (binrhs) == SSA_NAME)
3357 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3358 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3359 && !stmt_could_throw_p (binrhsdef));
3362 /* If the LHS is not reassociable, but the RHS is, we need to swap
3363 them. If neither is reassociable, there is nothing we can do, so
3364 just put them in the ops vector. If the LHS is reassociable,
3365 linearize it. If both are reassociable, then linearize the RHS
3366 and the LHS. */
3368 if (!binlhsisreassoc)
3370 tree temp;
3372 /* If this is not an associative operation like division, give up. */
3373 if (!is_associative)
3375 add_to_ops_vec (ops, binrhs);
3376 return;
3379 if (!binrhsisreassoc)
3381 if (rhscode == MULT_EXPR
3382 && TREE_CODE (binrhs) == SSA_NAME
3383 && acceptable_pow_call (binrhsdef, &base, &exponent))
3385 add_repeat_to_ops_vec (ops, base, exponent);
3386 gimple_set_visited (binrhsdef, true);
3388 else
3389 add_to_ops_vec (ops, binrhs);
3391 if (rhscode == MULT_EXPR
3392 && TREE_CODE (binlhs) == SSA_NAME
3393 && acceptable_pow_call (binlhsdef, &base, &exponent))
3395 add_repeat_to_ops_vec (ops, base, exponent);
3396 gimple_set_visited (binlhsdef, true);
3398 else
3399 add_to_ops_vec (ops, binlhs);
3401 return;
3404 if (dump_file && (dump_flags & TDF_DETAILS))
3406 fprintf (dump_file, "swapping operands of ");
3407 print_gimple_stmt (dump_file, stmt, 0, 0);
3410 swap_tree_operands (stmt,
3411 gimple_assign_rhs1_ptr (stmt),
3412 gimple_assign_rhs2_ptr (stmt));
3413 update_stmt (stmt);
3415 if (dump_file && (dump_flags & TDF_DETAILS))
3417 fprintf (dump_file, " is now ");
3418 print_gimple_stmt (dump_file, stmt, 0, 0);
3421 /* We want to make it so the lhs is always the reassociative op,
3422 so swap. */
3423 temp = binlhs;
3424 binlhs = binrhs;
3425 binrhs = temp;
3427 else if (binrhsisreassoc)
3429 linearize_expr (stmt);
3430 binlhs = gimple_assign_rhs1 (stmt);
3431 binrhs = gimple_assign_rhs2 (stmt);
3434 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3435 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3436 rhscode, loop));
3437 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3438 is_associative, set_visited);
3440 if (rhscode == MULT_EXPR
3441 && TREE_CODE (binrhs) == SSA_NAME
3442 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3444 add_repeat_to_ops_vec (ops, base, exponent);
3445 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3447 else
3448 add_to_ops_vec (ops, binrhs);
3451 /* Repropagate the negates back into subtracts, since no other pass
3452 currently does it. */
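/* Summarizing the cases handled below, with x = -a recorded in
   plus_negates (hypothetical names):

     y = b + x   becomes   y = b - a
     y = x - b   becomes   x' = a + b;  y = -x'   (negate pushed down)
     y = b - x   becomes   y = b + a  */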
3454 static void
3455 repropagate_negates (void)
3457 unsigned int i = 0;
3458 tree negate;
3460 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3462 gimple user = get_single_immediate_use (negate);
3464 if (!user || !is_gimple_assign (user))
3465 continue;
3467 /* The negate operand can be either operand of a PLUS_EXPR
3468 (it can be the LHS if the RHS is a constant for example).
3470 Force the negate operand to the RHS of the PLUS_EXPR, then
3471 transform the PLUS_EXPR into a MINUS_EXPR. */
3472 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3474 /* If the negated operand appears on the LHS of the
3475 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3476 to force the negated operand to the RHS of the PLUS_EXPR. */
3477 if (gimple_assign_rhs1 (user) == negate)
3479 swap_tree_operands (user,
3480 gimple_assign_rhs1_ptr (user),
3481 gimple_assign_rhs2_ptr (user));
3484 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3485 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3486 if (gimple_assign_rhs2 (user) == negate)
3488 tree rhs1 = gimple_assign_rhs1 (user);
3489 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3490 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3491 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3492 update_stmt (user);
3495 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3497 if (gimple_assign_rhs1 (user) == negate)
3499 /* We have
3500 x = -a
3501 y = x - b
3502 which we transform into
3503 x = a + b
3504 y = -x .
3505 This pushes down the negate which we possibly can merge
3506 into some other operation, hence insert it into the
3507 plus_negates vector. */
3508 gimple feed = SSA_NAME_DEF_STMT (negate);
3509 tree a = gimple_assign_rhs1 (feed);
3510 tree rhs2 = gimple_assign_rhs2 (user);
3511 gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
3512 gimple_replace_lhs (feed, negate);
3513 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
3514 update_stmt (gsi_stmt (gsi));
3515 gsi2 = gsi_for_stmt (user);
3516 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
3517 update_stmt (gsi_stmt (gsi2));
3518 gsi_move_before (&gsi, &gsi2);
3519 plus_negates.safe_push (gimple_assign_lhs (gsi_stmt (gsi2)));
3521 else
3523 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3524 rid of one operation. */
3525 gimple feed = SSA_NAME_DEF_STMT (negate);
3526 tree a = gimple_assign_rhs1 (feed);
3527 tree rhs1 = gimple_assign_rhs1 (user);
3528 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3529 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3530 update_stmt (gsi_stmt (gsi));
3536 /* Returns true if OP is of a type for which we can do reassociation.
3537 That is for integral or non-saturating fixed-point types, and for
3538 floating point types when -fassociative-math is enabled. */
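/* For illustration (assuming default flags): an unsigned int
   operand qualifies because its overflow wraps, a plain signed int
   does not unless -fwrapv is in effect, and a double operand
   qualifies only when -fassociative-math is enabled.  */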
3540 static bool
3541 can_reassociate_p (tree op)
3543 tree type = TREE_TYPE (op);
3544 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3545 || NON_SAT_FIXED_POINT_TYPE_P (type)
3546 || (flag_associative_math && FLOAT_TYPE_P (type)))
3547 return true;
3548 return false;
3551 /* Break up subtract operations in block BB.
3553 We do this top down because we don't know whether the subtract is
3554 part of a possible chain of reassociation except at the top.
3556 IE given
3557 d = f + g
3558 c = a + e
3559 b = c - d
3560 q = b - r
3561 k = t - q
3563 we want to break up k = t - q, but we won't until we've transformed q
3564 = b - r, which won't be broken up until we transform b = c - d.
3566 En passant, clear the GIMPLE visited flag on every statement. */
3568 static void
3569 break_up_subtract_bb (basic_block bb)
3571 gimple_stmt_iterator gsi;
3572 basic_block son;
3574 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3576 gimple stmt = gsi_stmt (gsi);
3577 gimple_set_visited (stmt, false);
3579 if (!is_gimple_assign (stmt)
3580 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3581 continue;
3583 /* Look for simple gimple subtract operations. */
3584 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3586 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3587 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3588 continue;
3590 /* Check for a subtract used only in an addition. If this
3591 is the case, transform it into add of a negate for better
3592 reassociation. IE transform C = A-B into C = A + -B if C
3593 is only used in an addition. */
3594 if (should_break_up_subtract (stmt))
3595 break_up_subtract (stmt, &gsi);
3597 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3598 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3599 plus_negates.safe_push (gimple_assign_lhs (stmt));
3601 for (son = first_dom_son (CDI_DOMINATORS, bb);
3602 son;
3603 son = next_dom_son (CDI_DOMINATORS, son))
3604 break_up_subtract_bb (son);
3607 /* Used for repeated factor analysis. */
3608 struct repeat_factor_d
3610 /* An SSA name that occurs in a multiply chain. */
3611 tree factor;
3613 /* Cached rank of the factor. */
3614 unsigned rank;
3616 /* Number of occurrences of the factor in the chain. */
3617 HOST_WIDE_INT count;
3619 /* An SSA name representing the product of this factor and
3620 all factors appearing later in the repeated factor vector. */
3621 tree repr;
3624 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3625 typedef const struct repeat_factor_d *const_repeat_factor_t;
3628 static vec<repeat_factor> repeat_factor_vec;
3630 /* Used for sorting the repeat factor vector. Sort primarily by
3631 ascending occurrence count, secondarily by descending rank. */
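/* For illustration (hypothetical factors): { x: count 2, rank 5;
   y: count 3, rank 4; z: count 2, rank 7 } sorts to z, x, y --
   the two count-2 entries come first, ordered among themselves by
   descending rank.  */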
3633 static int
3634 compare_repeat_factors (const void *x1, const void *x2)
3636 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3637 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3639 if (rf1->count != rf2->count)
3640 return rf1->count - rf2->count;
3642 return rf2->rank - rf1->rank;
3645 /* Look for repeated operands in OPS in the multiply tree rooted at
3646 STMT. Replace them with an optimal sequence of multiplies and powi
3647 builtin calls, and remove the used operands from OPS. Return an
3648 SSA name representing the value of the replacement sequence. */
3650 static tree
3651 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3653 unsigned i, j, vec_len;
3654 int ii;
3655 operand_entry_t oe;
3656 repeat_factor_t rf1, rf2;
3657 repeat_factor rfnew;
3658 tree result = NULL_TREE;
3659 tree target_ssa, iter_result;
3660 tree type = TREE_TYPE (gimple_get_lhs (stmt));
3661 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3662 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3663 gimple mul_stmt, pow_stmt;
3665 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3666 target. */
3667 if (!powi_fndecl)
3668 return NULL_TREE;
3670 /* Allocate the repeated factor vector. */
3671 repeat_factor_vec.create (10);
3673 /* Scan the OPS vector for all SSA names in the product and build
3674 up a vector of occurrence counts for each factor. */
3675 FOR_EACH_VEC_ELT (*ops, i, oe)
3677 if (TREE_CODE (oe->op) == SSA_NAME)
3679 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3681 if (rf1->factor == oe->op)
3683 rf1->count += oe->count;
3684 break;
3688 if (j >= repeat_factor_vec.length ())
3690 rfnew.factor = oe->op;
3691 rfnew.rank = oe->rank;
3692 rfnew.count = oe->count;
3693 rfnew.repr = NULL_TREE;
3694 repeat_factor_vec.safe_push (rfnew);
3699 /* Sort the repeated factor vector by (a) increasing occurrence count,
3700 and (b) decreasing rank. */
3701 repeat_factor_vec.qsort (compare_repeat_factors);
3703 /* It is generally best to combine as many base factors as possible
3704 into a product before applying __builtin_powi to the result.
3705 However, the sort order chosen for the repeated factor vector
3706 allows us to cache partial results for the product of the base
3707 factors for subsequent use. When we already have a cached partial
3708 result from a previous iteration, it is best to make use of it
3709 before looking for another __builtin_pow opportunity.
3711 As an example, consider x * x * y * y * y * z * z * z * z.
3712 We want to first compose the product x * y * z, raise it to the
3713 second power, then multiply this by y * z, and finally multiply
3714 by z. This can be done in 5 multiplies provided we cache y * z
3715 for use in both expressions:
3717 t1 = y * z
3718 t2 = t1 * x
3719 t3 = t2 * t2
3720 t4 = t1 * t3
3721 result = t4 * z
3723 If we instead ignored the cached y * z and first multiplied by
3724 the __builtin_pow opportunity z * z, we would get the inferior:
3726 t1 = y * z
3727 t2 = t1 * x
3728 t3 = t2 * t2
3729 t4 = z * z
3730 t5 = t3 * t4
3731 result = t5 * y */
3733 vec_len = repeat_factor_vec.length ();
3735 /* Repeatedly look for opportunities to create a builtin_powi call. */
3736 while (true)
3738 HOST_WIDE_INT power;
3740 /* First look for the largest cached product of factors from
3741 preceding iterations. If found, create a builtin_powi for
3742 it if the minimum occurrence count for its factors is at
3743 least 2, or just use this cached product as our next
3744 multiplicand if the minimum occurrence count is 1. */
3745 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3747 if (rf1->repr && rf1->count > 0)
3748 break;
3751 if (j < vec_len)
3753 power = rf1->count;
3755 if (power == 1)
3757 iter_result = rf1->repr;
3759 if (dump_file && (dump_flags & TDF_DETAILS))
3761 unsigned elt;
3762 repeat_factor_t rf;
3763 fputs ("Multiplying by cached product ", dump_file);
3764 for (elt = j; elt < vec_len; elt++)
3766 rf = &repeat_factor_vec[elt];
3767 print_generic_expr (dump_file, rf->factor, 0);
3768 if (elt < vec_len - 1)
3769 fputs (" * ", dump_file);
3771 fputs ("\n", dump_file);
3774 else
3776 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3777 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3778 build_int_cst (integer_type_node,
3779 power));
3780 gimple_call_set_lhs (pow_stmt, iter_result);
3781 gimple_set_location (pow_stmt, gimple_location (stmt));
3782 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3784 if (dump_file && (dump_flags & TDF_DETAILS))
3786 unsigned elt;
3787 repeat_factor_t rf;
3788 fputs ("Building __builtin_pow call for cached product (",
3789 dump_file);
3790 for (elt = j; elt < vec_len; elt++)
3792 rf = &repeat_factor_vec[elt];
3793 print_generic_expr (dump_file, rf->factor, 0);
3794 if (elt < vec_len - 1)
3795 fputs (" * ", dump_file);
3797 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
3798 power);
3802 else
3804 /* Otherwise, find the first factor in the repeated factor
3805 vector whose occurrence count is at least 2. If no such
3806 factor exists, there are no builtin_powi opportunities
3807 remaining. */
3808 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3810 if (rf1->count >= 2)
3811 break;
3814 if (j >= vec_len)
3815 break;
3817 power = rf1->count;
3819 if (dump_file && (dump_flags & TDF_DETAILS))
3821 unsigned elt;
3822 repeat_factor_t rf;
3823 fputs ("Building __builtin_pow call for (", dump_file);
3824 for (elt = j; elt < vec_len; elt++)
3826 rf = &repeat_factor_vec[elt];
3827 print_generic_expr (dump_file, rf->factor, 0);
3828 if (elt < vec_len - 1)
3829 fputs (" * ", dump_file);
3831 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
3834 reassociate_stats.pows_created++;
3836 /* Visit each element of the vector in reverse order (so that
3837 high-occurrence elements are visited first, and within the
3838 same occurrence count, lower-ranked elements are visited
3839 first). Form a linear product of all elements in this order
3840 whose occurrence count is at least that of element J.
3841 Record the SSA name representing the product of each element
3842 with all subsequent elements in the vector. */
3843 if (j == vec_len - 1)
3844 rf1->repr = rf1->factor;
3845 else
3847 for (ii = vec_len - 2; ii >= (int)j; ii--)
3849 tree op1, op2;
3851 rf1 = &repeat_factor_vec[ii];
3852 rf2 = &repeat_factor_vec[ii + 1];
3854 /* Init the last factor's representative to be itself. */
3855 if (!rf2->repr)
3856 rf2->repr = rf2->factor;
3858 op1 = rf1->factor;
3859 op2 = rf2->repr;
3861 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
3862 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
3863 target_ssa,
3864 op1, op2);
3865 gimple_set_location (mul_stmt, gimple_location (stmt));
3866 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3867 rf1->repr = target_ssa;
3869 /* Don't reprocess the multiply we just introduced. */
3870 gimple_set_visited (mul_stmt, true);
3874 /* Form a call to __builtin_powi for the maximum product
3875 just formed, raised to the power obtained earlier. */
3876 rf1 = &repeat_factor_vec[j];
3877 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3878 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3879 build_int_cst (integer_type_node,
3880 power));
3881 gimple_call_set_lhs (pow_stmt, iter_result);
3882 gimple_set_location (pow_stmt, gimple_location (stmt));
3883 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3886 /* If we previously formed at least one other builtin_powi call,
3887 form the product of this one and those others. */
3888 if (result)
3890 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
3891 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
3892 result, iter_result);
3893 gimple_set_location (mul_stmt, gimple_location (stmt));
3894 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3895 gimple_set_visited (mul_stmt, true);
3896 result = new_result;
3898 else
3899 result = iter_result;
3901 /* Decrement the occurrence count of each element in the product
3902 by the count found above, and remove this many copies of each
3903 factor from OPS. */
3904 for (i = j; i < vec_len; i++)
3906 unsigned k = power;
3907 unsigned n;
3909 rf1 = &repeat_factor_vec[i];
3910 rf1->count -= power;
3912 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
3914 if (oe->op == rf1->factor)
3916 if (oe->count <= k)
3918 ops->ordered_remove (n);
3919 k -= oe->count;
3921 if (k == 0)
3922 break;
3924 else
3926 oe->count -= k;
3927 break;
3934 /* At this point all elements in the repeated factor vector have a
3935 remaining occurrence count of 0 or 1, and those with a count of 1
3936 don't have cached representatives. Re-sort the ops vector and
3937 clean up. */
3938 ops->qsort (sort_by_operand_rank);
3939 repeat_factor_vec.release ();
3941 /* Return the final product computed herein. Note that there may
3942 still be some elements with single occurrence count left in OPS;
3943 those will be handled by the normal reassociation logic. */
3944 return result;
3947 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
3949 static void
3950 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
3952 tree rhs1;
3954 if (dump_file && (dump_flags & TDF_DETAILS))
3956 fprintf (dump_file, "Transforming ");
3957 print_gimple_stmt (dump_file, stmt, 0, 0);
3960 rhs1 = gimple_assign_rhs1 (stmt);
3961 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
3962 update_stmt (stmt);
3963 remove_visited_stmt_chain (rhs1);
3965 if (dump_file && (dump_flags & TDF_DETAILS))
3967 fprintf (dump_file, " into ");
3968 print_gimple_stmt (dump_file, stmt, 0, 0);
3972 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
3974 static void
3975 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
3976 tree rhs1, tree rhs2)
3978 if (dump_file && (dump_flags & TDF_DETAILS))
3980 fprintf (dump_file, "Transforming ");
3981 print_gimple_stmt (dump_file, stmt, 0, 0);
3984 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
3985 update_stmt (gsi_stmt (*gsi));
3986 remove_visited_stmt_chain (rhs1);
3988 if (dump_file && (dump_flags & TDF_DETAILS))
3990 fprintf (dump_file, " into ");
3991 print_gimple_stmt (dump_file, stmt, 0, 0);
3995 /* Reassociate expressions in basic block BB and its post-dominator as
3996 children. */
3998 static void
3999 reassociate_bb (basic_block bb)
4001 gimple_stmt_iterator gsi;
4002 basic_block son;
4003 gimple stmt = last_stmt (bb);
4005 if (stmt && !gimple_visited_p (stmt))
4006 maybe_optimize_range_tests (stmt);
4008 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4010 stmt = gsi_stmt (gsi);
4012 if (is_gimple_assign (stmt)
4013 && !stmt_could_throw_p (stmt))
4015 tree lhs, rhs1, rhs2;
4016 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4018 /* If this is not a gimple binary expression, there is
4019 nothing for us to do with it. */
4020 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4021 continue;
4023 /* If this was part of an already processed statement,
4024 we don't need to touch it again. */
4025 if (gimple_visited_p (stmt))
4026 {
4027 /* This statement might have become dead because of previous
4028 reassociations. */
4029 if (has_zero_uses (gimple_get_lhs (stmt)))
4030 {
4031 gsi_remove (&gsi, true);
4032 release_defs (stmt);
4033 /* We might end up removing the last stmt above, which
4034 leaves the iterator at the end of the sequence.
4035 Reset it to the last stmt of the block in that case.
4036 If the block is now empty because we removed its
4037 last statement, the iterator is still at the end of
4038 the sequence and we need to bail out. */
4039 if (gsi_end_p (gsi))
4040 {
4041 gsi = gsi_last_bb (bb);
4042 if (gsi_end_p (gsi))
4043 break;
4044 }
4045 }
4046 continue;
4047 }
4049 lhs = gimple_assign_lhs (stmt);
4050 rhs1 = gimple_assign_rhs1 (stmt);
4051 rhs2 = gimple_assign_rhs2 (stmt);
4053 /* For non-bit or min/max operations we can't associate
4054 all types. Verify that here. */
4055 if (rhs_code != BIT_IOR_EXPR
4056 && rhs_code != BIT_AND_EXPR
4057 && rhs_code != BIT_XOR_EXPR
4058 && rhs_code != MIN_EXPR
4059 && rhs_code != MAX_EXPR
4060 && (!can_reassociate_p (lhs)
4061 || !can_reassociate_p (rhs1)
4062 || !can_reassociate_p (rhs2)))
4063 continue;
4065 if (associative_tree_code (rhs_code))
4066 {
4067 vec<operand_entry_t> ops = vNULL;
4068 tree powi_result = NULL_TREE;
4070 /* There may be no immediate uses left by the time we
4071 get here because we may have eliminated them all. */
4072 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4073 continue;
4075 gimple_set_visited (stmt, true);
4076 linearize_expr_tree (&ops, stmt, true, true);
4077 ops.qsort (sort_by_operand_rank);
4078 optimize_ops_list (rhs_code, &ops);
4079 if (undistribute_ops_list (rhs_code, &ops,
4080 loop_containing_stmt (stmt)))
4081 {
4082 ops.qsort (sort_by_operand_rank);
4083 optimize_ops_list (rhs_code, &ops);
4084 }
4086 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4087 optimize_range_tests (rhs_code, &ops);
4089 if (first_pass_instance
4090 && rhs_code == MULT_EXPR
4091 && flag_unsafe_math_optimizations)
4092 powi_result = attempt_builtin_powi (stmt, &ops);
4094 /* If the operand vector is now empty, all operands were
4095 consumed by the __builtin_powi optimization. */
4096 if (ops.length () == 0)
4097 transform_stmt_to_copy (&gsi, stmt, powi_result);
4098 else if (ops.length () == 1)
4099 {
4100 tree last_op = ops.last ()->op;
4102 if (powi_result)
4103 transform_stmt_to_multiply (&gsi, stmt, last_op,
4104 powi_result);
4105 else
4106 transform_stmt_to_copy (&gsi, stmt, last_op);
4107 }
4108 else
4109 {
4110 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4111 int ops_num = ops.length ();
4112 int width = get_reassociation_width (ops_num, rhs_code, mode);
4114 if (dump_file && (dump_flags & TDF_DETAILS))
4115 fprintf (dump_file,
4116 "Width = %d was chosen for reassociation\n", width);
4118 if (width > 1
4119 && ops.length () > 3)
4120 rewrite_expr_tree_parallel (stmt, width, ops);
4121 else
4122 rewrite_expr_tree (stmt, 0, ops, false);
4124 /* If we combined some repeated factors into a
4125 __builtin_powi call, multiply that result by the
4126 reassociated operands. */
4127 if (powi_result)
4128 {
4129 gimple mul_stmt;
4130 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4131 tree target_ssa = make_temp_ssa_name (type, NULL,
4132 "reassocpow");
4133 gimple_set_lhs (stmt, target_ssa);
4134 update_stmt (stmt);
4135 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4136 powi_result,
4137 target_ssa);
4138 gimple_set_location (mul_stmt, gimple_location (stmt));
4139 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4140 }
4141 }
4143 ops.release ();
4144 }
4145 }
4146 }
4147 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4148 son;
4149 son = next_dom_son (CDI_POST_DOMINATORS, son))
4150 reassociate_bb (son);
4151 }
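/* Editorial sketch, not part of the original source: for an expression
   tree computing a + b + c + d through a chain of single additions
   rooted at STMT, the loop above collects the operand vector
   { a, b, c, d }, sorts it by rank, and runs optimize_ops_list and
   (when profitable) undistribute_ops_list over it.  If
   get_reassociation_width returns, say, 2, rewrite_expr_tree_parallel
   rebuilds the computation roughly as

     t1 = a + b;  t2 = c + d;  lhs = t1 + t2;

   exposing two independent additions; otherwise rewrite_expr_tree
   emits the usual left-linear chain.  */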
4153 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4154 void debug_ops_vector (vec<operand_entry_t> ops);
4156 /* Dump the operand entry vector OPS to FILE. */
4158 void
4159 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4160 {
4161 operand_entry_t oe;
4162 unsigned int i;
4164 FOR_EACH_VEC_ELT (ops, i, oe)
4165 {
4166 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4167 print_generic_expr (file, oe->op, 0);
4168 }
4169 }
4171 /* Dump the operand entry vector OPS to STDERR. */
4173 DEBUG_FUNCTION void
4174 debug_ops_vector (vec<operand_entry_t> ops)
4175 {
4176 dump_ops_vector (stderr, ops);
4177 }
4179 static void
4180 do_reassoc (void)
4181 {
4182 break_up_subtract_bb (ENTRY_BLOCK_PTR);
4183 reassociate_bb (EXIT_BLOCK_PTR);
4184 }
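/* Editorial note: both phases walk the CFG through dominance trees
   rather than a linear list of blocks.  reassociate_bb, as defined
   above, starts at the exit block and recurses over its
   CDI_POST_DOMINATORS children; break_up_subtract_bb (defined earlier
   in this file) is presumably driven the same way from the entry block
   over the dominator tree.  */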
4186 /* Initialize the reassociation pass. */
4188 static void
4189 init_reassoc (void)
4190 {
4191 int i;
4192 long rank = 2;
4193 int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
4195 /* Find the loops, so that we can prevent moving calculations in
4196 them. */
4197 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4199 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4201 operand_entry_pool = create_alloc_pool ("operand entry pool",
4202 sizeof (struct operand_entry), 30);
4203 next_operand_entry_id = 0;
4205 /* A reverse post order (RPO) walk gives us an ordering in which
4206 deeper loops come later. */
4207 pre_and_rev_post_order_compute (NULL, bbs, false);
4208 bb_rank = XCNEWVEC (long, last_basic_block);
4209 operand_rank = pointer_map_create ();
4211 /* Give each default definition a distinct rank. This includes
4212 parameters and the static chain. Walk backwards over all
4213 SSA names so that we get proper rank ordering according
4214 to tree_swap_operands_p. */
4215 for (i = num_ssa_names - 1; i > 0; --i)
4216 {
4217 tree name = ssa_name (i);
4218 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4219 insert_operand_rank (name, ++rank);
4220 }
4222 /* Set up rank for each BB */
4223 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
4224 bb_rank[bbs[i]] = ++rank << 16;
4226 free (bbs);
4227 calculate_dominance_info (CDI_POST_DOMINATORS);
4228 plus_negates = vNULL;
4229 }
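/* Editorial example of the rank scheme set up above: RANK starts at 2,
   so two default definitions (say a parameter and the static chain)
   receive ranks 3 and 4, and the basic blocks then receive bb_rank
   values (5 << 16), (6 << 16), ... in reverse post order.  Each block
   thus owns a 65536-wide rank band above all default definitions and
   all earlier blocks, so values computed in later (deeper) blocks tend
   to outrank values from earlier ones in the rank computation defined
   earlier in this file.  */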
4231 /* Cleanup after the reassociation pass, and print stats if
4232 requested. */
4234 static void
4235 fini_reassoc (void)
4236 {
4237 statistics_counter_event (cfun, "Linearized",
4238 reassociate_stats.linearized);
4239 statistics_counter_event (cfun, "Constants eliminated",
4240 reassociate_stats.constants_eliminated);
4241 statistics_counter_event (cfun, "Ops eliminated",
4242 reassociate_stats.ops_eliminated);
4243 statistics_counter_event (cfun, "Statements rewritten",
4244 reassociate_stats.rewritten);
4245 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4246 reassociate_stats.pows_encountered);
4247 statistics_counter_event (cfun, "Built-in powi calls created",
4248 reassociate_stats.pows_created);
4250 pointer_map_destroy (operand_rank);
4251 free_alloc_pool (operand_entry_pool);
4252 free (bb_rank);
4253 plus_negates.release ();
4254 free_dominance_info (CDI_POST_DOMINATORS);
4255 loop_optimizer_finalize ();
4256 }
4258 /* Gate and execute functions for Reassociation. */
4260 static unsigned int
4261 execute_reassoc (void)
4262 {
4263 init_reassoc ();
4265 do_reassoc ();
4266 repropagate_negates ();
4268 fini_reassoc ();
4269 return 0;
4270 }
4272 static bool
4273 gate_tree_ssa_reassoc (void)
4274 {
4275 return flag_tree_reassoc != 0;
4276 }
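/* Editorial note: flag_tree_reassoc is the variable behind the
   -ftree-reassoc / -fno-tree-reassoc command-line option, so this gate
   simply lets the user disable the whole pass.  */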
4278 struct gimple_opt_pass pass_reassoc =
4279 {
4280 {
4281 GIMPLE_PASS,
4282 "reassoc", /* name */
4283 OPTGROUP_NONE, /* optinfo_flags */
4284 gate_tree_ssa_reassoc, /* gate */
4285 execute_reassoc, /* execute */
4286 NULL, /* sub */
4287 NULL, /* next */
4288 0, /* static_pass_number */
4289 TV_TREE_REASSOC, /* tv_id */
4290 PROP_cfg | PROP_ssa, /* properties_required */
4291 0, /* properties_provided */
4292 0, /* properties_destroyed */
4293 0, /* todo_flags_start */
4294 TODO_verify_ssa
4295 | TODO_verify_flow
4296 | TODO_ggc_collect /* todo_flags_finish */