2013-05-15 Richard Biener <rguenther@suse.de>
[official-gcc.git] / gcc / tree-ssa-reassoc.c
blob 4fc19cb551eda29a067b2f656903ed6a03e3ff99
1 /* Reassociation for trees.
2 Copyright (C) 2005-2013 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "tree.h"
27 #include "basic-block.h"
28 #include "gimple-pretty-print.h"
29 #include "tree-inline.h"
30 #include "tree-flow.h"
31 #include "gimple.h"
32 #include "tree-iterator.h"
33 #include "tree-pass.h"
34 #include "alloc-pool.h"
35 #include "vec.h"
36 #include "langhooks.h"
37 #include "pointer-set.h"
38 #include "cfgloop.h"
39 #include "flags.h"
40 #include "target.h"
41 #include "params.h"
42 #include "diagnostic-core.h"
44 /* This is a simple global reassociation pass. It is, in part, based
45 on the LLVM pass of the same name (They do some things more/less
46 than we do, in different orders, etc).
48 It consists of five steps:
50 1. Breaking up subtract operations into addition + negate, where
51 it would promote the reassociation of adds.
53 2. Left linearization of the expression trees, so that (A+B)+(C+D)
54 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
55 During linearization, we place the operands of the binary
56 expressions into a vector of operand_entry_t
58 3. Optimization of the operand lists, eliminating things like a +
59 -a, a & a, etc.
61 3a. Combine repeated factors with the same occurrence counts
62 into a __builtin_powi call that will later be optimized into
63 an optimal number of multiplies.
65 4. Rewrite the expression trees we linearized and optimized so
66 they are in proper rank order.
68 5. Repropagate negates, as nothing else will clean them up ATM.
70 A bit of theory on #4, since nobody seems to write anything down
71 about why it makes sense to do it the way they do it:
73 We could do this much nicer theoretically, but don't (for reasons
74 explained after how to do it theoretically nice :P).
76 In order to promote the most redundancy elimination, you want
77 binary expressions whose operands are the same rank (or
78 preferably, the same value) exposed to the redundancy eliminator,
79 for possible elimination.
81 So the way to do this if we really cared, is to build the new op
82 tree from the leaves to the roots, merging as you go, and putting the
83 new op on the end of the worklist, until you are left with one
84 thing on the worklist.
86 I.e., if you have to rewrite the following set of operands (listed with
87 rank in parentheses), with opcode PLUS_EXPR:
89 a (1), b (1), c (1), d (2), e (2)
92 We start with our merge worklist empty, and the ops list with all of
93 those on it.
95 You want to first merge all leaves of the same rank, as much as
96 possible.
98 So first build a binary op of
100 mergetmp = a + b, and put "mergetmp" on the merge worklist.
102 Because there is no three operand form of PLUS_EXPR, c is not going to
103 be exposed to redundancy elimination as a rank 1 operand.
105 So you might as well throw it on the merge worklist (you could also
106 consider it to now be a rank two operand, and merge it with d and e,
107 but in this case, you then have evicted e from a binary op. So at
108 least in this situation, you can't win.)
110 Then build a binary op of d + e
111 mergetmp2 = d + e
113 and put mergetmp2 on the merge worklist.
115 so merge worklist = {mergetmp, c, mergetmp2}
117 Continue building binary ops of these operations until you have only
118 one operation left on the worklist.
120 So we have
122 build binary op
123 mergetmp3 = mergetmp + c
125 worklist = {mergetmp2, mergetmp3}
127 mergetmp4 = mergetmp2 + mergetmp3
129 worklist = {mergetmp4}
131 because we have one operation left, we can now just set the original
132 statement equal to the result of that operation.
134 This will at least expose a + b and d + e to redundancy elimination
135 as binary operations.
137 For extra points, you can reuse the old statements to build the
138 mergetmps, since you shouldn't run out.
140 So why don't we do this?
142 Because it's expensive, and rarely will help. Most trees we are
143 reassociating have 3 or fewer ops. If they have 2 ops, they already
144 will be written into a nice single binary op. If you have 3 ops, a
145 single simple check suffices to tell you whether the first two are of the
146 same rank. If so, you know to order it
148 mergetmp = op1 + op2
149 newstmt = mergetmp + op3
151 instead of
152 mergetmp = op2 + op3
153 newstmt = mergetmp + op1
155 If all three are of the same rank, you can't expose them all in a
156 single binary operator anyway, so the above is *still* the best you
157 can do.
159 Thus, this is what we do. When we have three ops left, we check to see
160 what order to put them in, and call it a day. As a nod to vector sum
161 reduction, we check if any of the ops are really a phi node that is a
162 destructive update for the associating op, and keep the destructive
163 update together for vector sum reduction recognition. */
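/* A small end-to-end illustration of the steps above (a sketch only, not
   part of the pass; all names are hypothetical, and reassociation of the
   floating-point operations is assumed to be allowed).  Given

     e = (a + b) + (c + d);
     f = x * x * x * x * x;

   step 2 linearizes the first expression into the operand list
   {a, b, c, d}, step 3 finds nothing to eliminate, and step 4 rewrites
   it in rank order, roughly as

     t1 = a + b;  t2 = t1 + c;  e = t2 + d;

   while step 3a turns the repeated factor x (occurrence count 5) into

     f = __builtin_powi (x, 5);

   which is later expanded into a near-minimal number of multiplies.  */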
166 /* Statistics */
167 static struct
169 int linearized;
170 int constants_eliminated;
171 int ops_eliminated;
172 int rewritten;
173 int pows_encountered;
174 int pows_created;
175 } reassociate_stats;
177 /* Operator, rank pair. */
178 typedef struct operand_entry
180 unsigned int rank;
181 int id;
182 tree op;
183 unsigned int count;
184 } *operand_entry_t;
186 static alloc_pool operand_entry_pool;
188 /* This is used to assign a unique ID to each struct operand_entry
189 so that qsort results are identical on different hosts. */
190 static int next_operand_entry_id;
192 /* Starting rank number for a given basic block, so that we can rank
193 operations using unmovable instructions in that BB based on the bb
194 depth. */
195 static long *bb_rank;
197 /* Operand->rank hashtable. */
198 static struct pointer_map_t *operand_rank;
200 /* Forward decls. */
201 static long get_rank (tree);
204 /* Bias amount for loop-carried phis. We want this to be larger than
205 the depth of any reassociation tree we can see, but not larger than
206 the rank difference between two blocks. */
207 #define PHI_LOOP_BIAS (1 << 15)
209 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
210 an innermost loop, and the phi has only a single use which is inside
211 the loop, then the rank is the block rank of the loop latch plus an
212 extra bias for the loop-carried dependence. This causes expressions
213 calculated into an accumulator variable to be independent for each
214 iteration of the loop. If STMT is some other phi, the rank is the
215 block rank of its containing block. */
216 static long
217 phi_rank (gimple stmt)
219 basic_block bb = gimple_bb (stmt);
220 struct loop *father = bb->loop_father;
221 tree res;
222 unsigned i;
223 use_operand_p use;
224 gimple use_stmt;
226 /* We only care about real loops (those with a latch). */
227 if (!father->latch)
228 return bb_rank[bb->index];
230 /* Interesting phis must be in headers of innermost loops. */
231 if (bb != father->header
232 || father->inner)
233 return bb_rank[bb->index];
235 /* Ignore virtual SSA_NAMEs. */
236 res = gimple_phi_result (stmt);
237 if (virtual_operand_p (res))
238 return bb_rank[bb->index];
240 /* The phi definition must have a single use, and that use must be
241 within the loop. Otherwise this isn't an accumulator pattern. */
242 if (!single_imm_use (res, &use, &use_stmt)
243 || gimple_bb (use_stmt)->loop_father != father)
244 return bb_rank[bb->index];
246 /* Look for phi arguments from within the loop. If found, bias this phi. */
247 for (i = 0; i < gimple_phi_num_args (stmt); i++)
249 tree arg = gimple_phi_arg_def (stmt, i);
250 if (TREE_CODE (arg) == SSA_NAME
251 && !SSA_NAME_IS_DEFAULT_DEF (arg))
253 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
254 if (gimple_bb (def_stmt)->loop_father == father)
255 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
259 /* Must be an uninteresting phi. */
260 return bb_rank[bb->index];
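/* Illustration of the accumulator pattern phi_rank looks for
   (hypothetical code, not part of the pass).  With floating-point
   reassociation enabled, the loop-carried phi for SUM gets the latch
   block rank plus PHI_LOOP_BIAS, so the rewritten adds place SUM last
   and the per-iteration additions no longer form one serial chain
   through the accumulator.  */
#if 0
static double
accumulate3 (const double *a, const double *b, const double *c, int n)
{
  int i;
  double sum = 0.0;
  for (i = 0; i < n; i++)
    /* Without the bias the chain stays serial:
         sum = ((sum + a[i]) + b[i]) + c[i];
       with the bias it becomes roughly:
         t = a[i] + b[i];  t2 = t + c[i];  sum = t2 + sum;  */
    sum = sum + a[i] + b[i] + c[i];
  return sum;
}
#endif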
263 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
264 loop-carried dependence of an innermost loop, return TRUE; else
265 return FALSE. */
266 static bool
267 loop_carried_phi (tree exp)
269 gimple phi_stmt;
270 long block_rank;
272 if (TREE_CODE (exp) != SSA_NAME
273 || SSA_NAME_IS_DEFAULT_DEF (exp))
274 return false;
276 phi_stmt = SSA_NAME_DEF_STMT (exp);
278 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
279 return false;
281 /* Non-loop-carried phis have block rank. Loop-carried phis have
282 an additional bias added in. If this phi doesn't have block rank,
283 it's biased and should not be propagated. */
284 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
286 if (phi_rank (phi_stmt) != block_rank)
287 return true;
289 return false;
292 /* Return the maximum of RANK and the rank that should be propagated
293 from expression OP. For most operands, this is just the rank of OP.
294 For loop-carried phis, the value is zero to avoid undoing the bias
295 in favor of the phi. */
296 static long
297 propagate_rank (long rank, tree op)
299 long op_rank;
301 if (loop_carried_phi (op))
302 return rank;
304 op_rank = get_rank (op);
306 return MAX (rank, op_rank);
309 /* Look up the operand rank structure for expression E. */
311 static inline long
312 find_operand_rank (tree e)
314 void **slot = pointer_map_contains (operand_rank, e);
315 return slot ? (long) (intptr_t) *slot : -1;
318 /* Insert {E,RANK} into the operand rank hashtable. */
320 static inline void
321 insert_operand_rank (tree e, long rank)
323 void **slot;
324 gcc_assert (rank > 0);
325 slot = pointer_map_insert (operand_rank, e);
326 gcc_assert (!*slot);
327 *slot = (void *) (intptr_t) rank;
330 /* Given an expression E, return the rank of the expression. */
332 static long
333 get_rank (tree e)
335 /* Constants have rank 0. */
336 if (is_gimple_min_invariant (e))
337 return 0;
339 /* SSA_NAME's have the rank of the expression they are the result
340 of.
341 For globals and uninitialized values, the rank is 0.
342 For function arguments, use the pre-setup rank.
343 For PHI nodes, stores, asm statements, etc, we use the rank of
344 the BB.
345 For simple operations, the rank is the maximum rank of any of
346 its operands, or the bb_rank, whichever is less.
347 I make no claims that this is optimal, however, it gives good
348 results. */
350 /* We make an exception to the normal ranking system to break
351 dependences of accumulator variables in loops. Suppose we
352 have a simple one-block loop containing:
354 x_1 = phi(x_0, x_2)
355 b = a + x_1
356 c = b + d
357 x_2 = c + e
359 As shown, each iteration of the calculation into x is fully
360 dependent upon the iteration before it. We would prefer to
361 see this in the form:
363 x_1 = phi(x_0, x_2)
364 b = a + d
365 c = b + e
366 x_2 = c + x_1
368 If the loop is unrolled, the calculations of b and c from
369 different iterations can be interleaved.
371 To obtain this result during reassociation, we bias the rank
372 of the phi definition x_1 upward, when it is recognized as an
373 accumulator pattern. The artificial rank causes it to be
374 added last, providing the desired independence. */
376 if (TREE_CODE (e) == SSA_NAME)
378 gimple stmt;
379 long rank;
380 int i, n;
381 tree op;
383 if (SSA_NAME_IS_DEFAULT_DEF (e))
384 return find_operand_rank (e);
386 stmt = SSA_NAME_DEF_STMT (e);
387 if (gimple_code (stmt) == GIMPLE_PHI)
388 return phi_rank (stmt);
390 if (!is_gimple_assign (stmt)
391 || gimple_vdef (stmt))
392 return bb_rank[gimple_bb (stmt)->index];
394 /* If we already have a rank for this expression, use that. */
395 rank = find_operand_rank (e);
396 if (rank != -1)
397 return rank;
399 /* Otherwise, find the maximum rank for the operands. As an
400 exception, remove the bias from loop-carried phis when propagating
401 the rank so that dependent operations are not also biased. */
402 rank = 0;
403 if (gimple_assign_single_p (stmt))
405 tree rhs = gimple_assign_rhs1 (stmt);
406 n = TREE_OPERAND_LENGTH (rhs);
407 if (n == 0)
408 rank = propagate_rank (rank, rhs);
409 else
411 for (i = 0; i < n; i++)
413 op = TREE_OPERAND (rhs, i);
415 if (op != NULL_TREE)
416 rank = propagate_rank (rank, op);
420 else
422 n = gimple_num_ops (stmt);
423 for (i = 1; i < n; i++)
425 op = gimple_op (stmt, i);
426 gcc_assert (op);
427 rank = propagate_rank (rank, op);
431 if (dump_file && (dump_flags & TDF_DETAILS))
433 fprintf (dump_file, "Rank for ");
434 print_generic_expr (dump_file, e, 0);
435 fprintf (dump_file, " is %ld\n", (rank + 1));
438 /* Note the rank in the hashtable so we don't recompute it. */
439 insert_operand_rank (e, (rank + 1));
440 return (rank + 1);
443 /* Globals, etc, are rank 0 */
444 return 0;
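/* Worked example of the ranking rules above (hypothetical ranks, not
   part of the pass): if operand a has rank 3, b has rank 3 and c has
   rank 4, then for

     t1 = a + b;        t1 gets MAX (3, 3) + 1 == 4
     t2 = t1 + c;       t2 gets MAX (4, 4) + 1 == 5

   so later computations always rank strictly higher than the values
   they consume, which is what sort_by_operand_rank below relies on.  */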
448 /* We want integer ones to end up last no matter what, since they are
449 the ones we can do the most with. */
450 #define INTEGER_CONST_TYPE 1 << 3
451 #define FLOAT_CONST_TYPE 1 << 2
452 #define OTHER_CONST_TYPE 1 << 1
454 /* Classify an invariant tree into integer, float, or other, so that
455 we can sort them to be near other constants of the same type. */
456 static inline int
457 constant_type (tree t)
459 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
460 return INTEGER_CONST_TYPE;
461 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
462 return FLOAT_CONST_TYPE;
463 else
464 return OTHER_CONST_TYPE;
467 /* qsort comparison function to sort operand entries PA and PB by rank
468 so that the sorted array is ordered by rank in decreasing order. */
469 static int
470 sort_by_operand_rank (const void *pa, const void *pb)
472 const operand_entry_t oea = *(const operand_entry_t *)pa;
473 const operand_entry_t oeb = *(const operand_entry_t *)pb;
475 /* It's nicer for optimize_expression if constants that are likely
476 to fold when added/multiplied/whatever are put next to each
477 other. Since all constants have rank 0, order them by type. */
478 if (oeb->rank == 0 && oea->rank == 0)
480 if (constant_type (oeb->op) != constant_type (oea->op))
481 return constant_type (oeb->op) - constant_type (oea->op);
482 else
483 /* To make sorting result stable, we use unique IDs to determine
484 order. */
485 return oeb->id - oea->id;
488 /* Lastly, make sure the versions that are the same go next to each
489 other. We use SSA_NAME_VERSION because it's stable. */
490 if ((oeb->rank - oea->rank == 0)
491 && TREE_CODE (oea->op) == SSA_NAME
492 && TREE_CODE (oeb->op) == SSA_NAME)
494 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
495 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
496 else
497 return oeb->id - oea->id;
500 if (oeb->rank != oea->rank)
501 return oeb->rank - oea->rank;
502 else
503 return oeb->id - oea->id;
506 /* Add an operand entry to *OPS for the tree operand OP. */
508 static void
509 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
511 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
513 oe->op = op;
514 oe->rank = get_rank (op);
515 oe->id = next_operand_entry_id++;
516 oe->count = 1;
517 ops->safe_push (oe);
520 /* Add an operand entry to *OPS for the tree operand OP with repeat
521 count REPEAT. */
523 static void
524 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
525 HOST_WIDE_INT repeat)
527 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
529 oe->op = op;
530 oe->rank = get_rank (op);
531 oe->id = next_operand_entry_id++;
532 oe->count = repeat;
533 ops->safe_push (oe);
535 reassociate_stats.pows_encountered++;
538 /* Return true if STMT is a reassociable operation containing a binary
539 operation with tree code CODE, and is inside LOOP. */
541 static bool
542 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
544 basic_block bb = gimple_bb (stmt);
546 if (gimple_bb (stmt) == NULL)
547 return false;
549 if (!flow_bb_inside_loop_p (loop, bb))
550 return false;
552 if (is_gimple_assign (stmt)
553 && gimple_assign_rhs_code (stmt) == code
554 && has_single_use (gimple_assign_lhs (stmt)))
555 return true;
557 return false;
561 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
562 operand of that unary operation. Otherwise, return NULL_TREE. */
564 static tree
565 get_unary_op (tree name, enum tree_code opcode)
567 gimple stmt = SSA_NAME_DEF_STMT (name);
569 if (!is_gimple_assign (stmt))
570 return NULL_TREE;
572 if (gimple_assign_rhs_code (stmt) == opcode)
573 return gimple_assign_rhs1 (stmt);
574 return NULL_TREE;
577 /* If CURR and LAST are a pair of ops that OPCODE allows us to
578 eliminate through equivalences, do so, remove them from OPS, and
579 return true. Otherwise, return false. */
581 static bool
582 eliminate_duplicate_pair (enum tree_code opcode,
583 vec<operand_entry_t> *ops,
584 bool *all_done,
585 unsigned int i,
586 operand_entry_t curr,
587 operand_entry_t last)
590 /* If we have two of the same op, and the opcode is &, |, min, or max,
591 we can eliminate one of them.
592 If we have two of the same op, and the opcode is ^, we can
593 eliminate both of them. */
595 if (last && last->op == curr->op)
597 switch (opcode)
599 case MAX_EXPR:
600 case MIN_EXPR:
601 case BIT_IOR_EXPR:
602 case BIT_AND_EXPR:
603 if (dump_file && (dump_flags & TDF_DETAILS))
605 fprintf (dump_file, "Equivalence: ");
606 print_generic_expr (dump_file, curr->op, 0);
607 fprintf (dump_file, " [&|minmax] ");
608 print_generic_expr (dump_file, last->op, 0);
609 fprintf (dump_file, " -> ");
610 print_generic_stmt (dump_file, last->op, 0);
613 ops->ordered_remove (i);
614 reassociate_stats.ops_eliminated ++;
616 return true;
618 case BIT_XOR_EXPR:
619 if (dump_file && (dump_flags & TDF_DETAILS))
621 fprintf (dump_file, "Equivalence: ");
622 print_generic_expr (dump_file, curr->op, 0);
623 fprintf (dump_file, " ^ ");
624 print_generic_expr (dump_file, last->op, 0);
625 fprintf (dump_file, " -> nothing\n");
628 reassociate_stats.ops_eliminated += 2;
630 if (ops->length () == 2)
632 ops->create (0);
633 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
634 *all_done = true;
636 else
638 ops->ordered_remove (i-1);
639 ops->ordered_remove (i-1);
642 return true;
644 default:
645 break;
648 return false;
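/* Illustration (hypothetical operand lists, not part of the pass): for
   BIT_AND_EXPR the list {a, a, b} becomes {a, b}, since a & a == a; for
   BIT_XOR_EXPR both copies are simply removed, so {a, a, b} becomes {b},
   and if the pair was all that was left the list is replaced by a single
   zero constant and *ALL_DONE is set.  */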
651 static vec<tree> plus_negates;
653 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
654 expression, look in OPS for a corresponding positive operation to cancel
655 it out. If we find one, remove the other from OPS, replace
656 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
657 return false. */
659 static bool
660 eliminate_plus_minus_pair (enum tree_code opcode,
661 vec<operand_entry_t> *ops,
662 unsigned int currindex,
663 operand_entry_t curr)
665 tree negateop;
666 tree notop;
667 unsigned int i;
668 operand_entry_t oe;
670 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
671 return false;
673 negateop = get_unary_op (curr->op, NEGATE_EXPR);
674 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
675 if (negateop == NULL_TREE && notop == NULL_TREE)
676 return false;
678 /* Any non-negated version will have a rank that is one less than
679 the current rank. So once we hit those ranks, if we don't find
680 one, we can stop. */
682 for (i = currindex + 1;
683 ops->iterate (i, &oe)
684 && oe->rank >= curr->rank - 1 ;
685 i++)
687 if (oe->op == negateop)
690 if (dump_file && (dump_flags & TDF_DETAILS))
692 fprintf (dump_file, "Equivalence: ");
693 print_generic_expr (dump_file, negateop, 0);
694 fprintf (dump_file, " + -");
695 print_generic_expr (dump_file, oe->op, 0);
696 fprintf (dump_file, " -> 0\n");
699 ops->ordered_remove (i);
700 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
701 ops->ordered_remove (currindex);
702 reassociate_stats.ops_eliminated ++;
704 return true;
706 else if (oe->op == notop)
708 tree op_type = TREE_TYPE (oe->op);
710 if (dump_file && (dump_flags & TDF_DETAILS))
712 fprintf (dump_file, "Equivalence: ");
713 print_generic_expr (dump_file, notop, 0);
714 fprintf (dump_file, " + ~");
715 print_generic_expr (dump_file, oe->op, 0);
716 fprintf (dump_file, " -> -1\n");
719 ops->ordered_remove (i);
720 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
721 ops->ordered_remove (currindex);
722 reassociate_stats.ops_eliminated ++;
724 return true;
728 /* CURR->OP is a negate expr in a plus expr: save it for later
729 inspection in repropagate_negates(). */
730 if (negateop != NULL_TREE)
731 plus_negates.safe_push (curr->op);
733 return false;
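/* Illustration (hypothetical operand lists, not part of the pass): for a
   PLUS_EXPR chain {-x, x, y} the pair cancels and is replaced by a zero
   constant, leaving {y, 0}; similarly {~x, x, y} becomes {y, -1}, since
   x + ~x == -1 in two's complement.  A negate that finds no partner is
   recorded in plus_negates for repropagate_negates.  */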
736 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
737 bitwise not expression, look in OPS for a corresponding operand to
738 cancel it out. If we find one, remove the other from OPS, replace
739 OPS[CURRINDEX] with 0 (for AND) or an all-ones constant (for IOR),
740 and return true. Otherwise, return false. */
742 static bool
743 eliminate_not_pairs (enum tree_code opcode,
744 vec<operand_entry_t> *ops,
745 unsigned int currindex,
746 operand_entry_t curr)
748 tree notop;
749 unsigned int i;
750 operand_entry_t oe;
752 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
753 || TREE_CODE (curr->op) != SSA_NAME)
754 return false;
756 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
757 if (notop == NULL_TREE)
758 return false;
760 /* Any non-not version will have a rank that is one less than
761 the current rank. So once we hit those ranks, if we don't find
762 one, we can stop. */
764 for (i = currindex + 1;
765 ops->iterate (i, &oe)
766 && oe->rank >= curr->rank - 1;
767 i++)
769 if (oe->op == notop)
771 if (dump_file && (dump_flags & TDF_DETAILS))
773 fprintf (dump_file, "Equivalence: ");
774 print_generic_expr (dump_file, notop, 0);
775 if (opcode == BIT_AND_EXPR)
776 fprintf (dump_file, " & ~");
777 else if (opcode == BIT_IOR_EXPR)
778 fprintf (dump_file, " | ~");
779 print_generic_expr (dump_file, oe->op, 0);
780 if (opcode == BIT_AND_EXPR)
781 fprintf (dump_file, " -> 0\n");
782 else if (opcode == BIT_IOR_EXPR)
783 fprintf (dump_file, " -> -1\n");
786 if (opcode == BIT_AND_EXPR)
787 oe->op = build_zero_cst (TREE_TYPE (oe->op));
788 else if (opcode == BIT_IOR_EXPR)
789 oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
790 TYPE_PRECISION (TREE_TYPE (oe->op)));
792 reassociate_stats.ops_eliminated += ops->length () - 1;
793 ops->truncate (0);
794 ops->quick_push (oe);
795 return true;
799 return false;
802 /* Use constant value that may be present in OPS to try to eliminate
803 operands. Note that this function is only really used when we've
804 eliminated ops for other reasons, or merged constants. Across
805 single statements, fold already does all of this, plus more. There
806 is little point in duplicating logic, so I've only included the
807 identities that I could ever construct testcases to trigger. */
809 static void
810 eliminate_using_constants (enum tree_code opcode,
811 vec<operand_entry_t> *ops)
813 operand_entry_t oelast = ops->last ();
814 tree type = TREE_TYPE (oelast->op);
816 if (oelast->rank == 0
817 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
819 switch (opcode)
821 case BIT_AND_EXPR:
822 if (integer_zerop (oelast->op))
824 if (ops->length () != 1)
826 if (dump_file && (dump_flags & TDF_DETAILS))
827 fprintf (dump_file, "Found & 0, removing all other ops\n");
829 reassociate_stats.ops_eliminated += ops->length () - 1;
831 ops->truncate (0);
832 ops->quick_push (oelast);
833 return;
836 else if (integer_all_onesp (oelast->op))
838 if (ops->length () != 1)
840 if (dump_file && (dump_flags & TDF_DETAILS))
841 fprintf (dump_file, "Found & -1, removing\n");
842 ops->pop ();
843 reassociate_stats.ops_eliminated++;
846 break;
847 case BIT_IOR_EXPR:
848 if (integer_all_onesp (oelast->op))
850 if (ops->length () != 1)
852 if (dump_file && (dump_flags & TDF_DETAILS))
853 fprintf (dump_file, "Found | -1, removing all other ops\n");
855 reassociate_stats.ops_eliminated += ops->length () - 1;
857 ops->truncate (0);
858 ops->quick_push (oelast);
859 return;
862 else if (integer_zerop (oelast->op))
864 if (ops->length () != 1)
866 if (dump_file && (dump_flags & TDF_DETAILS))
867 fprintf (dump_file, "Found | 0, removing\n");
868 ops->pop ();
869 reassociate_stats.ops_eliminated++;
872 break;
873 case MULT_EXPR:
874 if (integer_zerop (oelast->op)
875 || (FLOAT_TYPE_P (type)
876 && !HONOR_NANS (TYPE_MODE (type))
877 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
878 && real_zerop (oelast->op)))
880 if (ops->length () != 1)
882 if (dump_file && (dump_flags & TDF_DETAILS))
883 fprintf (dump_file, "Found * 0, removing all other ops\n");
885 reassociate_stats.ops_eliminated += ops->length () - 1;
886 ops->truncate (1);
887 ops->quick_push (oelast);
888 return;
891 else if (integer_onep (oelast->op)
892 || (FLOAT_TYPE_P (type)
893 && !HONOR_SNANS (TYPE_MODE (type))
894 && real_onep (oelast->op)))
896 if (ops->length () != 1)
898 if (dump_file && (dump_flags & TDF_DETAILS))
899 fprintf (dump_file, "Found * 1, removing\n");
900 ops->pop ();
901 reassociate_stats.ops_eliminated++;
902 return;
905 break;
906 case BIT_XOR_EXPR:
907 case PLUS_EXPR:
908 case MINUS_EXPR:
909 if (integer_zerop (oelast->op)
910 || (FLOAT_TYPE_P (type)
911 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
912 && fold_real_zero_addition_p (type, oelast->op,
913 opcode == MINUS_EXPR)))
915 if (ops->length () != 1)
917 if (dump_file && (dump_flags & TDF_DETAILS))
918 fprintf (dump_file, "Found [|^+] 0, removing\n");
919 ops->pop ();
920 reassociate_stats.ops_eliminated++;
921 return;
924 break;
925 default:
926 break;
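/* Summary of the identities handled above, at the expression level
   (illustration only; X and Y are hypothetical operands):

     X & 0  -> 0          X & -1 -> X
     X | -1 -> -1         X | 0  -> X
     X * 0  -> 0          X * 1  -> X
     X + 0  -> X          X ^ 0  -> X

   For floating-point types the * and + cases are only applied when the
   NaN / signed-zero / sNaN semantics checked above allow it.  */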
932 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
933 bool, bool);
935 /* Structure for tracking and counting operands. */
936 typedef struct oecount_s {
937 int cnt;
938 int id;
939 enum tree_code oecode;
940 tree op;
941 } oecount;
944 /* The heap for the oecount hashtable and the sorted list of operands. */
945 static vec<oecount> cvec;
948 /* Oecount hashtable helpers. */
950 struct oecount_hasher : typed_noop_remove <void>
952 /* Note that this hash table stores integers, not pointers.
953 So, observe the casting in the member functions. */
954 typedef void value_type;
955 typedef void compare_type;
956 static inline hashval_t hash (const value_type *);
957 static inline bool equal (const value_type *, const compare_type *);
960 /* Hash function for oecount. */
962 inline hashval_t
963 oecount_hasher::hash (const value_type *p)
965 const oecount *c = &cvec[(size_t)p - 42];
966 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
969 /* Comparison function for oecount. */
971 inline bool
972 oecount_hasher::equal (const value_type *p1, const compare_type *p2)
974 const oecount *c1 = &cvec[(size_t)p1 - 42];
975 const oecount *c2 = &cvec[(size_t)p2 - 42];
976 return (c1->oecode == c2->oecode
977 && c1->op == c2->op);
980 /* Comparison function for qsort sorting oecount elements by count. */
982 static int
983 oecount_cmp (const void *p1, const void *p2)
985 const oecount *c1 = (const oecount *)p1;
986 const oecount *c2 = (const oecount *)p2;
987 if (c1->cnt != c2->cnt)
988 return c1->cnt - c2->cnt;
989 else
990 /* If counts are identical, use unique IDs to stabilize qsort. */
991 return c1->id - c2->id;
994 /* Return TRUE iff STMT represents a builtin call that raises OP
995 to some exponent. */
997 static bool
998 stmt_is_power_of_op (gimple stmt, tree op)
1000 tree fndecl;
1002 if (!is_gimple_call (stmt))
1003 return false;
1005 fndecl = gimple_call_fndecl (stmt);
1007 if (!fndecl
1008 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1009 return false;
1011 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1013 CASE_FLT_FN (BUILT_IN_POW):
1014 CASE_FLT_FN (BUILT_IN_POWI):
1015 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1017 default:
1018 return false;
1022 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1023 in place and return the result. Assumes that stmt_is_power_of_op
1024 was previously called for STMT and returned TRUE. */
1026 static HOST_WIDE_INT
1027 decrement_power (gimple stmt)
1029 REAL_VALUE_TYPE c, cint;
1030 HOST_WIDE_INT power;
1031 tree arg1;
1033 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1035 CASE_FLT_FN (BUILT_IN_POW):
1036 arg1 = gimple_call_arg (stmt, 1);
1037 c = TREE_REAL_CST (arg1);
1038 power = real_to_integer (&c) - 1;
1039 real_from_integer (&cint, VOIDmode, power, 0, 0);
1040 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1041 return power;
1043 CASE_FLT_FN (BUILT_IN_POWI):
1044 arg1 = gimple_call_arg (stmt, 1);
1045 power = TREE_INT_CST_LOW (arg1) - 1;
1046 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1047 return power;
1049 default:
1050 gcc_unreachable ();
1054 /* Find the single immediate use of STMT's LHS, and replace it
1055 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1056 replace *DEF with OP as well. */
1058 static void
1059 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1061 tree lhs;
1062 gimple use_stmt;
1063 use_operand_p use;
1064 gimple_stmt_iterator gsi;
1066 if (is_gimple_call (stmt))
1067 lhs = gimple_call_lhs (stmt);
1068 else
1069 lhs = gimple_assign_lhs (stmt);
1071 gcc_assert (has_single_use (lhs));
1072 single_imm_use (lhs, &use, &use_stmt);
1073 if (lhs == *def)
1074 *def = op;
1075 SET_USE (use, op);
1076 if (TREE_CODE (op) != SSA_NAME)
1077 update_stmt (use_stmt);
1078 gsi = gsi_for_stmt (stmt);
1079 unlink_stmt_vdef (stmt);
1080 gsi_remove (&gsi, true);
1081 release_defs (stmt);
1084 /* Walks the linear chain with result *DEF searching for an operation
1085 with operand OP and code OPCODE, removing that from the chain. *DEF
1086 is updated if there is only one operand but no operation left. */
1088 static void
1089 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1091 gimple stmt = SSA_NAME_DEF_STMT (*def);
1095 tree name;
1097 if (opcode == MULT_EXPR
1098 && stmt_is_power_of_op (stmt, op))
1100 if (decrement_power (stmt) == 1)
1101 propagate_op_to_single_use (op, stmt, def);
1102 return;
1105 name = gimple_assign_rhs1 (stmt);
1107 /* If this is the operation we look for and one of the operands
1108 is ours, simply propagate the other operand into the stmt's
1109 single use. */
1110 if (gimple_assign_rhs_code (stmt) == opcode
1111 && (name == op
1112 || gimple_assign_rhs2 (stmt) == op))
1114 if (name == op)
1115 name = gimple_assign_rhs2 (stmt);
1116 propagate_op_to_single_use (name, stmt, def);
1117 return;
1120 /* We might have a multiply of two __builtin_pow* calls, and
1121 the operand might be hiding in the rightmost one. */
1122 if (opcode == MULT_EXPR
1123 && gimple_assign_rhs_code (stmt) == opcode
1124 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1125 && has_single_use (gimple_assign_rhs2 (stmt)))
1127 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1128 if (stmt_is_power_of_op (stmt2, op))
1130 if (decrement_power (stmt2) == 1)
1131 propagate_op_to_single_use (op, stmt2, def);
1132 return;
1136 /* Continue walking the chain. */
1137 gcc_assert (name != op
1138 && TREE_CODE (name) == SSA_NAME);
1139 stmt = SSA_NAME_DEF_STMT (name);
1141 while (1);
1144 /* Builds one statement performing OP1 OPCODE OP2, creating a new SSA
1145 name of type TYPE for the result. Places the statement after the definition of either
1146 OP1 or OP2. Returns the new statement. */
1148 static gimple
1149 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1151 gimple op1def = NULL, op2def = NULL;
1152 gimple_stmt_iterator gsi;
1153 tree op;
1154 gimple sum;
1156 /* Create the addition statement. */
1157 op = make_ssa_name (type, NULL);
1158 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1160 /* Find an insertion place and insert. */
1161 if (TREE_CODE (op1) == SSA_NAME)
1162 op1def = SSA_NAME_DEF_STMT (op1);
1163 if (TREE_CODE (op2) == SSA_NAME)
1164 op2def = SSA_NAME_DEF_STMT (op2);
1165 if ((!op1def || gimple_nop_p (op1def))
1166 && (!op2def || gimple_nop_p (op2def)))
1168 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR));
1169 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1171 else if ((!op1def || gimple_nop_p (op1def))
1172 || (op2def && !gimple_nop_p (op2def)
1173 && stmt_dominates_stmt_p (op1def, op2def)))
1175 if (gimple_code (op2def) == GIMPLE_PHI)
1177 gsi = gsi_after_labels (gimple_bb (op2def));
1178 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1180 else
1182 if (!stmt_ends_bb_p (op2def))
1184 gsi = gsi_for_stmt (op2def);
1185 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1187 else
1189 edge e;
1190 edge_iterator ei;
1192 FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
1193 if (e->flags & EDGE_FALLTHRU)
1194 gsi_insert_on_edge_immediate (e, sum);
1198 else
1200 if (gimple_code (op1def) == GIMPLE_PHI)
1202 gsi = gsi_after_labels (gimple_bb (op1def));
1203 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1205 else
1207 if (!stmt_ends_bb_p (op1def))
1209 gsi = gsi_for_stmt (op1def);
1210 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
1212 else
1214 edge e;
1215 edge_iterator ei;
1217 FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
1218 if (e->flags & EDGE_FALLTHRU)
1219 gsi_insert_on_edge_immediate (e, sum);
1223 update_stmt (sum);
1225 return sum;
1228 /* Perform un-distribution of divisions and multiplications.
1229 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1230 to (A + B) / X for real X.
1232 The algorithm is organized as follows.
1234 - First we walk the addition chain *OPS looking for summands that
1235 are defined by a multiplication or a real division. This results
1236 in the candidates bitmap with relevant indices into *OPS.
1238 - Second we build the chains of multiplications or divisions for
1239 these candidates, counting the number of occurrences of (operand, code)
1240 pairs in all of the candidates chains.
1242 - Third we sort the (operand, code) pairs by number of occurrence and
1243 process them starting with the pair with the most uses.
1245 * For each such pair we walk the candidates again to build a
1246 second candidate bitmap noting all multiplication/division chains
1247 that have at least one occurrence of (operand, code).
1249 * We build an alternate addition chain only covering these
1250 candidates with one (operand, code) operation removed from their
1251 multiplication/division chain.
1253 * The first candidate gets replaced by the alternate addition chain
1254 multiplied/divided by the operand.
1256 * All candidate chains get disabled for further processing and
1257 processing of (operand, code) pairs continues.
1259 The alternate addition chains built are re-processed by the main
1260 reassociation algorithm which allows optimizing a * x * y + b * y * x
1261 to (a + b) * x * y in one invocation of the reassociation pass. */
1263 static bool
1264 undistribute_ops_list (enum tree_code opcode,
1265 vec<operand_entry_t> *ops, struct loop *loop)
1267 unsigned int length = ops->length ();
1268 operand_entry_t oe1;
1269 unsigned i, j;
1270 sbitmap candidates, candidates2;
1271 unsigned nr_candidates, nr_candidates2;
1272 sbitmap_iterator sbi0;
1273 vec<operand_entry_t> *subops;
1274 hash_table <oecount_hasher> ctable;
1275 bool changed = false;
1276 int next_oecount_id = 0;
1278 if (length <= 1
1279 || opcode != PLUS_EXPR)
1280 return false;
1282 /* Build a list of candidates to process. */
1283 candidates = sbitmap_alloc (length);
1284 bitmap_clear (candidates);
1285 nr_candidates = 0;
1286 FOR_EACH_VEC_ELT (*ops, i, oe1)
1288 enum tree_code dcode;
1289 gimple oe1def;
1291 if (TREE_CODE (oe1->op) != SSA_NAME)
1292 continue;
1293 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1294 if (!is_gimple_assign (oe1def))
1295 continue;
1296 dcode = gimple_assign_rhs_code (oe1def);
1297 if ((dcode != MULT_EXPR
1298 && dcode != RDIV_EXPR)
1299 || !is_reassociable_op (oe1def, dcode, loop))
1300 continue;
1302 bitmap_set_bit (candidates, i);
1303 nr_candidates++;
1306 if (nr_candidates < 2)
1308 sbitmap_free (candidates);
1309 return false;
1312 if (dump_file && (dump_flags & TDF_DETAILS))
1314 fprintf (dump_file, "searching for un-distribute opportunities ");
1315 print_generic_expr (dump_file,
1316 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1317 fprintf (dump_file, " %d\n", nr_candidates);
1320 /* Build linearized sub-operand lists and the counting table. */
1321 cvec.create (0);
1322 ctable.create (15);
1323 /* ??? Macro arguments cannot have multi-argument template types in
1324 them. This typedef is needed to work around that limitation. */
1325 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1326 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1327 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1329 gimple oedef;
1330 enum tree_code oecode;
1331 unsigned j;
1333 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1334 oecode = gimple_assign_rhs_code (oedef);
1335 linearize_expr_tree (&subops[i], oedef,
1336 associative_tree_code (oecode), false);
1338 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1340 oecount c;
1341 void **slot;
1342 size_t idx;
1343 c.oecode = oecode;
1344 c.cnt = 1;
1345 c.id = next_oecount_id++;
1346 c.op = oe1->op;
1347 cvec.safe_push (c);
1348 idx = cvec.length () + 41;
1349 slot = ctable.find_slot ((void *)idx, INSERT);
1350 if (!*slot)
1352 *slot = (void *)idx;
1354 else
1356 cvec.pop ();
1357 cvec[(size_t)*slot - 42].cnt++;
1361 ctable.dispose ();
1363 /* Sort the counting table. */
1364 cvec.qsort (oecount_cmp);
1366 if (dump_file && (dump_flags & TDF_DETAILS))
1368 oecount *c;
1369 fprintf (dump_file, "Candidates:\n");
1370 FOR_EACH_VEC_ELT (cvec, j, c)
1372 fprintf (dump_file, " %u %s: ", c->cnt,
1373 c->oecode == MULT_EXPR
1374 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1375 print_generic_expr (dump_file, c->op, 0);
1376 fprintf (dump_file, "\n");
1380 /* Process the (operand, code) pairs in order of most occurrence. */
1381 candidates2 = sbitmap_alloc (length);
1382 while (!cvec.is_empty ())
1384 oecount *c = &cvec.last ();
1385 if (c->cnt < 2)
1386 break;
1388 /* Now collect the operands in the outer chain that contain
1389 the common operand in their inner chain. */
1390 bitmap_clear (candidates2);
1391 nr_candidates2 = 0;
1392 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1394 gimple oedef;
1395 enum tree_code oecode;
1396 unsigned j;
1397 tree op = (*ops)[i]->op;
1399 /* If we undistributed in this chain already, this may be
1400 a constant. */
1401 if (TREE_CODE (op) != SSA_NAME)
1402 continue;
1404 oedef = SSA_NAME_DEF_STMT (op);
1405 oecode = gimple_assign_rhs_code (oedef);
1406 if (oecode != c->oecode)
1407 continue;
1409 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1411 if (oe1->op == c->op)
1413 bitmap_set_bit (candidates2, i);
1414 ++nr_candidates2;
1415 break;
1420 if (nr_candidates2 >= 2)
1422 operand_entry_t oe1, oe2;
1423 gimple prod;
1424 int first = bitmap_first_set_bit (candidates2);
1426 /* Build the new addition chain. */
1427 oe1 = (*ops)[first];
1428 if (dump_file && (dump_flags & TDF_DETAILS))
1430 fprintf (dump_file, "Building (");
1431 print_generic_expr (dump_file, oe1->op, 0);
1433 zero_one_operation (&oe1->op, c->oecode, c->op);
1434 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1436 gimple sum;
1437 oe2 = (*ops)[i];
1438 if (dump_file && (dump_flags & TDF_DETAILS))
1440 fprintf (dump_file, " + ");
1441 print_generic_expr (dump_file, oe2->op, 0);
1443 zero_one_operation (&oe2->op, c->oecode, c->op);
1444 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1445 oe1->op, oe2->op, opcode);
1446 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1447 oe2->rank = 0;
1448 oe1->op = gimple_get_lhs (sum);
1451 /* Apply the multiplication/division. */
1452 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1453 oe1->op, c->op, c->oecode);
1454 if (dump_file && (dump_flags & TDF_DETAILS))
1456 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1457 print_generic_expr (dump_file, c->op, 0);
1458 fprintf (dump_file, "\n");
1461 /* Record it in the addition chain and disable further
1462 undistribution with this op. */
1463 oe1->op = gimple_assign_lhs (prod);
1464 oe1->rank = get_rank (oe1->op);
1465 subops[first].release ();
1467 changed = true;
1470 cvec.pop ();
1473 for (i = 0; i < ops->length (); ++i)
1474 subops[i].release ();
1475 free (subops);
1476 cvec.release ();
1477 sbitmap_free (candidates);
1478 sbitmap_free (candidates2);
1480 return changed;
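/* Illustration of undistribution (hypothetical operands, not part of the
   pass): for the addition chain a*x + b*x + c the (x, *) pair occurs
   twice, so both candidate chains get the factor x stripped, their
   remainders are summed, and the factor is applied once:

     t = a + b;
     t2 = t * x;          the operand list becomes {t2, 0, c}

   The rebuilt chain is fed back into the normal reassociation machinery,
   which is how a*x*y + b*y*x ends up as (a + b) * x * y within a single
   invocation of the pass.  */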
1483 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1484 expression, examine the other OPS to see if any of them are comparisons
1485 of the same values, which we may be able to combine or eliminate.
1486 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1488 static bool
1489 eliminate_redundant_comparison (enum tree_code opcode,
1490 vec<operand_entry_t> *ops,
1491 unsigned int currindex,
1492 operand_entry_t curr)
1494 tree op1, op2;
1495 enum tree_code lcode, rcode;
1496 gimple def1, def2;
1497 int i;
1498 operand_entry_t oe;
1500 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1501 return false;
1503 /* Check that CURR is a comparison. */
1504 if (TREE_CODE (curr->op) != SSA_NAME)
1505 return false;
1506 def1 = SSA_NAME_DEF_STMT (curr->op);
1507 if (!is_gimple_assign (def1))
1508 return false;
1509 lcode = gimple_assign_rhs_code (def1);
1510 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1511 return false;
1512 op1 = gimple_assign_rhs1 (def1);
1513 op2 = gimple_assign_rhs2 (def1);
1515 /* Now look for a similar comparison in the remaining OPS. */
1516 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1518 tree t;
1520 if (TREE_CODE (oe->op) != SSA_NAME)
1521 continue;
1522 def2 = SSA_NAME_DEF_STMT (oe->op);
1523 if (!is_gimple_assign (def2))
1524 continue;
1525 rcode = gimple_assign_rhs_code (def2);
1526 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1527 continue;
1529 /* If we got here, we have a match. See if we can combine the
1530 two comparisons. */
1531 if (opcode == BIT_IOR_EXPR)
1532 t = maybe_fold_or_comparisons (lcode, op1, op2,
1533 rcode, gimple_assign_rhs1 (def2),
1534 gimple_assign_rhs2 (def2));
1535 else
1536 t = maybe_fold_and_comparisons (lcode, op1, op2,
1537 rcode, gimple_assign_rhs1 (def2),
1538 gimple_assign_rhs2 (def2));
1539 if (!t)
1540 continue;
1542 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1543 always give us a boolean_type_node value back. If the original
1544 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1545 we need to convert. */
1546 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1547 t = fold_convert (TREE_TYPE (curr->op), t);
1549 if (TREE_CODE (t) != INTEGER_CST
1550 && !operand_equal_p (t, curr->op, 0))
1552 enum tree_code subcode;
1553 tree newop1, newop2;
1554 if (!COMPARISON_CLASS_P (t))
1555 continue;
1556 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1557 STRIP_USELESS_TYPE_CONVERSION (newop1);
1558 STRIP_USELESS_TYPE_CONVERSION (newop2);
1559 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1560 continue;
1563 if (dump_file && (dump_flags & TDF_DETAILS))
1565 fprintf (dump_file, "Equivalence: ");
1566 print_generic_expr (dump_file, curr->op, 0);
1567 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1568 print_generic_expr (dump_file, oe->op, 0);
1569 fprintf (dump_file, " -> ");
1570 print_generic_expr (dump_file, t, 0);
1571 fprintf (dump_file, "\n");
1574 /* Now we can delete oe, as it has been subsumed by the new combined
1575 expression t. */
1576 ops->ordered_remove (i);
1577 reassociate_stats.ops_eliminated ++;
1579 /* If t is the same as curr->op, we're done. Otherwise we must
1580 replace curr->op with t. Special case is if we got a constant
1581 back, in which case we add it to the end instead of in place of
1582 the current entry. */
1583 if (TREE_CODE (t) == INTEGER_CST)
1585 ops->ordered_remove (currindex);
1586 add_to_ops_vec (ops, t);
1588 else if (!operand_equal_p (t, curr->op, 0))
1590 gimple sum;
1591 enum tree_code subcode;
1592 tree newop1;
1593 tree newop2;
1594 gcc_assert (COMPARISON_CLASS_P (t));
1595 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1596 STRIP_USELESS_TYPE_CONVERSION (newop1);
1597 STRIP_USELESS_TYPE_CONVERSION (newop2);
1598 gcc_checking_assert (is_gimple_val (newop1)
1599 && is_gimple_val (newop2));
1600 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1601 curr->op = gimple_get_lhs (sum);
1603 return true;
1606 return false;
1609 /* Perform various identities and other optimizations on the list of
1610 operand entries, stored in OPS. The tree code for the binary
1611 operation between all the operands is OPCODE. */
1613 static void
1614 optimize_ops_list (enum tree_code opcode,
1615 vec<operand_entry_t> *ops)
1617 unsigned int length = ops->length ();
1618 unsigned int i;
1619 operand_entry_t oe;
1620 operand_entry_t oelast = NULL;
1621 bool iterate = false;
1623 if (length == 1)
1624 return;
1626 oelast = ops->last ();
1628 /* If the last two are constants, pop the constants off, merge them
1629 and try the next two. */
1630 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1632 operand_entry_t oelm1 = (*ops)[length - 2];
1634 if (oelm1->rank == 0
1635 && is_gimple_min_invariant (oelm1->op)
1636 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1637 TREE_TYPE (oelast->op)))
1639 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1640 oelm1->op, oelast->op);
1642 if (folded && is_gimple_min_invariant (folded))
1644 if (dump_file && (dump_flags & TDF_DETAILS))
1645 fprintf (dump_file, "Merging constants\n");
1647 ops->pop ();
1648 ops->pop ();
1650 add_to_ops_vec (ops, folded);
1651 reassociate_stats.constants_eliminated++;
1653 optimize_ops_list (opcode, ops);
1654 return;
1659 eliminate_using_constants (opcode, ops);
1660 oelast = NULL;
1662 for (i = 0; ops->iterate (i, &oe);)
1664 bool done = false;
1666 if (eliminate_not_pairs (opcode, ops, i, oe))
1667 return;
1668 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1669 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1670 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1672 if (done)
1673 return;
1674 iterate = true;
1675 oelast = NULL;
1676 continue;
1678 oelast = oe;
1679 i++;
1682 length = ops->length ();
1683 oelast = ops->last ();
1685 if (iterate)
1686 optimize_ops_list (opcode, ops);
1689 /* The following functions are subroutines to optimize_range_tests and allow
1690 it to try to change a logical combination of comparisons into a range
1691 test.
1693 For example, both
1694 X == 2 || X == 5 || X == 3 || X == 4
1695 and
1696 X >= 2 && X <= 5
1697 are converted to
1698 (unsigned) (X - 2) <= 3
1700 For more information see comments above fold_range_test in fold-const.c;
1701 this implementation is for GIMPLE. */
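/* Worked example for the conversion above (not part of the pass):
   subtracting the low bound maps the accepted values {2, 3, 4, 5} onto
   {0, 1, 2, 3}, and doing the comparison in an unsigned type makes any
   X below 2 wrap around to a very large value, so the single test
   (unsigned) (X - 2) <= 3 accepts exactly the same inputs as the
   original chain of comparisons.  */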
1703 struct range_entry
1705 tree exp;
1706 tree low;
1707 tree high;
1708 bool in_p;
1709 bool strict_overflow_p;
1710 unsigned int idx, next;
1713 /* This is similar to make_range in fold-const.c, but on top of
1714 GIMPLE instead of trees. If EXP is non-NULL, it should be
1715 an SSA_NAME and the STMT argument is ignored; otherwise the STMT
1716 argument should be a GIMPLE_COND. */
1718 static void
1719 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1721 int in_p;
1722 tree low, high;
1723 bool is_bool, strict_overflow_p;
1725 r->exp = NULL_TREE;
1726 r->in_p = false;
1727 r->strict_overflow_p = false;
1728 r->low = NULL_TREE;
1729 r->high = NULL_TREE;
1730 if (exp != NULL_TREE
1731 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1732 return;
1734 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1735 and see if we can refine the range. Some of the cases below may not
1736 happen, but it doesn't seem worth worrying about this. We "continue"
1737 the outer loop when we've changed something; otherwise we "break"
1738 the switch, which will "break" the while. */
1739 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1740 high = low;
1741 in_p = 0;
1742 strict_overflow_p = false;
1743 is_bool = false;
1744 if (exp == NULL_TREE)
1745 is_bool = true;
1746 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1748 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1749 is_bool = true;
1750 else
1751 return;
1753 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1754 is_bool = true;
1756 while (1)
1758 enum tree_code code;
1759 tree arg0, arg1, exp_type;
1760 tree nexp;
1761 location_t loc;
1763 if (exp != NULL_TREE)
1765 if (TREE_CODE (exp) != SSA_NAME)
1766 break;
1768 stmt = SSA_NAME_DEF_STMT (exp);
1769 if (!is_gimple_assign (stmt))
1770 break;
1772 code = gimple_assign_rhs_code (stmt);
1773 arg0 = gimple_assign_rhs1 (stmt);
1774 arg1 = gimple_assign_rhs2 (stmt);
1775 exp_type = TREE_TYPE (exp);
1777 else
1779 code = gimple_cond_code (stmt);
1780 arg0 = gimple_cond_lhs (stmt);
1781 arg1 = gimple_cond_rhs (stmt);
1782 exp_type = boolean_type_node;
1785 if (TREE_CODE (arg0) != SSA_NAME)
1786 break;
1787 loc = gimple_location (stmt);
1788 switch (code)
1790 case BIT_NOT_EXPR:
1791 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1793 in_p = !in_p;
1794 exp = arg0;
1795 continue;
1797 break;
1798 case SSA_NAME:
1799 exp = arg0;
1800 continue;
1801 CASE_CONVERT:
1802 if (is_bool)
1803 goto do_default;
1804 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1806 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1807 is_bool = true;
1808 else
1809 return;
1811 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1812 is_bool = true;
1813 goto do_default;
1814 case EQ_EXPR:
1815 case NE_EXPR:
1816 case LT_EXPR:
1817 case LE_EXPR:
1818 case GE_EXPR:
1819 case GT_EXPR:
1820 is_bool = true;
1821 /* FALLTHRU */
1822 default:
1823 if (!is_bool)
1824 return;
1825 do_default:
1826 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1827 &low, &high, &in_p,
1828 &strict_overflow_p);
1829 if (nexp != NULL_TREE)
1831 exp = nexp;
1832 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1833 continue;
1835 break;
1837 break;
1839 if (is_bool)
1841 r->exp = exp;
1842 r->in_p = in_p;
1843 r->low = low;
1844 r->high = high;
1845 r->strict_overflow_p = strict_overflow_p;
1849 /* Comparison function for qsort. Sort entries
1850 without SSA_NAME exp first, then with SSA_NAMEs sorted
1851 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1852 by increasing ->low and if ->low is the same, by increasing
1853 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1854 maximum. */
1856 static int
1857 range_entry_cmp (const void *a, const void *b)
1859 const struct range_entry *p = (const struct range_entry *) a;
1860 const struct range_entry *q = (const struct range_entry *) b;
1862 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1864 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1866 /* Group range_entries for the same SSA_NAME together. */
1867 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1868 return -1;
1869 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1870 return 1;
1871 /* If ->low is different, NULL low goes first, then by
1872 ascending low. */
1873 if (p->low != NULL_TREE)
1875 if (q->low != NULL_TREE)
1877 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1878 p->low, q->low);
1879 if (tem && integer_onep (tem))
1880 return -1;
1881 tem = fold_binary (GT_EXPR, boolean_type_node,
1882 p->low, q->low);
1883 if (tem && integer_onep (tem))
1884 return 1;
1886 else
1887 return 1;
1889 else if (q->low != NULL_TREE)
1890 return -1;
1891 /* If ->high is different, NULL high goes last, before that by
1892 ascending high. */
1893 if (p->high != NULL_TREE)
1895 if (q->high != NULL_TREE)
1897 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1898 p->high, q->high);
1899 if (tem && integer_onep (tem))
1900 return -1;
1901 tem = fold_binary (GT_EXPR, boolean_type_node,
1902 p->high, q->high);
1903 if (tem && integer_onep (tem))
1904 return 1;
1906 else
1907 return -1;
1909 else if (p->high != NULL_TREE)
1910 return 1;
1911 /* If both ranges are the same, sort below by ascending idx. */
1913 else
1914 return 1;
1916 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1917 return -1;
1919 if (p->idx < q->idx)
1920 return -1;
1921 else
1923 gcc_checking_assert (p->idx > q->idx);
1924 return 1;
1928 /* Helper routine of optimize_range_test.
1929 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
1930 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
1931 OPCODE and OPS are arguments of optimize_range_tests. Return
1932 true if the range merge has been successful.
1933 If OPCODE is ERROR_MARK, this is called from within
1934 maybe_optimize_range_tests and is performing inter-bb range optimization.
1936 Changes should then be performed right away, and whether an op is
1936 BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank. */
1938 static bool
1939 update_range_test (struct range_entry *range, struct range_entry *otherrange,
1940 unsigned int count, enum tree_code opcode,
1941 vec<operand_entry_t> *ops, tree exp, bool in_p,
1942 tree low, tree high, bool strict_overflow_p)
1944 operand_entry_t oe = (*ops)[range->idx];
1945 tree op = oe->op;
1946 gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id));
1947 location_t loc = gimple_location (stmt);
1948 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
1949 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
1950 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
1951 gimple_stmt_iterator gsi;
1953 if (tem == NULL_TREE)
1954 return false;
1956 if (strict_overflow_p && issue_strict_overflow_warning (wc))
1957 warning_at (loc, OPT_Wstrict_overflow,
1958 "assuming signed overflow does not occur "
1959 "when simplifying range test");
1961 if (dump_file && (dump_flags & TDF_DETAILS))
1963 struct range_entry *r;
1964 fprintf (dump_file, "Optimizing range tests ");
1965 print_generic_expr (dump_file, range->exp, 0);
1966 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
1967 print_generic_expr (dump_file, range->low, 0);
1968 fprintf (dump_file, ", ");
1969 print_generic_expr (dump_file, range->high, 0);
1970 fprintf (dump_file, "]");
1971 for (r = otherrange; r < otherrange + count; r++)
1973 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
1974 print_generic_expr (dump_file, r->low, 0);
1975 fprintf (dump_file, ", ");
1976 print_generic_expr (dump_file, r->high, 0);
1977 fprintf (dump_file, "]");
1979 fprintf (dump_file, "\n into ");
1980 print_generic_expr (dump_file, tem, 0);
1981 fprintf (dump_file, "\n");
1984 if (opcode == BIT_IOR_EXPR
1985 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
1986 tem = invert_truthvalue_loc (loc, tem);
1988 tem = fold_convert_loc (loc, optype, tem);
1989 gsi = gsi_for_stmt (stmt);
1990 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
1991 GSI_SAME_STMT);
1993 /* If doing inter-bb range test optimization, update the
1994 stmts immediately. Start with changing the first range test
1995 immediate use to the new value (TEM), or, if the first range
1996 test is a GIMPLE_COND stmt, change that condition. */
1997 if (opcode == ERROR_MARK)
1999 if (op)
2001 imm_use_iterator iter;
2002 use_operand_p use_p;
2003 gimple use_stmt;
2005 FOR_EACH_IMM_USE_STMT (use_stmt, iter, op)
2007 if (is_gimple_debug (use_stmt))
2008 continue;
2009 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2010 SET_USE (use_p, tem);
2011 update_stmt (use_stmt);
2014 else
2016 gimple_cond_set_code (stmt, NE_EXPR);
2017 gimple_cond_set_lhs (stmt, tem);
2018 gimple_cond_set_rhs (stmt, boolean_false_node);
2019 update_stmt (stmt);
2022 oe->op = tem;
2023 range->exp = exp;
2024 range->low = low;
2025 range->high = high;
2026 range->in_p = in_p;
2027 range->strict_overflow_p = false;
2029 for (range = otherrange; range < otherrange + count; range++)
2031 oe = (*ops)[range->idx];
2032 /* Now change all the other range test immediate uses, so that
2033 those tests will be optimized away. */
2034 if (opcode == ERROR_MARK)
2036 if (oe->op)
2038 imm_use_iterator iter;
2039 use_operand_p use_p;
2040 gimple use_stmt;
2042 FOR_EACH_IMM_USE_STMT (use_stmt, iter, oe->op)
2044 if (is_gimple_debug (use_stmt))
2045 continue;
2046 /* If imm use of _8 is a statement like _7 = _8 | _9;,
2047 adjust it into _7 = _9;. */
2048 if (is_gimple_assign (use_stmt)
2049 && gimple_assign_rhs_code (use_stmt) == oe->rank)
2051 tree expr = NULL_TREE;
2052 if (oe->op == gimple_assign_rhs1 (use_stmt))
2053 expr = gimple_assign_rhs2 (use_stmt);
2054 else if (oe->op == gimple_assign_rhs2 (use_stmt))
2055 expr = gimple_assign_rhs1 (use_stmt);
2056 if (expr
2057 && expr != oe->op
2058 && TREE_CODE (expr) == SSA_NAME)
2060 gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt);
2061 gimple_assign_set_rhs_with_ops (&gsi2, SSA_NAME,
2062 expr, NULL_TREE);
2063 update_stmt (use_stmt);
2064 continue;
2067 /* If imm use of _8 is a statement like _7 = (int) _8;,
2068 adjust it into _7 = 0; or _7 = 1;. */
2069 if (gimple_assign_cast_p (use_stmt)
2070 && oe->op == gimple_assign_rhs1 (use_stmt))
2072 tree lhs = gimple_assign_lhs (use_stmt);
2073 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
2075 gimple_stmt_iterator gsi2
2076 = gsi_for_stmt (use_stmt);
2077 tree expr = build_int_cst (TREE_TYPE (lhs),
2078 oe->rank == BIT_IOR_EXPR
2079 ? 0 : 1);
2080 gimple_assign_set_rhs_with_ops (&gsi2,
2081 INTEGER_CST,
2082 expr, NULL_TREE);
2083 update_stmt (use_stmt);
2084 continue;
2087 /* Otherwise replace the use with 0 or 1. */
2088 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2089 SET_USE (use_p,
2090 build_int_cst (TREE_TYPE (oe->op),
2091 oe->rank == BIT_IOR_EXPR
2092 ? 0 : 1));
2093 update_stmt (use_stmt);
2096 else
2098 /* If range test was a GIMPLE_COND, simply change it
2099 into an always false or always true condition. */
2100 stmt = last_stmt (BASIC_BLOCK (oe->id));
2101 if (oe->rank == BIT_IOR_EXPR)
2102 gimple_cond_make_false (stmt);
2103 else
2104 gimple_cond_make_true (stmt);
2105 update_stmt (stmt);
2108 oe->op = error_mark_node;
2109 range->exp = NULL_TREE;
2111 return true;
2114 /* Optimize range tests, similarly to how fold_range_test optimizes
2115 them on trees. The tree code for the binary
2116 operation between all the operands is OPCODE.
2117 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2118 maybe_optimize_range_tests for inter-bb range optimization.
2119 In that case, if oe->op is NULL, oe->id is the index of the
2120 basic block whose GIMPLE_COND is && or ||ed into the test, and oe->rank says
2121 the actual opcode. */
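/* An illustrative sketch of the kind of rewrite performed below (source-level
   view, hypothetical variable x, not taken from the code itself): the test

     x == 0 || x == 1 || x == 2 || x == 3

   is parsed by init_range_entry into four singleton ranges on x, which the
   merge loop combines via merge_ranges into the single range [0, 3], so
   update_range_test can emit one unsigned check such as x <= 3 in place of
   the four comparisons.  */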
2123 static void
2124 optimize_range_tests (enum tree_code opcode,
2125 vec<operand_entry_t> *ops)
2127 unsigned int length = ops->length (), i, j, first;
2128 operand_entry_t oe;
2129 struct range_entry *ranges;
2130 bool any_changes = false;
2132 if (length == 1)
2133 return;
2135 ranges = XNEWVEC (struct range_entry, length);
2136 for (i = 0; i < length; i++)
2138 oe = (*ops)[i];
2139 ranges[i].idx = i;
2140 init_range_entry (ranges + i, oe->op,
2141 oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id)));
2142 /* For |, invert it now; we will invert it again before emitting
2143 the optimized expression. */
2144 if (opcode == BIT_IOR_EXPR
2145 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2146 ranges[i].in_p = !ranges[i].in_p;
2149 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2150 for (i = 0; i < length; i++)
2151 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2152 break;
2154 /* Try to merge ranges. */
2155 for (first = i; i < length; i++)
2157 tree low = ranges[i].low;
2158 tree high = ranges[i].high;
2159 int in_p = ranges[i].in_p;
2160 bool strict_overflow_p = ranges[i].strict_overflow_p;
2161 int update_fail_count = 0;
2163 for (j = i + 1; j < length; j++)
2165 if (ranges[i].exp != ranges[j].exp)
2166 break;
2167 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2168 ranges[j].in_p, ranges[j].low, ranges[j].high))
2169 break;
2170 strict_overflow_p |= ranges[j].strict_overflow_p;
2173 if (j == i + 1)
2174 continue;
2176 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2177 ops, ranges[i].exp, in_p, low, high,
2178 strict_overflow_p))
2180 i = j - 1;
2181 any_changes = true;
2183 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2184 while update_range_test would fail. */
2185 else if (update_fail_count == 64)
2186 i = j - 1;
2187 else
2188 ++update_fail_count;
2191 /* Optimize X == CST1 || X == CST2
2192 if popcount (CST1 ^ CST2) == 1 into
2193 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2194 Similarly for ranges. E.g.
2195 X != 2 && X != 3 && X != 10 && X != 11
2196 will be transformed by the above loop into
2197 (X - 2U) <= 1U && (X - 10U) <= 1U
2198 and this loop can transform that into
2199 ((X & ~8) - 2U) <= 1U. */
2200 for (i = first; i < length; i++)
2202 tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp;
2204 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2205 continue;
2206 type = TREE_TYPE (ranges[i].exp);
2207 if (!INTEGRAL_TYPE_P (type))
2208 continue;
2209 lowi = ranges[i].low;
2210 if (lowi == NULL_TREE)
2211 lowi = TYPE_MIN_VALUE (type);
2212 highi = ranges[i].high;
2213 if (highi == NULL_TREE)
2214 continue;
2215 for (j = i + 1; j < length && j < i + 64; j++)
2217 if (ranges[j].exp == NULL_TREE)
2218 continue;
2219 if (ranges[i].exp != ranges[j].exp)
2220 break;
2221 if (ranges[j].in_p)
2222 continue;
2223 lowj = ranges[j].low;
2224 if (lowj == NULL_TREE)
2225 continue;
2226 highj = ranges[j].high;
2227 if (highj == NULL_TREE)
2228 highj = TYPE_MAX_VALUE (type);
2229 tem = fold_binary (GT_EXPR, boolean_type_node,
2230 lowj, highi);
2231 if (tem == NULL_TREE || !integer_onep (tem))
2232 continue;
2233 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2234 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2235 continue;
2236 gcc_checking_assert (!integer_zerop (lowxor));
2237 tem = fold_binary (MINUS_EXPR, type, lowxor,
2238 build_int_cst (type, 1));
2239 if (tem == NULL_TREE)
2240 continue;
2241 tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem);
2242 if (tem == NULL_TREE || !integer_zerop (tem))
2243 continue;
2244 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2245 if (!tree_int_cst_equal (lowxor, highxor))
2246 continue;
2247 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2248 exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem);
2249 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2250 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2251 if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp,
2252 ranges[i].in_p, lowj, highj,
2253 ranges[i].strict_overflow_p
2254 || ranges[j].strict_overflow_p))
2256 any_changes = true;
2257 break;
2262 if (any_changes && opcode != ERROR_MARK)
2264 j = 0;
2265 FOR_EACH_VEC_ELT (*ops, i, oe)
2267 if (oe->op == error_mark_node)
2268 continue;
2269 else if (i != j)
2270 (*ops)[j] = oe;
2271 j++;
2273 ops->truncate (j);
2276 XDELETEVEC (ranges);
2279 /* Return true if STMT is a cast like:
2280 <bb N>:
2282 _123 = (int) _234;
2284 <bb M>:
2285 # _345 = PHI <_123(N), 1(...), 1(...)>
2286 where _234 has bool type, _123 has a single use and
2287 bb N has a single successor M. This is commonly used in
2288 the last block of a range test. */
2290 static bool
2291 final_range_test_p (gimple stmt)
2293 basic_block bb, rhs_bb;
2294 edge e;
2295 tree lhs, rhs;
2296 use_operand_p use_p;
2297 gimple use_stmt;
2299 if (!gimple_assign_cast_p (stmt))
2300 return false;
2301 bb = gimple_bb (stmt);
2302 if (!single_succ_p (bb))
2303 return false;
2304 e = single_succ_edge (bb);
2305 if (e->flags & EDGE_COMPLEX)
2306 return false;
2308 lhs = gimple_assign_lhs (stmt);
2309 rhs = gimple_assign_rhs1 (stmt);
2310 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2311 || TREE_CODE (rhs) != SSA_NAME
2312 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2313 return false;
2315 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2316 if (!single_imm_use (lhs, &use_p, &use_stmt))
2317 return false;
2319 if (gimple_code (use_stmt) != GIMPLE_PHI
2320 || gimple_bb (use_stmt) != e->dest)
2321 return false;
2323 /* And that the rhs is defined in the same loop. */
2324 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2325 if (rhs_bb == NULL
2326 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2327 return false;
2329 return true;
2332 /* Return true if BB is suitable basic block for inter-bb range test
2333 optimization. If BACKWARD is true, BB should be the only predecessor
2334 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2335 or compared against to find a common basic block to which all conditions
2336 branch if true resp. false. If BACKWARD is false, TEST_BB should
2337 be the only predecessor of BB. */
2339 static bool
2340 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2341 bool backward)
2343 edge_iterator ei, ei2;
2344 edge e, e2;
2345 gimple stmt;
2346 gimple_stmt_iterator gsi;
2347 bool other_edge_seen = false;
2348 bool is_cond;
2350 if (test_bb == bb)
2351 return false;
2352 /* Check last stmt first. */
2353 stmt = last_stmt (bb);
2354 if (stmt == NULL
2355 || (gimple_code (stmt) != GIMPLE_COND
2356 && (backward || !final_range_test_p (stmt)))
2357 || gimple_visited_p (stmt)
2358 || stmt_could_throw_p (stmt)
2359 || *other_bb == bb)
2360 return false;
2361 is_cond = gimple_code (stmt) == GIMPLE_COND;
2362 if (is_cond)
2364 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2365 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2366 to *OTHER_BB (if not set yet, try to find it out). */
2367 if (EDGE_COUNT (bb->succs) != 2)
2368 return false;
2369 FOR_EACH_EDGE (e, ei, bb->succs)
2371 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2372 return false;
2373 if (e->dest == test_bb)
2375 if (backward)
2376 continue;
2377 else
2378 return false;
2380 if (e->dest == bb)
2381 return false;
2382 if (*other_bb == NULL)
2384 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2385 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2386 return false;
2387 else if (e->dest == e2->dest)
2388 *other_bb = e->dest;
2389 if (*other_bb == NULL)
2390 return false;
2392 if (e->dest == *other_bb)
2393 other_edge_seen = true;
2394 else if (backward)
2395 return false;
2397 if (*other_bb == NULL || !other_edge_seen)
2398 return false;
2400 else if (single_succ (bb) != *other_bb)
2401 return false;
2403 /* Now check all PHIs of *OTHER_BB. */
2404 e = find_edge (bb, *other_bb);
2405 e2 = find_edge (test_bb, *other_bb);
2406 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2408 gimple phi = gsi_stmt (gsi);
2409 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2410 corresponding to BB and TEST_BB predecessor must be the same. */
2411 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2412 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2414 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2415 one of the PHIs should have the lhs of the last stmt in
2416 that block as PHI arg and that PHI should have 0 or 1
2417 corresponding to it in all other range test basic blocks
2418 considered. */
2419 if (!is_cond)
2421 if (gimple_phi_arg_def (phi, e->dest_idx)
2422 == gimple_assign_lhs (stmt)
2423 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2424 || integer_onep (gimple_phi_arg_def (phi,
2425 e2->dest_idx))))
2426 continue;
2428 else
2430 gimple test_last = last_stmt (test_bb);
2431 if (gimple_code (test_last) != GIMPLE_COND
2432 && gimple_phi_arg_def (phi, e2->dest_idx)
2433 == gimple_assign_lhs (test_last)
2434 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2435 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2436 continue;
2439 return false;
2442 return true;
2445 /* Return true if BB doesn't have side-effects that would disallow
2446 range test optimization, all SSA_NAMEs set in the bb are consumed
2447 in the bb and there are no PHIs. */
2449 static bool
2450 no_side_effect_bb (basic_block bb)
2452 gimple_stmt_iterator gsi;
2453 gimple last;
2455 if (!gimple_seq_empty_p (phi_nodes (bb)))
2456 return false;
2457 last = last_stmt (bb);
2458 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2460 gimple stmt = gsi_stmt (gsi);
2461 tree lhs;
2462 imm_use_iterator imm_iter;
2463 use_operand_p use_p;
2465 if (is_gimple_debug (stmt))
2466 continue;
2467 if (gimple_has_side_effects (stmt))
2468 return false;
2469 if (stmt == last)
2470 return true;
2471 if (!is_gimple_assign (stmt))
2472 return false;
2473 lhs = gimple_assign_lhs (stmt);
2474 if (TREE_CODE (lhs) != SSA_NAME)
2475 return false;
2476 if (gimple_assign_rhs_could_trap_p (stmt))
2477 return false;
2478 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2480 gimple use_stmt = USE_STMT (use_p);
2481 if (is_gimple_debug (use_stmt))
2482 continue;
2483 if (gimple_bb (use_stmt) != bb)
2484 return false;
2487 return false;
2490 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2491 return true and fill in *OPS recursively. */
2493 static bool
2494 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2495 struct loop *loop)
2497 gimple stmt = SSA_NAME_DEF_STMT (var);
2498 tree rhs[2];
2499 int i;
2501 if (!is_reassociable_op (stmt, code, loop))
2502 return false;
2504 rhs[0] = gimple_assign_rhs1 (stmt);
2505 rhs[1] = gimple_assign_rhs2 (stmt);
2506 gimple_set_visited (stmt, true);
2507 for (i = 0; i < 2; i++)
2508 if (TREE_CODE (rhs[i]) == SSA_NAME
2509 && !get_ops (rhs[i], code, ops, loop)
2510 && has_single_use (rhs[i]))
2512 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2514 oe->op = rhs[i];
2515 oe->rank = code;
2516 oe->id = 0;
2517 oe->count = 1;
2518 ops->safe_push (oe);
2520 return true;
2523 /* Inter-bb range test optimization. */
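/* Illustrative sketch (hypothetical CFG, not taken from the code below):
   when a test such as

     if (a == 3 || a == 4)
       foo ();

   is spread across two basic blocks, each ending in its own GIMPLE_COND and
   both branching to the same destination when true, no single statement
   chain contains the whole expression.  maybe_optimize_range_tests walks
   those blocks, collects the individual conditions into one operand vector
   and calls optimize_range_tests with opcode ERROR_MARK, which here could
   merge the two singleton ranges into a check like (unsigned) (a - 3) <= 1.  */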
2525 static void
2526 maybe_optimize_range_tests (gimple stmt)
2528 basic_block first_bb = gimple_bb (stmt);
2529 basic_block last_bb = first_bb;
2530 basic_block other_bb = NULL;
2531 basic_block bb;
2532 edge_iterator ei;
2533 edge e;
2534 vec<operand_entry_t> ops = vNULL;
2536 /* Consider only basic blocks that end with GIMPLE_COND or
2537 a cast statement satisfying final_range_test_p. All
2538 but the last bb in the first_bb .. last_bb range
2539 should end with GIMPLE_COND. */
2540 if (gimple_code (stmt) == GIMPLE_COND)
2542 if (EDGE_COUNT (first_bb->succs) != 2)
2543 return;
2545 else if (final_range_test_p (stmt))
2546 other_bb = single_succ (first_bb);
2547 else
2548 return;
2550 if (stmt_could_throw_p (stmt))
2551 return;
2553 /* As relative ordering of post-dominator sons isn't fixed,
2554 maybe_optimize_range_tests can be called first on any
2555 bb in the range we want to optimize. So, start searching
2556 backwards, if first_bb can be set to a predecessor. */
2557 while (single_pred_p (first_bb))
2559 basic_block pred_bb = single_pred (first_bb);
2560 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2561 break;
2562 if (!no_side_effect_bb (first_bb))
2563 break;
2564 first_bb = pred_bb;
2566 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2567 Before starting forward search in last_bb successors, find
2568 out the other_bb. */
2569 if (first_bb == last_bb)
2571 other_bb = NULL;
2572 /* As non-GIMPLE_COND last stmt always terminates the range,
2573 if forward search didn't discover anything, just give up. */
2574 if (gimple_code (stmt) != GIMPLE_COND)
2575 return;
2576 /* Look at both successors. Either it ends with a GIMPLE_COND
2577 and satisfies suitable_cond_bb, or ends with a cast and
2578 other_bb is that cast's successor. */
2579 FOR_EACH_EDGE (e, ei, first_bb->succs)
2580 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2581 || e->dest == first_bb)
2582 return;
2583 else if (single_pred_p (e->dest))
2585 stmt = last_stmt (e->dest);
2586 if (stmt
2587 && gimple_code (stmt) == GIMPLE_COND
2588 && EDGE_COUNT (e->dest->succs) == 2)
2590 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2591 break;
2592 else
2593 other_bb = NULL;
2595 else if (stmt
2596 && final_range_test_p (stmt)
2597 && find_edge (first_bb, single_succ (e->dest)))
2599 other_bb = single_succ (e->dest);
2600 if (other_bb == first_bb)
2601 other_bb = NULL;
2604 if (other_bb == NULL)
2605 return;
2607 /* Now do the forward search, moving last_bb to successor bbs
2608 that aren't other_bb. */
2609 while (EDGE_COUNT (last_bb->succs) == 2)
2611 FOR_EACH_EDGE (e, ei, last_bb->succs)
2612 if (e->dest != other_bb)
2613 break;
2614 if (e == NULL)
2615 break;
2616 if (!single_pred_p (e->dest))
2617 break;
2618 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2619 break;
2620 if (!no_side_effect_bb (e->dest))
2621 break;
2622 last_bb = e->dest;
2624 if (first_bb == last_bb)
2625 return;
2626 /* Here basic blocks first_bb through last_bb's predecessor
2627 end with GIMPLE_COND, all of them have one of the edges to
2628 other_bb and another to another block in the range,
2629 all blocks except first_bb don't have side-effects and
2630 last_bb ends with either GIMPLE_COND, or cast satisfying
2631 final_range_test_p. */
2632 for (bb = last_bb; ; bb = single_pred (bb))
2634 enum tree_code code;
2635 tree lhs, rhs;
2637 e = find_edge (bb, other_bb);
2638 stmt = last_stmt (bb);
2639 gimple_set_visited (stmt, true);
2640 if (gimple_code (stmt) != GIMPLE_COND)
2642 use_operand_p use_p;
2643 gimple phi;
2644 edge e2;
2645 unsigned int d;
2647 lhs = gimple_assign_lhs (stmt);
2648 rhs = gimple_assign_rhs1 (stmt);
2649 gcc_assert (bb == last_bb);
2651 /* stmt is
2652 _123 = (int) _234;
2654 followed by:
2655 <bb M>:
2656 # _345 = PHI <_123(N), 1(...), 1(...)>
2658 or 0 instead of 1. If it is 0, the _234
2659 range test is anded together with all the
2660 other range tests, if it is 1, it is ored with
2661 them. */
2662 single_imm_use (lhs, &use_p, &phi);
2663 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2664 e2 = find_edge (first_bb, other_bb);
2665 d = e2->dest_idx;
2666 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2667 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2668 code = BIT_AND_EXPR;
2669 else
2671 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2672 code = BIT_IOR_EXPR;
2675 /* If _234 SSA_NAME_DEF_STMT is
2676 _234 = _567 | _789;
2677 (or &, corresponding to 1/0 in the phi arguments),
2678 push into ops the individual range test arguments
2679 of the bitwise or resp. and, recursively. */
2680 if (!get_ops (rhs, code, &ops,
2681 loop_containing_stmt (stmt))
2682 && has_single_use (rhs))
2684 /* Otherwise, push the _234 range test itself. */
2685 operand_entry_t oe
2686 = (operand_entry_t) pool_alloc (operand_entry_pool);
2688 oe->op = rhs;
2689 oe->rank = code;
2690 oe->id = 0;
2691 oe->count = 1;
2692 ops.safe_push (oe);
2694 continue;
2696 /* Otherwise stmt is GIMPLE_COND. */
2697 code = gimple_cond_code (stmt);
2698 lhs = gimple_cond_lhs (stmt);
2699 rhs = gimple_cond_rhs (stmt);
2700 if (TREE_CODE (lhs) == SSA_NAME
2701 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2702 && ((code != EQ_EXPR && code != NE_EXPR)
2703 || rhs != boolean_false_node
2704 /* Either push into ops the individual bitwise
2705 or resp. and operands, depending on which
2706 edge is other_bb. */
2707 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2708 ^ (code == EQ_EXPR))
2709 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2710 loop_containing_stmt (stmt))))
2712 /* Or push the GIMPLE_COND stmt itself. */
2713 operand_entry_t oe
2714 = (operand_entry_t) pool_alloc (operand_entry_pool);
2716 oe->op = NULL;
2717 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2718 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2719 /* oe->op = NULL signals that there is no SSA_NAME
2720 for the range test, and oe->id instead is the
2721 number of the basic block at whose end the
2722 GIMPLE_COND is found. */
2723 oe->id = bb->index;
2724 oe->count = 1;
2725 ops.safe_push (oe);
2727 if (bb == first_bb)
2728 break;
2730 if (ops.length () > 1)
2731 optimize_range_tests (ERROR_MARK, &ops);
2732 ops.release ();
2735 /* Return true if OPERAND is defined by a PHI node which uses the LHS
2736 of STMT in its operands. This is also known as a "destructive
2737 update" operation. */
2739 static bool
2740 is_phi_for_stmt (gimple stmt, tree operand)
2742 gimple def_stmt;
2743 tree lhs;
2744 use_operand_p arg_p;
2745 ssa_op_iter i;
2747 if (TREE_CODE (operand) != SSA_NAME)
2748 return false;
2750 lhs = gimple_assign_lhs (stmt);
2752 def_stmt = SSA_NAME_DEF_STMT (operand);
2753 if (gimple_code (def_stmt) != GIMPLE_PHI)
2754 return false;
2756 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
2757 if (lhs == USE_FROM_PTR (arg_p))
2758 return true;
2759 return false;
2762 /* Remove def stmt of VAR if VAR has zero uses and recurse
2763 on rhs1 operand if so. */
2765 static void
2766 remove_visited_stmt_chain (tree var)
2768 gimple stmt;
2769 gimple_stmt_iterator gsi;
2771 while (1)
2773 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
2774 return;
2775 stmt = SSA_NAME_DEF_STMT (var);
2776 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
2778 var = gimple_assign_rhs1 (stmt);
2779 gsi = gsi_for_stmt (stmt);
2780 gsi_remove (&gsi, true);
2781 release_defs (stmt);
2783 else
2784 return;
2788 /* This function checks three consecutive operands in
2789 the passed operand vector OPS, starting from OPINDEX, and
2790 swaps two operands if it is profitable for the binary operation
2791 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
2793 We pair ops with the same rank if possible.
2795 The alternative we try is to see if STMT is a destructive
2796 update style statement, which is like:
2797 b = phi (a, ...)
2798 a = c + b;
2799 In that case, we want to use the destructive update form to
2800 expose the possible vectorizer sum reduction opportunity.
2801 In that case, the third operand will be the phi node. This
2802 check is not performed if STMT is null.
2804 We could, of course, try to be better as noted above, and do a
2805 lot of work to try to find these opportunities in >3 operand
2806 cases, but it is unlikely to be worth it. */
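/* A small made-up illustration of the rank-pairing case: if the three
   operands starting at OPINDEX have ranks 3, 3 and 5, the first and third
   are swapped, leaving 5, 3, 3, so the two rank-3 operands end up feeding
   the same binary statement, which gives later redundancy elimination a
   better chance of finding an identical computation elsewhere.  */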
2808 static void
2809 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
2810 unsigned int opindex, gimple stmt)
2812 operand_entry_t oe1, oe2, oe3;
2814 oe1 = ops[opindex];
2815 oe2 = ops[opindex + 1];
2816 oe3 = ops[opindex + 2];
2818 if ((oe1->rank == oe2->rank
2819 && oe2->rank != oe3->rank)
2820 || (stmt && is_phi_for_stmt (stmt, oe3->op)
2821 && !is_phi_for_stmt (stmt, oe1->op)
2822 && !is_phi_for_stmt (stmt, oe2->op)))
2824 struct operand_entry temp = *oe3;
2825 oe3->op = oe1->op;
2826 oe3->rank = oe1->rank;
2827 oe1->op = temp.op;
2828 oe1->rank = temp.rank;
2830 else if ((oe1->rank == oe3->rank
2831 && oe2->rank != oe3->rank)
2832 || (stmt && is_phi_for_stmt (stmt, oe2->op)
2833 && !is_phi_for_stmt (stmt, oe1->op)
2834 && !is_phi_for_stmt (stmt, oe3->op)))
2836 struct operand_entry temp = *oe2;
2837 oe2->op = oe1->op;
2838 oe2->rank = oe1->rank;
2839 oe1->op = temp.op;
2840 oe1->rank = temp.rank;
2844 /* Recursively rewrite our linearized statements so that the operators
2845 match those in OPS[OPINDEX], putting the computation in rank
2846 order. */
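/* Sketch of the effect (hypothetical names): for a linearized chain

     t1 = x + y;
     t2 = t1 + z;

   with a sorted operand vector {o0, o1, o2}, the recursion rewrites the
   innermost statement to t1 = o1 + o2 and the root to t2 = t1 + o0, so the
   chain combines the operands in rank order while reusing the existing
   statements.  */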
2848 static void
2849 rewrite_expr_tree (gimple stmt, unsigned int opindex,
2850 vec<operand_entry_t> ops, bool moved)
2852 tree rhs1 = gimple_assign_rhs1 (stmt);
2853 tree rhs2 = gimple_assign_rhs2 (stmt);
2854 operand_entry_t oe;
2856 /* If we have three operands left, then we want to make sure the ones
2857 that get the double binary op are chosen wisely. */
2858 if (opindex + 3 == ops.length ())
2859 swap_ops_for_binary_stmt (ops, opindex, stmt);
2861 /* The final recursion case for this function is that you have
2862 exactly two operations left.
2863 If we had exactly one op in the entire list to start with, we
2864 would never have called this function, and the tail recursion
2865 rewrites them one at a time. */
2866 if (opindex + 2 == ops.length ())
2868 operand_entry_t oe1, oe2;
2870 oe1 = ops[opindex];
2871 oe2 = ops[opindex + 1];
2873 if (rhs1 != oe1->op || rhs2 != oe2->op)
2875 if (dump_file && (dump_flags & TDF_DETAILS))
2877 fprintf (dump_file, "Transforming ");
2878 print_gimple_stmt (dump_file, stmt, 0, 0);
2881 gimple_assign_set_rhs1 (stmt, oe1->op);
2882 gimple_assign_set_rhs2 (stmt, oe2->op);
2883 update_stmt (stmt);
2884 if (rhs1 != oe1->op && rhs1 != oe2->op)
2885 remove_visited_stmt_chain (rhs1);
2887 if (dump_file && (dump_flags & TDF_DETAILS))
2889 fprintf (dump_file, " into ");
2890 print_gimple_stmt (dump_file, stmt, 0, 0);
2893 return;
2896 /* If we hit here, we should have 3 or more ops left. */
2897 gcc_assert (opindex + 2 < ops.length ());
2899 /* Rewrite the next operator. */
2900 oe = ops[opindex];
2902 if (oe->op != rhs2)
2904 if (!moved)
2906 gimple_stmt_iterator gsinow, gsirhs1;
2907 gimple stmt1 = stmt, stmt2;
2908 unsigned int count;
2910 gsinow = gsi_for_stmt (stmt);
2911 count = ops.length () - opindex - 2;
2912 while (count-- != 0)
2914 stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
2915 gsirhs1 = gsi_for_stmt (stmt2);
2916 gsi_move_before (&gsirhs1, &gsinow);
2917 gsi_prev (&gsinow);
2918 stmt1 = stmt2;
2920 moved = true;
2923 if (dump_file && (dump_flags & TDF_DETAILS))
2925 fprintf (dump_file, "Transforming ");
2926 print_gimple_stmt (dump_file, stmt, 0, 0);
2929 gimple_assign_set_rhs2 (stmt, oe->op);
2930 update_stmt (stmt);
2932 if (dump_file && (dump_flags & TDF_DETAILS))
2934 fprintf (dump_file, " into ");
2935 print_gimple_stmt (dump_file, stmt, 0, 0);
2938 /* Recurse on the LHS of the binary operator, which is guaranteed to
2939 be the non-leaf side. */
2940 rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
2943 /* Find out how many cycles we need to compute a statement chain.
2944 OPS_NUM holds the number of statements in a chain. CPU_WIDTH is the
2945 maximum number of independent statements we may execute per cycle. */
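/* Worked example (illustrative only): for ops_num = 10 and cpu_width = 2 we
   first spend 10 / (2 * 2) = 2 cycles reducing the operand count by
   cpu_width per cycle (10 -> 8 -> 6), which leaves rest = 6 operands; 6 is
   not a power of two, so we need floor_log2 (6) + 1 = 3 more halving
   cycles, for a total of 5.  */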
2947 static int
2948 get_required_cycles (int ops_num, int cpu_width)
2950 int res;
2951 int elog;
2952 unsigned int rest;
2954 /* While we have more than 2 * cpu_width operands
2955 we may reduce the number of operands by cpu_width
2956 per cycle. */
2957 res = ops_num / (2 * cpu_width);
2959 /* The remaining operand count may be reduced twice per cycle
2960 until we have only one operand. */
2961 rest = (unsigned)(ops_num - res * cpu_width);
2962 elog = exact_log2 (rest);
2963 if (elog >= 0)
2964 res += elog;
2965 else
2966 res += floor_log2 (rest) + 1;
2968 return res;
2971 /* Returns an optimal number of registers to use for computation of
2972 given statements. */
2974 static int
2975 get_reassociation_width (int ops_num, enum tree_code opc,
2976 enum machine_mode mode)
2978 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
2979 int width;
2980 int width_min;
2981 int cycles_best;
2983 if (param_width > 0)
2984 width = param_width;
2985 else
2986 width = targetm.sched.reassociation_width (opc, mode);
2988 if (width == 1)
2989 return width;
2991 /* Get the minimal time required for sequence computation. */
2992 cycles_best = get_required_cycles (ops_num, width);
2994 /* Check if we may use a smaller width and still compute the sequence in
2995 the same time. That will allow us to reduce register usage.
2996 get_required_cycles is monotonically increasing with lower width
2997 so we can perform a binary search for the minimal width that still
2998 results in the optimal cycle count. */
2999 width_min = 1;
3000 while (width > width_min)
3002 int width_mid = (width + width_min) / 2;
3004 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3005 width = width_mid;
3006 else if (width_min < width_mid)
3007 width_min = width_mid;
3008 else
3009 break;
3012 return width;
3015 /* Recursively rewrite our linearized statements so that the operators
3016 match those in OPS[OPINDEX], putting the computation in rank
3017 order and trying to allow operations to be executed in
3018 parallel. */
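/* For instance (illustrative), with width 2 and four operands a, b, c and d,
   the serial chain ((a + b) + c) + d, which needs three dependent additions,
   can instead be emitted as

     t1 = a + b;
     t2 = c + d;
     t3 = t1 + t2;

   where t1 and t2 are independent and may issue in the same cycle.  */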
3020 static void
3021 rewrite_expr_tree_parallel (gimple stmt, int width,
3022 vec<operand_entry_t> ops)
3024 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3025 int op_num = ops.length ();
3026 int stmt_num = op_num - 1;
3027 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3028 int op_index = op_num - 1;
3029 int stmt_index = 0;
3030 int ready_stmts_end = 0;
3031 int i = 0;
3032 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3034 /* We start expression rewriting from the top statements.
3035 So, in this loop we create a full list of statements
3036 we will work with. */
3037 stmts[stmt_num - 1] = stmt;
3038 for (i = stmt_num - 2; i >= 0; i--)
3039 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3041 for (i = 0; i < stmt_num; i++)
3043 tree op1, op2;
3045 /* Determine whether we should use results of
3046 already handled statements or not. */
3047 if (ready_stmts_end == 0
3048 && (i - stmt_index >= width || op_index < 1))
3049 ready_stmts_end = i;
3051 /* Now we choose operands for the next statement. A nonzero
3052 value in ready_stmts_end means that we should use
3053 the result of already generated statements as the new operand. */
3054 if (ready_stmts_end > 0)
3056 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3057 if (ready_stmts_end > stmt_index)
3058 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3059 else if (op_index >= 0)
3060 op2 = ops[op_index--]->op;
3061 else
3063 gcc_assert (stmt_index < i);
3064 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3067 if (stmt_index >= ready_stmts_end)
3068 ready_stmts_end = 0;
3070 else
3072 if (op_index > 1)
3073 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3074 op2 = ops[op_index--]->op;
3075 op1 = ops[op_index--]->op;
3078 /* If we emit the last statement then we should put
3079 operands into the last statement. It will also
3080 break the loop. */
3081 if (op_index < 0 && stmt_index == i)
3082 i = stmt_num - 1;
3084 if (dump_file && (dump_flags & TDF_DETAILS))
3086 fprintf (dump_file, "Transforming ");
3087 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3090 /* We keep the original statement only for the last one. All
3091 others are recreated. */
3092 if (i == stmt_num - 1)
3094 gimple_assign_set_rhs1 (stmts[i], op1);
3095 gimple_assign_set_rhs2 (stmts[i], op2);
3096 update_stmt (stmts[i]);
3098 else
3099 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3101 if (dump_file && (dump_flags & TDF_DETAILS))
3103 fprintf (dump_file, " into ");
3104 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3108 remove_visited_stmt_chain (last_rhs1);
3111 /* Transform STMT, which is really (A + B) + (C + D), into the left
3112 linear form, ((A+B)+C)+D.
3113 Recurse on D if necessary. */
3115 static void
3116 linearize_expr (gimple stmt)
3118 gimple_stmt_iterator gsinow, gsirhs;
3119 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3120 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3121 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3122 gimple newbinrhs = NULL;
3123 struct loop *loop = loop_containing_stmt (stmt);
3125 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3126 && is_reassociable_op (binrhs, rhscode, loop));
3128 gsinow = gsi_for_stmt (stmt);
3129 gsirhs = gsi_for_stmt (binrhs);
3130 gsi_move_before (&gsirhs, &gsinow);
3132 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3133 gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
3134 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3136 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3137 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3139 if (dump_file && (dump_flags & TDF_DETAILS))
3141 fprintf (dump_file, "Linearized: ");
3142 print_gimple_stmt (dump_file, stmt, 0, 0);
3145 reassociate_stats.linearized++;
3146 update_stmt (binrhs);
3147 update_stmt (binlhs);
3148 update_stmt (stmt);
3150 gimple_set_visited (stmt, true);
3151 gimple_set_visited (binlhs, true);
3152 gimple_set_visited (binrhs, true);
3154 /* Tail recurse on the new rhs if it still needs reassociation. */
3155 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3156 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3157 want to change the algorithm while converting to tuples. */
3158 linearize_expr (stmt);
3161 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3162 it. Otherwise, return NULL. */
3164 static gimple
3165 get_single_immediate_use (tree lhs)
3167 use_operand_p immuse;
3168 gimple immusestmt;
3170 if (TREE_CODE (lhs) == SSA_NAME
3171 && single_imm_use (lhs, &immuse, &immusestmt)
3172 && is_gimple_assign (immusestmt))
3173 return immusestmt;
3175 return NULL;
3178 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3179 representing the negated value. Insertions of any necessary
3180 instructions go before GSI.
3181 This function is recursive in that, if you hand it "a_5" as the
3182 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3183 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3185 static tree
3186 negate_value (tree tonegate, gimple_stmt_iterator *gsi)
3188 gimple negatedefstmt = NULL;
3189 tree resultofnegate;
3191 /* If we are trying to negate a name, defined by an add, negate the
3192 add operands instead. */
3193 if (TREE_CODE (tonegate) == SSA_NAME)
3194 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3195 if (TREE_CODE (tonegate) == SSA_NAME
3196 && is_gimple_assign (negatedefstmt)
3197 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3198 && has_single_use (gimple_assign_lhs (negatedefstmt))
3199 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3201 gimple_stmt_iterator gsi;
3202 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3203 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3205 gsi = gsi_for_stmt (negatedefstmt);
3206 rhs1 = negate_value (rhs1, &gsi);
3207 gimple_assign_set_rhs1 (negatedefstmt, rhs1);
3209 gsi = gsi_for_stmt (negatedefstmt);
3210 rhs2 = negate_value (rhs2, &gsi);
3211 gimple_assign_set_rhs2 (negatedefstmt, rhs2);
3213 update_stmt (negatedefstmt);
3214 return gimple_assign_lhs (negatedefstmt);
3217 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3218 resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
3219 NULL_TREE, true, GSI_SAME_STMT);
3220 return resultofnegate;
3223 /* Return true if we should break up the subtract in STMT into an add
3224 with negate. This is true when the subtract operands are really
3225 adds, or the subtract itself is used in an add expression. In
3226 either case, breaking up the subtract into an add with negate
3227 exposes the adds to reassociation. */
3229 static bool
3230 should_break_up_subtract (gimple stmt)
3232 tree lhs = gimple_assign_lhs (stmt);
3233 tree binlhs = gimple_assign_rhs1 (stmt);
3234 tree binrhs = gimple_assign_rhs2 (stmt);
3235 gimple immusestmt;
3236 struct loop *loop = loop_containing_stmt (stmt);
3238 if (TREE_CODE (binlhs) == SSA_NAME
3239 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3240 return true;
3242 if (TREE_CODE (binrhs) == SSA_NAME
3243 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3244 return true;
3246 if (TREE_CODE (lhs) == SSA_NAME
3247 && (immusestmt = get_single_immediate_use (lhs))
3248 && is_gimple_assign (immusestmt)
3249 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3250 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3251 return true;
3252 return false;
3255 /* Transform STMT from A - B into A + -B. */
3257 static void
3258 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3260 tree rhs1 = gimple_assign_rhs1 (stmt);
3261 tree rhs2 = gimple_assign_rhs2 (stmt);
3263 if (dump_file && (dump_flags & TDF_DETAILS))
3265 fprintf (dump_file, "Breaking up subtract ");
3266 print_gimple_stmt (dump_file, stmt, 0, 0);
3269 rhs2 = negate_value (rhs2, gsip);
3270 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3271 update_stmt (stmt);
3274 /* Determine whether STMT is a builtin call that raises an SSA name
3275 to an integer power and has only one use. If so, and this is early
3276 reassociation and unsafe math optimizations are permitted, place
3277 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3278 If any of these conditions does not hold, return FALSE. */
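/* For example (hypothetical names, assuming unsafe math optimizations are
   enabled): for a single-use call

     t_1 = __builtin_pow (x_2, 3.0);

   *BASE is set to x_2 and *EXPONENT to 3.  A call with a fractional or
   negative constant exponent, or with an exponent of 0 or 1, is rejected.  */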
3280 static bool
3281 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3283 tree fndecl, arg1;
3284 REAL_VALUE_TYPE c, cint;
3286 if (!first_pass_instance
3287 || !flag_unsafe_math_optimizations
3288 || !is_gimple_call (stmt)
3289 || !has_single_use (gimple_call_lhs (stmt)))
3290 return false;
3292 fndecl = gimple_call_fndecl (stmt);
3294 if (!fndecl
3295 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3296 return false;
3298 switch (DECL_FUNCTION_CODE (fndecl))
3300 CASE_FLT_FN (BUILT_IN_POW):
3301 *base = gimple_call_arg (stmt, 0);
3302 arg1 = gimple_call_arg (stmt, 1);
3304 if (TREE_CODE (arg1) != REAL_CST)
3305 return false;
3307 c = TREE_REAL_CST (arg1);
3309 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3310 return false;
3312 *exponent = real_to_integer (&c);
3313 real_from_integer (&cint, VOIDmode, *exponent,
3314 *exponent < 0 ? -1 : 0, 0);
3315 if (!real_identical (&c, &cint))
3316 return false;
3318 break;
3320 CASE_FLT_FN (BUILT_IN_POWI):
3321 *base = gimple_call_arg (stmt, 0);
3322 arg1 = gimple_call_arg (stmt, 1);
3324 if (!host_integerp (arg1, 0))
3325 return false;
3327 *exponent = TREE_INT_CST_LOW (arg1);
3328 break;
3330 default:
3331 return false;
3334 /* Expanding negative exponents is generally unproductive, so we don't
3335 complicate matters with those. Exponents of zero and one should
3336 have been handled by expression folding. */
3337 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3338 return false;
3340 return true;
3343 /* Recursively linearize a binary expression that is the RHS of STMT.
3344 Place the operands of the expression tree in the vector named OPS. */
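/* As a rough illustration (made-up names): for

     t1 = a + b;
     t2 = c + d;
     t3 = t1 + t2;

   called on the t3 statement with PLUS_EXPR, the reassociable
   sub-expressions are walked recursively and OPS ends up holding the leaves
   a, b, c and d (for MULT_EXPR chains, operands that are suitable pow/powi
   calls are instead recorded via add_repeat_to_ops_vec).  */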
3346 static void
3347 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3348 bool is_associative, bool set_visited)
3350 tree binlhs = gimple_assign_rhs1 (stmt);
3351 tree binrhs = gimple_assign_rhs2 (stmt);
3352 gimple binlhsdef = NULL, binrhsdef = NULL;
3353 bool binlhsisreassoc = false;
3354 bool binrhsisreassoc = false;
3355 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3356 struct loop *loop = loop_containing_stmt (stmt);
3357 tree base = NULL_TREE;
3358 HOST_WIDE_INT exponent = 0;
3360 if (set_visited)
3361 gimple_set_visited (stmt, true);
3363 if (TREE_CODE (binlhs) == SSA_NAME)
3365 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3366 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3367 && !stmt_could_throw_p (binlhsdef));
3370 if (TREE_CODE (binrhs) == SSA_NAME)
3372 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3373 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3374 && !stmt_could_throw_p (binrhsdef));
3377 /* If the LHS is not reassociable, but the RHS is, we need to swap
3378 them. If neither is reassociable, there is nothing we can do, so
3379 just put them in the ops vector. If the LHS is reassociable,
3380 linearize it. If both are reassociable, then linearize the RHS
3381 and the LHS. */
3383 if (!binlhsisreassoc)
3385 tree temp;
3387 /* If this is not an associative operation like division, give up. */
3388 if (!is_associative)
3390 add_to_ops_vec (ops, binrhs);
3391 return;
3394 if (!binrhsisreassoc)
3396 if (rhscode == MULT_EXPR
3397 && TREE_CODE (binrhs) == SSA_NAME
3398 && acceptable_pow_call (binrhsdef, &base, &exponent))
3400 add_repeat_to_ops_vec (ops, base, exponent);
3401 gimple_set_visited (binrhsdef, true);
3403 else
3404 add_to_ops_vec (ops, binrhs);
3406 if (rhscode == MULT_EXPR
3407 && TREE_CODE (binlhs) == SSA_NAME
3408 && acceptable_pow_call (binlhsdef, &base, &exponent))
3410 add_repeat_to_ops_vec (ops, base, exponent);
3411 gimple_set_visited (binlhsdef, true);
3413 else
3414 add_to_ops_vec (ops, binlhs);
3416 return;
3419 if (dump_file && (dump_flags & TDF_DETAILS))
3421 fprintf (dump_file, "swapping operands of ");
3422 print_gimple_stmt (dump_file, stmt, 0, 0);
3425 swap_tree_operands (stmt,
3426 gimple_assign_rhs1_ptr (stmt),
3427 gimple_assign_rhs2_ptr (stmt));
3428 update_stmt (stmt);
3430 if (dump_file && (dump_flags & TDF_DETAILS))
3432 fprintf (dump_file, " is now ");
3433 print_gimple_stmt (dump_file, stmt, 0, 0);
3436 /* We want to make it so the lhs is always the reassociative op,
3437 so swap. */
3438 temp = binlhs;
3439 binlhs = binrhs;
3440 binrhs = temp;
3442 else if (binrhsisreassoc)
3444 linearize_expr (stmt);
3445 binlhs = gimple_assign_rhs1 (stmt);
3446 binrhs = gimple_assign_rhs2 (stmt);
3449 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3450 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3451 rhscode, loop));
3452 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3453 is_associative, set_visited);
3455 if (rhscode == MULT_EXPR
3456 && TREE_CODE (binrhs) == SSA_NAME
3457 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3459 add_repeat_to_ops_vec (ops, base, exponent);
3460 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3462 else
3463 add_to_ops_vec (ops, binrhs);
3466 /* Repropagate the negates back into subtracts, since no other pass
3467 currently does it. */
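/* A minimal example of what this buys us (hypothetical names): after the
   earlier break-up and rewriting phases we may be left with

     x = -a;
     y = b + x;

   which this pass turns back into y = b - a, removing the separate negation
   instead of leaving it behind.  */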
3469 static void
3470 repropagate_negates (void)
3472 unsigned int i = 0;
3473 tree negate;
3475 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3477 gimple user = get_single_immediate_use (negate);
3479 if (!user || !is_gimple_assign (user))
3480 continue;
3482 /* The negate operand can be either operand of a PLUS_EXPR
3483 (it can be the LHS if the RHS is a constant for example).
3485 Force the negate operand to the RHS of the PLUS_EXPR, then
3486 transform the PLUS_EXPR into a MINUS_EXPR. */
3487 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3489 /* If the negated operand appears on the LHS of the
3490 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3491 to force the negated operand to the RHS of the PLUS_EXPR. */
3492 if (gimple_assign_rhs1 (user) == negate)
3494 swap_tree_operands (user,
3495 gimple_assign_rhs1_ptr (user),
3496 gimple_assign_rhs2_ptr (user));
3499 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3500 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3501 if (gimple_assign_rhs2 (user) == negate)
3503 tree rhs1 = gimple_assign_rhs1 (user);
3504 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3505 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3506 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3507 update_stmt (user);
3510 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3512 if (gimple_assign_rhs1 (user) == negate)
3514 /* We have
3515 x = -a
3516 y = x - b
3517 which we transform into
3518 x = a + b
3519 y = -x .
3520 This pushes down the negate which we possibly can merge
3521 into some other operation, hence insert it into the
3522 plus_negates vector. */
3523 gimple feed = SSA_NAME_DEF_STMT (negate);
3524 tree a = gimple_assign_rhs1 (feed);
3525 tree rhs2 = gimple_assign_rhs2 (user);
3526 gimple_stmt_iterator gsi = gsi_for_stmt (feed), gsi2;
3527 gimple_replace_lhs (feed, negate);
3528 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, a, rhs2);
3529 update_stmt (gsi_stmt (gsi));
3530 gsi2 = gsi_for_stmt (user);
3531 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, negate, NULL);
3532 update_stmt (gsi_stmt (gsi2));
3533 gsi_move_before (&gsi, &gsi2);
3534 plus_negates.safe_push (gimple_assign_lhs (gsi_stmt (gsi2)));
3536 else
3538 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3539 rid of one operation. */
3540 gimple feed = SSA_NAME_DEF_STMT (negate);
3541 tree a = gimple_assign_rhs1 (feed);
3542 tree rhs1 = gimple_assign_rhs1 (user);
3543 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3544 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3545 update_stmt (gsi_stmt (gsi));
3551 /* Returns true if OP is of a type for which we can do reassociation.
3552 That is for integral or non-saturating fixed-point types, and for
3553 floating point types when associative-math is enabled. */
3555 static bool
3556 can_reassociate_p (tree op)
3558 tree type = TREE_TYPE (op);
3559 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3560 || NON_SAT_FIXED_POINT_TYPE_P (type)
3561 || (flag_associative_math && FLOAT_TYPE_P (type)))
3562 return true;
3563 return false;
3566 /* Break up subtract operations in block BB.
3568 We do this top down because we don't know whether the subtract is
3569 part of a possible chain of reassociation except at the top.
3571 IE given
3572 d = f + g
3573 c = a + e
3574 b = c - d
3575 q = b - r
3576 k = t - q
3578 we want to break up k = t - q, but we won't until we've transformed q
3579 = b - r, which won't be broken up until we transform b = c - d.
3581 En passant, clear the GIMPLE visited flag on every statement. */
3583 static void
3584 break_up_subtract_bb (basic_block bb)
3586 gimple_stmt_iterator gsi;
3587 basic_block son;
3589 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3591 gimple stmt = gsi_stmt (gsi);
3592 gimple_set_visited (stmt, false);
3594 if (!is_gimple_assign (stmt)
3595 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3596 continue;
3598 /* Look for simple gimple subtract operations. */
3599 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3601 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3602 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3603 continue;
3605 /* Check for a subtract used only in an addition. If this
3606 is the case, transform it into add of a negate for better
3607 reassociation. IE transform C = A-B into C = A + -B if C
3608 is only used in an addition. */
3609 if (should_break_up_subtract (stmt))
3610 break_up_subtract (stmt, &gsi);
3612 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3613 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3614 plus_negates.safe_push (gimple_assign_lhs (stmt));
3616 for (son = first_dom_son (CDI_DOMINATORS, bb);
3617 son;
3618 son = next_dom_son (CDI_DOMINATORS, son))
3619 break_up_subtract_bb (son);
3622 /* Used for repeated factor analysis. */
3623 struct repeat_factor_d
3625 /* An SSA name that occurs in a multiply chain. */
3626 tree factor;
3628 /* Cached rank of the factor. */
3629 unsigned rank;
3631 /* Number of occurrences of the factor in the chain. */
3632 HOST_WIDE_INT count;
3634 /* An SSA name representing the product of this factor and
3635 all factors appearing later in the repeated factor vector. */
3636 tree repr;
3639 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
3640 typedef const struct repeat_factor_d *const_repeat_factor_t;
3643 static vec<repeat_factor> repeat_factor_vec;
3645 /* Used for sorting the repeat factor vector. Sort primarily by
3646 ascending occurrence count, secondarily by descending rank. */
3648 static int
3649 compare_repeat_factors (const void *x1, const void *x2)
3651 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
3652 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
3654 if (rf1->count != rf2->count)
3655 return rf1->count - rf2->count;
3657 return rf2->rank - rf1->rank;
3660 /* Look for repeated operands in OPS in the multiply tree rooted at
3661 STMT. Replace them with an optimal sequence of multiplies and powi
3662 builtin calls, and remove the used operands from OPS. Return an
3663 SSA name representing the value of the replacement sequence. */
3665 static tree
3666 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
3668 unsigned i, j, vec_len;
3669 int ii;
3670 operand_entry_t oe;
3671 repeat_factor_t rf1, rf2;
3672 repeat_factor rfnew;
3673 tree result = NULL_TREE;
3674 tree target_ssa, iter_result;
3675 tree type = TREE_TYPE (gimple_get_lhs (stmt));
3676 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
3677 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3678 gimple mul_stmt, pow_stmt;
3680 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
3681 target. */
3682 if (!powi_fndecl)
3683 return NULL_TREE;
3685 /* Allocate the repeated factor vector. */
3686 repeat_factor_vec.create (10);
3688 /* Scan the OPS vector for all SSA names in the product and build
3689 up a vector of occurrence counts for each factor. */
3690 FOR_EACH_VEC_ELT (*ops, i, oe)
3692 if (TREE_CODE (oe->op) == SSA_NAME)
3694 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3696 if (rf1->factor == oe->op)
3698 rf1->count += oe->count;
3699 break;
3703 if (j >= repeat_factor_vec.length ())
3705 rfnew.factor = oe->op;
3706 rfnew.rank = oe->rank;
3707 rfnew.count = oe->count;
3708 rfnew.repr = NULL_TREE;
3709 repeat_factor_vec.safe_push (rfnew);
3714 /* Sort the repeated factor vector by (a) increasing occurrence count,
3715 and (b) decreasing rank. */
3716 repeat_factor_vec.qsort (compare_repeat_factors);
3718 /* It is generally best to combine as many base factors as possible
3719 into a product before applying __builtin_powi to the result.
3720 However, the sort order chosen for the repeated factor vector
3721 allows us to cache partial results for the product of the base
3722 factors for subsequent use. When we already have a cached partial
3723 result from a previous iteration, it is best to make use of it
3724 before looking for another __builtin_pow opportunity.
3726 As an example, consider x * x * y * y * y * z * z * z * z.
3727 We want to first compose the product x * y * z, raise it to the
3728 second power, then multiply this by y * z, and finally multiply
3729 by z. This can be done in 5 multiplies provided we cache y * z
3730 for use in both expressions:
3732 t1 = y * z
3733 t2 = t1 * x
3734 t3 = t2 * t2
3735 t4 = t1 * t3
3736 result = t4 * z
3738 If we instead ignored the cached y * z and first multiplied by
3739 the __builtin_pow opportunity z * z, we would get the inferior:
3741 t1 = y * z
3742 t2 = t1 * x
3743 t3 = t2 * t2
3744 t4 = z * z
3745 t5 = t3 * t4
3746 result = t5 * y */
3748 vec_len = repeat_factor_vec.length ();
3750 /* Repeatedly look for opportunities to create a builtin_powi call. */
3751 while (true)
3753 HOST_WIDE_INT power;
3755 /* First look for the largest cached product of factors from
3756 preceding iterations. If found, create a builtin_powi for
3757 it if the minimum occurrence count for its factors is at
3758 least 2, or just use this cached product as our next
3759 multiplicand if the minimum occurrence count is 1. */
3760 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3762 if (rf1->repr && rf1->count > 0)
3763 break;
3766 if (j < vec_len)
3768 power = rf1->count;
3770 if (power == 1)
3772 iter_result = rf1->repr;
3774 if (dump_file && (dump_flags & TDF_DETAILS))
3776 unsigned elt;
3777 repeat_factor_t rf;
3778 fputs ("Multiplying by cached product ", dump_file);
3779 for (elt = j; elt < vec_len; elt++)
3781 rf = &repeat_factor_vec[elt];
3782 print_generic_expr (dump_file, rf->factor, 0);
3783 if (elt < vec_len - 1)
3784 fputs (" * ", dump_file);
3786 fputs ("\n", dump_file);
3789 else
3791 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3792 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3793 build_int_cst (integer_type_node,
3794 power));
3795 gimple_call_set_lhs (pow_stmt, iter_result);
3796 gimple_set_location (pow_stmt, gimple_location (stmt));
3797 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3799 if (dump_file && (dump_flags & TDF_DETAILS))
3801 unsigned elt;
3802 repeat_factor_t rf;
3803 fputs ("Building __builtin_pow call for cached product (",
3804 dump_file);
3805 for (elt = j; elt < vec_len; elt++)
3807 rf = &repeat_factor_vec[elt];
3808 print_generic_expr (dump_file, rf->factor, 0);
3809 if (elt < vec_len - 1)
3810 fputs (" * ", dump_file);
3812 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
3813 power);
3817 else
3819 /* Otherwise, find the first factor in the repeated factor
3820 vector whose occurrence count is at least 2. If no such
3821 factor exists, there are no builtin_powi opportunities
3822 remaining. */
3823 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
3825 if (rf1->count >= 2)
3826 break;
3829 if (j >= vec_len)
3830 break;
3832 power = rf1->count;
3834 if (dump_file && (dump_flags & TDF_DETAILS))
3836 unsigned elt;
3837 repeat_factor_t rf;
3838 fputs ("Building __builtin_pow call for (", dump_file);
3839 for (elt = j; elt < vec_len; elt++)
3841 rf = &repeat_factor_vec[elt];
3842 print_generic_expr (dump_file, rf->factor, 0);
3843 if (elt < vec_len - 1)
3844 fputs (" * ", dump_file);
3846 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
3849 reassociate_stats.pows_created++;
3851 /* Visit each element of the vector in reverse order (so that
3852 high-occurrence elements are visited first, and within the
3853 same occurrence count, lower-ranked elements are visited
3854 first). Form a linear product of all elements in this order
3855 whose occurrence count is at least that of element J.
3856 Record the SSA name representing the product of each element
3857 with all subsequent elements in the vector. */
3858 if (j == vec_len - 1)
3859 rf1->repr = rf1->factor;
3860 else
3862 for (ii = vec_len - 2; ii >= (int)j; ii--)
3864 tree op1, op2;
3866 rf1 = &repeat_factor_vec[ii];
3867 rf2 = &repeat_factor_vec[ii + 1];
3869 /* Init the last factor's representative to be itself. */
3870 if (!rf2->repr)
3871 rf2->repr = rf2->factor;
3873 op1 = rf1->factor;
3874 op2 = rf2->repr;
3876 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
3877 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
3878 target_ssa,
3879 op1, op2);
3880 gimple_set_location (mul_stmt, gimple_location (stmt));
3881 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3882 rf1->repr = target_ssa;
3884 /* Don't reprocess the multiply we just introduced. */
3885 gimple_set_visited (mul_stmt, true);
3889 /* Form a call to __builtin_powi for the maximum product
3890 just formed, raised to the power obtained earlier. */
3891 rf1 = &repeat_factor_vec[j];
3892 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
3893 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
3894 build_int_cst (integer_type_node,
3895 power));
3896 gimple_call_set_lhs (pow_stmt, iter_result);
3897 gimple_set_location (pow_stmt, gimple_location (stmt));
3898 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
3901 /* If we previously formed at least one other builtin_powi call,
3902 form the product of this one and those others. */
3903 if (result)
3905 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
3906 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
3907 result, iter_result);
3908 gimple_set_location (mul_stmt, gimple_location (stmt));
3909 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
3910 gimple_set_visited (mul_stmt, true);
3911 result = new_result;
3913 else
3914 result = iter_result;
3916 /* Decrement the occurrence count of each element in the product
3917 by the count found above, and remove this many copies of each
3918 factor from OPS. */
3919 for (i = j; i < vec_len; i++)
3921 unsigned k = power;
3922 unsigned n;
3924 rf1 = &repeat_factor_vec[i];
3925 rf1->count -= power;
3927 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
3929 if (oe->op == rf1->factor)
3931 if (oe->count <= k)
3933 ops->ordered_remove (n);
3934 k -= oe->count;
3936 if (k == 0)
3937 break;
3939 else
3941 oe->count -= k;
3942 break;
3949 /* At this point all elements in the repeated factor vector have a
3950 remaining occurrence count of 0 or 1, and those with a count of 1
3951 don't have cached representatives. Re-sort the ops vector and
3952 clean up. */
3953 ops->qsort (sort_by_operand_rank);
3954 repeat_factor_vec.release ();
3956 /* Return the final product computed herein. Note that there may
3957 still be some elements with single occurrence count left in OPS;
3958 those will be handled by the normal reassociation logic. */
3959 return result;
3962 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
3964 static void
3965 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
3967 tree rhs1;
3969 if (dump_file && (dump_flags & TDF_DETAILS))
3971 fprintf (dump_file, "Transforming ");
3972 print_gimple_stmt (dump_file, stmt, 0, 0);
3975 rhs1 = gimple_assign_rhs1 (stmt);
3976 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
3977 update_stmt (stmt);
3978 remove_visited_stmt_chain (rhs1);
3980 if (dump_file && (dump_flags & TDF_DETAILS))
3982 fprintf (dump_file, " into ");
3983 print_gimple_stmt (dump_file, stmt, 0, 0);
3987 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
3989 static void
3990 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
3991 tree rhs1, tree rhs2)
3993 if (dump_file && (dump_flags & TDF_DETAILS))
3995 fprintf (dump_file, "Transforming ");
3996 print_gimple_stmt (dump_file, stmt, 0, 0);
3999 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4000 update_stmt (gsi_stmt (*gsi));
4001 remove_visited_stmt_chain (rhs1);
4003 if (dump_file && (dump_flags & TDF_DETAILS))
4005 fprintf (dump_file, " into ");
4006 print_gimple_stmt (dump_file, stmt, 0, 0);
4010 /* Reassociate expressions in basic block BB and its post-dominator as
4011 children. */
4013 static void
4014 reassociate_bb (basic_block bb)
4016 gimple_stmt_iterator gsi;
4017 basic_block son;
4018 gimple stmt = last_stmt (bb);
4020 if (stmt && !gimple_visited_p (stmt))
4021 maybe_optimize_range_tests (stmt);
4023 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4025 stmt = gsi_stmt (gsi);
4027 if (is_gimple_assign (stmt)
4028 && !stmt_could_throw_p (stmt))
4030 tree lhs, rhs1, rhs2;
4031 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4033 /* If this is not a gimple binary expression, there is
4034 nothing for us to do with it. */
4035 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4036 continue;
4038 /* If this was part of an already processed statement,
4039 we don't need to touch it again. */
4040 if (gimple_visited_p (stmt))
4042 /* This statement might have become dead because of previous
4043 reassociations. */
4044 if (has_zero_uses (gimple_get_lhs (stmt)))
4046 gsi_remove (&gsi, true);
4047 release_defs (stmt);
4048 /* Removing the stmt above may leave the iterator at the
4049 end of the sequence. In that case reset it to the
4050 last stmt of the block; if the block is now empty,
4051 the iterator is still at the end of the sequence
4052 and we need to bail out. */
4054 if (gsi_end_p (gsi))
4056 gsi = gsi_last_bb (bb);
4057 if (gsi_end_p (gsi))
4058 break;
4061 continue;
4064 lhs = gimple_assign_lhs (stmt);
4065 rhs1 = gimple_assign_rhs1 (stmt);
4066 rhs2 = gimple_assign_rhs2 (stmt);
4068 /* Only bitwise and min/max operations can be reassociated for
4069 all types; for any other code, verify the types here. */
4070 if (rhs_code != BIT_IOR_EXPR
4071 && rhs_code != BIT_AND_EXPR
4072 && rhs_code != BIT_XOR_EXPR
4073 && rhs_code != MIN_EXPR
4074 && rhs_code != MAX_EXPR
4075 && (!can_reassociate_p (lhs)
4076 || !can_reassociate_p (rhs1)
4077 || !can_reassociate_p (rhs2)))
4078 continue;
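/* can_reassociate_p is defined earlier in this file; roughly, it
   accepts wrapping integer types unconditionally but floating-point
   types only when -fassociative-math is in effect.  */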
4080 if (associative_tree_code (rhs_code))
4082 vec<operand_entry_t> ops = vNULL;
4083 tree powi_result = NULL_TREE;
4085 /* There may be no immediate uses left by the time we
4086 get here because we may have eliminated them all. */
4087 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4088 continue;
4090 gimple_set_visited (stmt, true);
4091 linearize_expr_tree (&ops, stmt, true, true);
4092 ops.qsort (sort_by_operand_rank);
4093 optimize_ops_list (rhs_code, &ops);
4094 if (undistribute_ops_list (rhs_code, &ops,
4095 loop_containing_stmt (stmt)))
4097 ops.qsort (sort_by_operand_rank);
4098 optimize_ops_list (rhs_code, &ops);
4101 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4102 optimize_range_tests (rhs_code, &ops);
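/* The reassociation pass is scheduled twice; repeated-factor
   combination into __builtin_powi is only attempted on the first
   instance, and only for multiplies under unsafe math optimizations.  */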
4104 if (first_pass_instance
4105 && rhs_code == MULT_EXPR
4106 && flag_unsafe_math_optimizations)
4107 powi_result = attempt_builtin_powi (stmt, &ops);
4109 /* If the operand vector is now empty, all operands were
4110 consumed by the __builtin_powi optimization. */
4111 if (ops.length () == 0)
4112 transform_stmt_to_copy (&gsi, stmt, powi_result);
4113 else if (ops.length () == 1)
4115 tree last_op = ops.last ()->op;
4117 if (powi_result)
4118 transform_stmt_to_multiply (&gsi, stmt, last_op,
4119 powi_result);
4120 else
4121 transform_stmt_to_copy (&gsi, stmt, last_op);
4123 else
4125 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4126 int ops_num = ops.length ();
4127 int width = get_reassociation_width (ops_num, rhs_code, mode);
4129 if (dump_file && (dump_flags & TDF_DETAILS))
4130 fprintf (dump_file,
4131 "Width = %d was chosen for reassociation\n", width);
4133 if (width > 1
4134 && ops.length () > 3)
4135 rewrite_expr_tree_parallel (stmt, width, ops);
4136 else
4137 rewrite_expr_tree (stmt, 0, ops, false);
4139 /* If we combined some repeated factors into a
4140 __builtin_powi call, multiply that result by the
4141 reassociated operands. */
4142 if (powi_result)
4144 gimple mul_stmt;
4145 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4146 tree target_ssa = make_temp_ssa_name (type, NULL,
4147 "reassocpow");
4148 gimple_set_lhs (stmt, target_ssa);
4149 update_stmt (stmt);
4150 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4151 powi_result,
4152 target_ssa);
4153 gimple_set_location (mul_stmt, gimple_location (stmt));
4154 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
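/* The original LHS now receives powi_result * target_ssa, where
   target_ssa holds the product of the remaining reassociated
   operands.  */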
4158 ops.release ();
4162 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4163 son;
4164 son = next_dom_son (CDI_POST_DOMINATORS, son))
4165 reassociate_bb (son);
4168 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4169 void debug_ops_vector (vec<operand_entry_t> ops);
4171 /* Dump the operand entry vector OPS to FILE. */
4173 void
4174 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4176 operand_entry_t oe;
4177 unsigned int i;
4179 FOR_EACH_VEC_ELT (ops, i, oe)
4181 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4182 print_generic_expr (file, oe->op, 0);
4186 /* Dump the operand entry vector OPS to STDERR. */
4188 DEBUG_FUNCTION void
4189 debug_ops_vector (vec<operand_entry_t> ops)
4191 dump_ops_vector (stderr, ops);
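/* debug_ops_vector is marked DEBUG_FUNCTION so that it survives in the
   binary and can be called by hand from a debugger, even though nothing
   in the compiler references it.  */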
4194 static void
4195 do_reassoc (void)
4197 break_up_subtract_bb (ENTRY_BLOCK_PTR);
4198 reassociate_bb (EXIT_BLOCK_PTR);
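/* break_up_subtract_bb recurses over the dominator tree starting at the
   entry block, while reassociate_bb recurses over the post-dominator
   tree starting at the exit block; together the two walks cover the
   function's basic blocks.  */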
4201 /* Initialize the reassociation pass. */
4203 static void
4204 init_reassoc (void)
4206 int i;
4207 long rank = 2;
4208 int *bbs = XNEWVEC (int, n_basic_blocks - NUM_FIXED_BLOCKS);
4210 /* Find the loops, so that we can prevent moving calculations into
4211 them. */
4212 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4214 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4216 operand_entry_pool = create_alloc_pool ("operand entry pool",
4217 sizeof (struct operand_entry), 30);
4218 next_operand_entry_id = 0;
4220 /* Reverse RPO (Reverse Post Order) will give us something where
4221 deeper loops come later. */
4222 pre_and_rev_post_order_compute (NULL, bbs, false);
4223 bb_rank = XCNEWVEC (long, last_basic_block);
4224 operand_rank = pointer_map_create ();
4226 /* Give each default definition a distinct rank. This includes
4227 parameters and the static chain. Walk backwards over all
4228 SSA names so that we get proper rank ordering according
4229 to tree_swap_operands_p. */
4230 for (i = num_ssa_names - 1; i > 0; --i)
4232 tree name = ssa_name (i);
4233 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4234 insert_operand_rank (name, ++rank);
4237 /* Set up rank for each BB */
4238 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
4239 bb_rank[bbs[i]] = ++rank << 16;
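/* The shift by 16 spaces the per-block base ranks far apart; ranks of
   individual expressions grow by small increments from the ranks of
   their operands, so this normally keeps them below the next block's
   base rank.  */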
4241 free (bbs);
4242 calculate_dominance_info (CDI_POST_DOMINATORS);
4243 plus_negates = vNULL;
4246 /* Cleanup after the reassociation pass, and print stats if
4247 requested. */
4249 static void
4250 fini_reassoc (void)
4252 statistics_counter_event (cfun, "Linearized",
4253 reassociate_stats.linearized);
4254 statistics_counter_event (cfun, "Constants eliminated",
4255 reassociate_stats.constants_eliminated);
4256 statistics_counter_event (cfun, "Ops eliminated",
4257 reassociate_stats.ops_eliminated);
4258 statistics_counter_event (cfun, "Statements rewritten",
4259 reassociate_stats.rewritten);
4260 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4261 reassociate_stats.pows_encountered);
4262 statistics_counter_event (cfun, "Built-in powi calls created",
4263 reassociate_stats.pows_created);
4265 pointer_map_destroy (operand_rank);
4266 free_alloc_pool (operand_entry_pool);
4267 free (bb_rank);
4268 plus_negates.release ();
4269 free_dominance_info (CDI_POST_DOMINATORS);
4270 loop_optimizer_finalize ();
4273 /* Gate and execute functions for Reassociation. */
4275 static unsigned int
4276 execute_reassoc (void)
4278 init_reassoc ();
4280 do_reassoc ();
4281 repropagate_negates ();
4283 fini_reassoc ();
4284 return 0;
4287 static bool
4288 gate_tree_ssa_reassoc (void)
4290 return flag_tree_reassoc != 0;
4293 struct gimple_opt_pass pass_reassoc =
4296 GIMPLE_PASS,
4297 "reassoc", /* name */
4298 OPTGROUP_NONE, /* optinfo_flags */
4299 gate_tree_ssa_reassoc, /* gate */
4300 execute_reassoc, /* execute */
4301 NULL, /* sub */
4302 NULL, /* next */
4303 0, /* static_pass_number */
4304 TV_TREE_REASSOC, /* tv_id */
4305 PROP_cfg | PROP_ssa, /* properties_required */
4306 0, /* properties_provided */
4307 0, /* properties_destroyed */
4308 0, /* todo_flags_start */
4309 TODO_verify_ssa
4310 | TODO_update_ssa_only_virtuals
4311 | TODO_verify_flow /* todo_flags_finish */