/* Reassociation for trees.
   Copyright (C) 2005-2014 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "hash-table.h"
#include "tm.h"
#include "rtl.h"
#include "tm_p.h"
#include "tree.h"
#include "stor-layout.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-inline.h"
#include "pointer-set.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "tree-iterator.h"
#include "tree-pass.h"
#include "alloc-pool.h"
#include "langhooks.h"
#include "cfgloop.h"
#include "flags.h"
#include "target.h"
#include "params.h"
#include "diagnostic-core.h"

/*  This is a simple global reassociation pass.  It is, in part, based
    on the LLVM pass of the same name (They do some things more/less
    than we do, in different orders, etc).

    It consists of five steps:

    1. Breaking up subtract operations into addition + negate, where
    it would promote the reassociation of adds.

    2. Left linearization of the expression trees, so that (A+B)+(C+D)
    becomes (((A+B)+C)+D), which is easier for us to rewrite later.
    During linearization, we place the operands of the binary
    expressions into a vector of operand_entry_t

    3. Optimization of the operand lists, eliminating things like a +
    -a, a & a, etc.

    3a. Combine repeated factors with the same occurrence counts
    into a __builtin_powi call that will later be optimized into
    an optimal number of multiplies.

    4. Rewrite the expression trees we linearized and optimized so
    they are in proper rank order.

    5. Repropagate negates, as nothing else will clean it up ATM.

    A bit of theory on #4, since nobody seems to write anything down
    about why it makes sense to do it the way they do it:

    We could do this much nicer theoretically, but don't (for reasons
    explained after how to do it theoretically nice :P).

    In order to promote the most redundancy elimination, you want
    binary expressions whose operands are the same rank (or
    preferably, the same value) exposed to the redundancy eliminator,
    for possible elimination.

    So the way to do this if we really cared, is to build the new op
    tree from the leaves to the roots, merging as you go, and putting the
    new op on the end of the worklist, until you are left with one
    thing on the worklist.

    IE if you have to rewrite the following set of operands (listed with
    rank in parentheses), with opcode PLUS_EXPR:

    a (1),  b (1),  c (1),  d (2), e (2)

    We start with our merge worklist empty, and the ops list with all of
    those on it.

    You want to first merge all leaves of the same rank, as much as
    possible.

    So first build a binary op of

    mergetmp = a + b, and put "mergetmp" on the merge worklist.

    Because there is no three operand form of PLUS_EXPR, c is not going to
    be exposed to redundancy elimination as a rank 1 operand.

    So you might as well throw it on the merge worklist (you could also
    consider it to now be a rank two operand, and merge it with d and e,
    but in this case, you then have evicted e from a binary op.  So at
    least in this situation, you can't win.)

    Then build a binary op of d + e
    mergetmp2 = d + e

    and put mergetmp2 on the merge worklist.

    so merge worklist = {mergetmp, c, mergetmp2}

    Continue building binary ops of these operations until you have only
    one operation left on the worklist.

    So we have

    build binary op
    mergetmp3 = mergetmp + c

    worklist = {mergetmp2, mergetmp3}

    mergetmp4 = mergetmp2 + mergetmp3

    worklist = {mergetmp4}

    because we have one operation left, we can now just set the original
    statement equal to the result of that operation.

    This will at least expose a + b and d + e to redundancy elimination
    as binary operations.

    For extra points, you can reuse the old statements to build the
    mergetmps, since you shouldn't run out.

    So why don't we do this?

    Because it's expensive, and rarely will help.  Most trees we are
    reassociating have 3 or less ops.  If they have 2 ops, they already
    will be written into a nice single binary op.  If you have 3 ops, a
    single simple check suffices to tell you whether the first two are of the
    same rank.  If so, you know to order it

    mergetmp = op1 + op2
    newstmt = mergetmp + op3

    instead of
    mergetmp = op2 + op3
    newstmt = mergetmp + op1

    If all three are of the same rank, you can't expose them all in a
    single binary operator anyway, so the above is *still* the best you
    can do.

    Thus, this is what we do.  When we have three ops left, we check to see
    what order to put them in, and call it a day.  As a nod to vector sum
    reduction, we check if any of the ops are really a phi node that is a
    destructive update for the associating op, and keep the destructive
    update together for vector sum reduction recognition.  */
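
/* As a small end-to-end illustration of the five steps (a sketch of the
   intent only, not the exact GIMPLE the pass produces; the names are
   invented), consider reassociating

     e = (a - b) + (c + d);

   Step 1 breaks the subtract:         e = (a + (-b)) + (c + d);
   Step 2 linearizes it into a chain:  t1 = a + (-b); t2 = t1 + c; e = t2 + d;
   and collects the operand vector     { a, -b, c, d }.
   Steps 3 and 4 optimize and sort that vector by rank and rewrite the
   chain in rank order, and step 5 turns a surviving negate back into a
   subtract, e.g.                      e = ((a + c) + d) - b;  */
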
/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;

/* Operator, rank pair.  */
typedef struct operand_entry
{
  unsigned int rank;
  int id;
  tree op;
  unsigned int count;
} *operand_entry_t;

static alloc_pool operand_entry_pool;

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static long *bb_rank;

/* Operand->rank hashtable.  */
static struct pointer_map_t *operand_rank;

/* Forward decls.  */
static long get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);


/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)
230 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
231 an innermost loop, and the phi has only a single use which is inside
232 the loop, then the rank is the block rank of the loop latch plus an
233 extra bias for the loop-carried dependence. This causes expressions
234 calculated into an accumulator variable to be independent for each
235 iteration of the loop. If STMT is some other phi, the rank is the
236 block rank of its containing block. */
237 static long
238 phi_rank (gimple stmt)
240 basic_block bb = gimple_bb (stmt);
241 struct loop *father = bb->loop_father;
242 tree res;
243 unsigned i;
244 use_operand_p use;
245 gimple use_stmt;
247 /* We only care about real loops (those with a latch). */
248 if (!father->latch)
249 return bb_rank[bb->index];
251 /* Interesting phis must be in headers of innermost loops. */
252 if (bb != father->header
253 || father->inner)
254 return bb_rank[bb->index];
256 /* Ignore virtual SSA_NAMEs. */
257 res = gimple_phi_result (stmt);
258 if (virtual_operand_p (res))
259 return bb_rank[bb->index];
261 /* The phi definition must have a single use, and that use must be
262 within the loop. Otherwise this isn't an accumulator pattern. */
263 if (!single_imm_use (res, &use, &use_stmt)
264 || gimple_bb (use_stmt)->loop_father != father)
265 return bb_rank[bb->index];
267 /* Look for phi arguments from within the loop. If found, bias this phi. */
268 for (i = 0; i < gimple_phi_num_args (stmt); i++)
270 tree arg = gimple_phi_arg_def (stmt, i);
271 if (TREE_CODE (arg) == SSA_NAME
272 && !SSA_NAME_IS_DEFAULT_DEF (arg))
274 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
275 if (gimple_bb (def_stmt)->loop_father == father)
276 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
280 /* Must be an uninteresting phi. */
281 return bb_rank[bb->index];
284 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
285 loop-carried dependence of an innermost loop, return TRUE; else
286 return FALSE. */
287 static bool
288 loop_carried_phi (tree exp)
290 gimple phi_stmt;
291 long block_rank;
293 if (TREE_CODE (exp) != SSA_NAME
294 || SSA_NAME_IS_DEFAULT_DEF (exp))
295 return false;
297 phi_stmt = SSA_NAME_DEF_STMT (exp);
299 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
300 return false;
302 /* Non-loop-carried phis have block rank. Loop-carried phis have
303 an additional bias added in. If this phi doesn't have block rank,
304 it's biased and should not be propagated. */
305 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
307 if (phi_rank (phi_stmt) != block_rank)
308 return true;
310 return false;
313 /* Return the maximum of RANK and the rank that should be propagated
314 from expression OP. For most operands, this is just the rank of OP.
315 For loop-carried phis, the value is zero to avoid undoing the bias
316 in favor of the phi. */
317 static long
318 propagate_rank (long rank, tree op)
320 long op_rank;
322 if (loop_carried_phi (op))
323 return rank;
325 op_rank = get_rank (op);
327 return MAX (rank, op_rank);
330 /* Look up the operand rank structure for expression E. */
332 static inline long
333 find_operand_rank (tree e)
335 void **slot = pointer_map_contains (operand_rank, e);
336 return slot ? (long) (intptr_t) *slot : -1;
339 /* Insert {E,RANK} into the operand rank hashtable. */
341 static inline void
342 insert_operand_rank (tree e, long rank)
344 void **slot;
345 gcc_assert (rank > 0);
346 slot = pointer_map_insert (operand_rank, e);
347 gcc_assert (!*slot);
348 *slot = (void *) (intptr_t) rank;
351 /* Given an expression E, return the rank of the expression. */
353 static long
354 get_rank (tree e)
356 /* Constants have rank 0. */
357 if (is_gimple_min_invariant (e))
358 return 0;
  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */
397 if (TREE_CODE (e) == SSA_NAME)
399 gimple stmt;
400 long rank;
401 int i, n;
402 tree op;
404 if (SSA_NAME_IS_DEFAULT_DEF (e))
405 return find_operand_rank (e);
407 stmt = SSA_NAME_DEF_STMT (e);
408 if (gimple_code (stmt) == GIMPLE_PHI)
409 return phi_rank (stmt);
411 if (!is_gimple_assign (stmt)
412 || gimple_vdef (stmt))
413 return bb_rank[gimple_bb (stmt)->index];
415 /* If we already have a rank for this expression, use that. */
416 rank = find_operand_rank (e);
417 if (rank != -1)
418 return rank;
420 /* Otherwise, find the maximum rank for the operands. As an
421 exception, remove the bias from loop-carried phis when propagating
422 the rank so that dependent operations are not also biased. */
423 rank = 0;
424 if (gimple_assign_single_p (stmt))
426 tree rhs = gimple_assign_rhs1 (stmt);
427 n = TREE_OPERAND_LENGTH (rhs);
428 if (n == 0)
429 rank = propagate_rank (rank, rhs);
430 else
432 for (i = 0; i < n; i++)
434 op = TREE_OPERAND (rhs, i);
436 if (op != NULL_TREE)
437 rank = propagate_rank (rank, op);
441 else
443 n = gimple_num_ops (stmt);
444 for (i = 1; i < n; i++)
446 op = gimple_op (stmt, i);
447 gcc_assert (op);
448 rank = propagate_rank (rank, op);
452 if (dump_file && (dump_flags & TDF_DETAILS))
454 fprintf (dump_file, "Rank for ");
455 print_generic_expr (dump_file, e, 0);
456 fprintf (dump_file, " is %ld\n", (rank + 1));
459 /* Note the rank in the hashtable so we don't recompute it. */
460 insert_operand_rank (e, (rank + 1));
461 return (rank + 1);
464 /* Globals, etc, are rank 0 */
465 return 0;
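
/* A small illustration of the resulting ranks (the base numbers are
   arbitrary and the names invented; function arguments get the rank
   pre-assigned at pass initialization):

     x = argument;        rank (x)  = pre-set argument rank, say 9
     t1 = x + 1;          rank (t1) = MAX (rank (x), rank (1)) + 1 = 10
     t2 = t1 * x;         rank (t2) = MAX (rank (t1), rank (x)) + 1 = 11

   Values with larger ranks become available later, so sorting operands
   by rank keeps operands that are computed together next to each
   other.  */
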
469 /* We want integer ones to end up last no matter what, since they are
470 the ones we can do the most with. */
471 #define INTEGER_CONST_TYPE 1 << 3
472 #define FLOAT_CONST_TYPE 1 << 2
473 #define OTHER_CONST_TYPE 1 << 1
475 /* Classify an invariant tree into integer, float, or other, so that
476 we can sort them to be near other constants of the same type. */
477 static inline int
478 constant_type (tree t)
480 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
481 return INTEGER_CONST_TYPE;
482 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
483 return FLOAT_CONST_TYPE;
484 else
485 return OTHER_CONST_TYPE;
488 /* qsort comparison function to sort operand entries PA and PB by rank
489 so that the sorted array is ordered by rank in decreasing order. */
490 static int
491 sort_by_operand_rank (const void *pa, const void *pb)
493 const operand_entry_t oea = *(const operand_entry_t *)pa;
494 const operand_entry_t oeb = *(const operand_entry_t *)pb;
496 /* It's nicer for optimize_expression if constants that are likely
497 to fold when added/multiplied//whatever are put next to each
498 other. Since all constants have rank 0, order them by type. */
499 if (oeb->rank == 0 && oea->rank == 0)
501 if (constant_type (oeb->op) != constant_type (oea->op))
502 return constant_type (oeb->op) - constant_type (oea->op);
503 else
504 /* To make sorting result stable, we use unique IDs to determine
505 order. */
506 return oeb->id - oea->id;
509 /* Lastly, make sure the versions that are the same go next to each
510 other. */
511 if ((oeb->rank - oea->rank == 0)
512 && TREE_CODE (oea->op) == SSA_NAME
513 && TREE_CODE (oeb->op) == SSA_NAME)
515 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
516 versions of removed SSA_NAMEs, so if possible, prefer to sort
517 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
518 See PR60418. */
519 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
520 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
521 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
523 gimple stmta = SSA_NAME_DEF_STMT (oea->op);
524 gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
525 basic_block bba = gimple_bb (stmta);
526 basic_block bbb = gimple_bb (stmtb);
527 if (bbb != bba)
529 if (bb_rank[bbb->index] != bb_rank[bba->index])
530 return bb_rank[bbb->index] - bb_rank[bba->index];
532 else
534 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
535 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
536 if (da != db)
537 return da ? 1 : -1;
541 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
542 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
543 else
544 return oeb->id - oea->id;
547 if (oeb->rank != oea->rank)
548 return oeb->rank - oea->rank;
549 else
550 return oeb->id - oea->id;
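
/* For instance (a sketch), for a PLUS_EXPR operand vector { a, 3, b, 5 }
   the SSA_NAMEs sort first in decreasing rank order and the two
   constants, both of rank 0 and of the same constant_type, end up next
   to each other at the end of the vector.  That adjacency is what later
   lets optimize_ops_list fold them into a single constant 8.  */
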
553 /* Add an operand entry to *OPS for the tree operand OP. */
555 static void
556 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
558 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
560 oe->op = op;
561 oe->rank = get_rank (op);
562 oe->id = next_operand_entry_id++;
563 oe->count = 1;
564 ops->safe_push (oe);
567 /* Add an operand entry to *OPS for the tree operand OP with repeat
568 count REPEAT. */
570 static void
571 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
572 HOST_WIDE_INT repeat)
574 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
576 oe->op = op;
577 oe->rank = get_rank (op);
578 oe->id = next_operand_entry_id++;
579 oe->count = repeat;
580 ops->safe_push (oe);
582 reassociate_stats.pows_encountered++;
585 /* Return true if STMT is reassociable operation containing a binary
586 operation with tree code CODE, and is inside LOOP. */
588 static bool
589 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
591 basic_block bb = gimple_bb (stmt);
593 if (gimple_bb (stmt) == NULL)
594 return false;
596 if (!flow_bb_inside_loop_p (loop, bb))
597 return false;
599 if (is_gimple_assign (stmt)
600 && gimple_assign_rhs_code (stmt) == code
601 && has_single_use (gimple_assign_lhs (stmt)))
602 return true;
604 return false;
608 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
609 operand of the negate operation. Otherwise, return NULL. */
611 static tree
612 get_unary_op (tree name, enum tree_code opcode)
614 gimple stmt = SSA_NAME_DEF_STMT (name);
616 if (!is_gimple_assign (stmt))
617 return NULL_TREE;
619 if (gimple_assign_rhs_code (stmt) == opcode)
620 return gimple_assign_rhs1 (stmt);
621 return NULL_TREE;
624 /* If CURR and LAST are a pair of ops that OPCODE allows us to
625 eliminate through equivalences, do so, remove them from OPS, and
626 return true. Otherwise, return false. */
628 static bool
629 eliminate_duplicate_pair (enum tree_code opcode,
630 vec<operand_entry_t> *ops,
631 bool *all_done,
632 unsigned int i,
633 operand_entry_t curr,
634 operand_entry_t last)
637 /* If we have two of the same op, and the opcode is & |, min, or max,
638 we can eliminate one of them.
639 If we have two of the same op, and the opcode is ^, we can
640 eliminate both of them. */
642 if (last && last->op == curr->op)
644 switch (opcode)
646 case MAX_EXPR:
647 case MIN_EXPR:
648 case BIT_IOR_EXPR:
649 case BIT_AND_EXPR:
650 if (dump_file && (dump_flags & TDF_DETAILS))
652 fprintf (dump_file, "Equivalence: ");
653 print_generic_expr (dump_file, curr->op, 0);
654 fprintf (dump_file, " [&|minmax] ");
655 print_generic_expr (dump_file, last->op, 0);
656 fprintf (dump_file, " -> ");
657 print_generic_stmt (dump_file, last->op, 0);
660 ops->ordered_remove (i);
661 reassociate_stats.ops_eliminated ++;
663 return true;
665 case BIT_XOR_EXPR:
666 if (dump_file && (dump_flags & TDF_DETAILS))
668 fprintf (dump_file, "Equivalence: ");
669 print_generic_expr (dump_file, curr->op, 0);
670 fprintf (dump_file, " ^ ");
671 print_generic_expr (dump_file, last->op, 0);
672 fprintf (dump_file, " -> nothing\n");
675 reassociate_stats.ops_eliminated += 2;
677 if (ops->length () == 2)
679 ops->create (0);
680 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
681 *all_done = true;
683 else
685 ops->ordered_remove (i-1);
686 ops->ordered_remove (i-1);
689 return true;
691 default:
692 break;
695 return false;
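
/* Sketches of the duplicate pairs this catches (after sorting, the two
   copies of the operand sit next to each other in the vector):

     a & b & a   -> a & b    one copy of an AND/IOR/MIN/MAX duplicate is dropped
     a ^ b ^ a   -> b        both XOR copies cancel; if nothing else were left,
                             a zero of the right type is pushed instead  */
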
698 static vec<tree> plus_negates;
700 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
701 expression, look in OPS for a corresponding positive operation to cancel
702 it out. If we find one, remove the other from OPS, replace
703 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
704 return false. */
706 static bool
707 eliminate_plus_minus_pair (enum tree_code opcode,
708 vec<operand_entry_t> *ops,
709 unsigned int currindex,
710 operand_entry_t curr)
712 tree negateop;
713 tree notop;
714 unsigned int i;
715 operand_entry_t oe;
717 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
718 return false;
720 negateop = get_unary_op (curr->op, NEGATE_EXPR);
721 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
722 if (negateop == NULL_TREE && notop == NULL_TREE)
723 return false;
725 /* Any non-negated version will have a rank that is one less than
726 the current rank. So once we hit those ranks, if we don't find
727 one, we can stop. */
729 for (i = currindex + 1;
730 ops->iterate (i, &oe)
731 && oe->rank >= curr->rank - 1 ;
732 i++)
734 if (oe->op == negateop)
737 if (dump_file && (dump_flags & TDF_DETAILS))
739 fprintf (dump_file, "Equivalence: ");
740 print_generic_expr (dump_file, negateop, 0);
741 fprintf (dump_file, " + -");
742 print_generic_expr (dump_file, oe->op, 0);
743 fprintf (dump_file, " -> 0\n");
746 ops->ordered_remove (i);
747 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
748 ops->ordered_remove (currindex);
749 reassociate_stats.ops_eliminated ++;
751 return true;
753 else if (oe->op == notop)
755 tree op_type = TREE_TYPE (oe->op);
757 if (dump_file && (dump_flags & TDF_DETAILS))
759 fprintf (dump_file, "Equivalence: ");
760 print_generic_expr (dump_file, notop, 0);
761 fprintf (dump_file, " + ~");
762 print_generic_expr (dump_file, oe->op, 0);
763 fprintf (dump_file, " -> -1\n");
766 ops->ordered_remove (i);
767 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
768 ops->ordered_remove (currindex);
769 reassociate_stats.ops_eliminated ++;
771 return true;
775 /* CURR->OP is a negate expr in a plus expr: save it for later
776 inspection in repropagate_negates(). */
777 if (negateop != NULL_TREE)
778 plus_negates.safe_push (curr->op);
780 return false;
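
/* Sketches of the pairs this catches in a PLUS_EXPR chain:

     b + a + (-a)  -> b + 0      the negate cancels the plain copy of a
     b + a + (~a)  -> b + (-1)   since a + ~a is all ones in two's complement  */
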
783 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
784 bitwise not expression, look in OPS for a corresponding operand to
785 cancel it out. If we find one, remove the other from OPS, replace
786 OPS[CURRINDEX] with 0, and return true. Otherwise, return
787 false. */
789 static bool
790 eliminate_not_pairs (enum tree_code opcode,
791 vec<operand_entry_t> *ops,
792 unsigned int currindex,
793 operand_entry_t curr)
795 tree notop;
796 unsigned int i;
797 operand_entry_t oe;
799 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
800 || TREE_CODE (curr->op) != SSA_NAME)
801 return false;
803 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
804 if (notop == NULL_TREE)
805 return false;
807 /* Any non-not version will have a rank that is one less than
808 the current rank. So once we hit those ranks, if we don't find
809 one, we can stop. */
811 for (i = currindex + 1;
812 ops->iterate (i, &oe)
813 && oe->rank >= curr->rank - 1;
814 i++)
816 if (oe->op == notop)
818 if (dump_file && (dump_flags & TDF_DETAILS))
820 fprintf (dump_file, "Equivalence: ");
821 print_generic_expr (dump_file, notop, 0);
822 if (opcode == BIT_AND_EXPR)
823 fprintf (dump_file, " & ~");
824 else if (opcode == BIT_IOR_EXPR)
825 fprintf (dump_file, " | ~");
826 print_generic_expr (dump_file, oe->op, 0);
827 if (opcode == BIT_AND_EXPR)
828 fprintf (dump_file, " -> 0\n");
829 else if (opcode == BIT_IOR_EXPR)
830 fprintf (dump_file, " -> -1\n");
833 if (opcode == BIT_AND_EXPR)
834 oe->op = build_zero_cst (TREE_TYPE (oe->op));
835 else if (opcode == BIT_IOR_EXPR)
836 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
838 reassociate_stats.ops_eliminated += ops->length () - 1;
839 ops->truncate (0);
840 ops->quick_push (oe);
841 return true;
845 return false;
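
/* Sketches of the not-pairs this catches; note that the whole operand
   list collapses to the single constant:

     a & b & ~a  -> 0
     a | b | ~a  -> -1 (all ones of the type)  */
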
848 /* Use constant value that may be present in OPS to try to eliminate
849 operands. Note that this function is only really used when we've
850 eliminated ops for other reasons, or merged constants. Across
851 single statements, fold already does all of this, plus more. There
852 is little point in duplicating logic, so I've only included the
853 identities that I could ever construct testcases to trigger. */
855 static void
856 eliminate_using_constants (enum tree_code opcode,
857 vec<operand_entry_t> *ops)
859 operand_entry_t oelast = ops->last ();
860 tree type = TREE_TYPE (oelast->op);
862 if (oelast->rank == 0
863 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
865 switch (opcode)
867 case BIT_AND_EXPR:
868 if (integer_zerop (oelast->op))
870 if (ops->length () != 1)
872 if (dump_file && (dump_flags & TDF_DETAILS))
873 fprintf (dump_file, "Found & 0, removing all other ops\n");
875 reassociate_stats.ops_eliminated += ops->length () - 1;
877 ops->truncate (0);
878 ops->quick_push (oelast);
879 return;
882 else if (integer_all_onesp (oelast->op))
884 if (ops->length () != 1)
886 if (dump_file && (dump_flags & TDF_DETAILS))
887 fprintf (dump_file, "Found & -1, removing\n");
888 ops->pop ();
889 reassociate_stats.ops_eliminated++;
892 break;
893 case BIT_IOR_EXPR:
894 if (integer_all_onesp (oelast->op))
896 if (ops->length () != 1)
898 if (dump_file && (dump_flags & TDF_DETAILS))
899 fprintf (dump_file, "Found | -1, removing all other ops\n");
901 reassociate_stats.ops_eliminated += ops->length () - 1;
903 ops->truncate (0);
904 ops->quick_push (oelast);
905 return;
908 else if (integer_zerop (oelast->op))
910 if (ops->length () != 1)
912 if (dump_file && (dump_flags & TDF_DETAILS))
913 fprintf (dump_file, "Found | 0, removing\n");
914 ops->pop ();
915 reassociate_stats.ops_eliminated++;
918 break;
919 case MULT_EXPR:
920 if (integer_zerop (oelast->op)
921 || (FLOAT_TYPE_P (type)
922 && !HONOR_NANS (TYPE_MODE (type))
923 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
924 && real_zerop (oelast->op)))
926 if (ops->length () != 1)
928 if (dump_file && (dump_flags & TDF_DETAILS))
929 fprintf (dump_file, "Found * 0, removing all other ops\n");
931 reassociate_stats.ops_eliminated += ops->length () - 1;
932 ops->truncate (1);
933 ops->quick_push (oelast);
934 return;
937 else if (integer_onep (oelast->op)
938 || (FLOAT_TYPE_P (type)
939 && !HONOR_SNANS (TYPE_MODE (type))
940 && real_onep (oelast->op)))
942 if (ops->length () != 1)
944 if (dump_file && (dump_flags & TDF_DETAILS))
945 fprintf (dump_file, "Found * 1, removing\n");
946 ops->pop ();
947 reassociate_stats.ops_eliminated++;
948 return;
951 break;
952 case BIT_XOR_EXPR:
953 case PLUS_EXPR:
954 case MINUS_EXPR:
955 if (integer_zerop (oelast->op)
956 || (FLOAT_TYPE_P (type)
957 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
958 && fold_real_zero_addition_p (type, oelast->op,
959 opcode == MINUS_EXPR)))
961 if (ops->length () != 1)
963 if (dump_file && (dump_flags & TDF_DETAILS))
964 fprintf (dump_file, "Found [|^+] 0, removing\n");
965 ops->pop ();
966 reassociate_stats.ops_eliminated++;
967 return;
970 break;
971 default:
972 break;
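
/* In source form, the identities handled above are roughly (the
   floating-point cases are guarded by the HONOR_* checks):

     x & 0  -> 0       x & -1 -> x
     x | -1 -> -1      x | 0  -> x
     x * 0  -> 0       x * 1  -> x
     x + 0  -> x       x ^ 0  -> x       x - 0 -> x  */
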
978 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
979 bool, bool);
981 /* Structure for tracking and counting operands. */
982 typedef struct oecount_s {
983 int cnt;
984 int id;
985 enum tree_code oecode;
986 tree op;
987 } oecount;
990 /* The heap for the oecount hashtable and the sorted list of operands. */
991 static vec<oecount> cvec;
994 /* Oecount hashtable helpers. */
996 struct oecount_hasher : typed_noop_remove <void>
998 /* Note that this hash table stores integers, not pointers.
999 So, observe the casting in the member functions. */
1000 typedef void value_type;
1001 typedef void compare_type;
1002 static inline hashval_t hash (const value_type *);
1003 static inline bool equal (const value_type *, const compare_type *);
1006 /* Hash function for oecount. */
1008 inline hashval_t
1009 oecount_hasher::hash (const value_type *p)
1011 const oecount *c = &cvec[(size_t)p - 42];
1012 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1015 /* Comparison function for oecount. */
1017 inline bool
1018 oecount_hasher::equal (const value_type *p1, const compare_type *p2)
1020 const oecount *c1 = &cvec[(size_t)p1 - 42];
1021 const oecount *c2 = &cvec[(size_t)p2 - 42];
1022 return (c1->oecode == c2->oecode
1023 && c1->op == c2->op);
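
/* The "pointers" stored in this table are really small integers: the
   index of an oecount in CVEC plus 42, cast to a pointer-sized value.
   The hash and equality functions above subtract the offset again to
   reach the real entry; the offset itself just keeps the encoded
   values away from the table's reserved empty/deleted markers.  */
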
1026 /* Comparison function for qsort sorting oecount elements by count. */
1028 static int
1029 oecount_cmp (const void *p1, const void *p2)
1031 const oecount *c1 = (const oecount *)p1;
1032 const oecount *c2 = (const oecount *)p2;
1033 if (c1->cnt != c2->cnt)
1034 return c1->cnt - c2->cnt;
1035 else
1036 /* If counts are identical, use unique IDs to stabilize qsort. */
1037 return c1->id - c2->id;
1040 /* Return TRUE iff STMT represents a builtin call that raises OP
1041 to some exponent. */
1043 static bool
1044 stmt_is_power_of_op (gimple stmt, tree op)
1046 tree fndecl;
1048 if (!is_gimple_call (stmt))
1049 return false;
1051 fndecl = gimple_call_fndecl (stmt);
1053 if (!fndecl
1054 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1055 return false;
1057 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1059 CASE_FLT_FN (BUILT_IN_POW):
1060 CASE_FLT_FN (BUILT_IN_POWI):
1061 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1063 default:
1064 return false;
1068 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1069 in place and return the result. Assumes that stmt_is_power_of_op
1070 was previously called for STMT and returned TRUE. */
1072 static HOST_WIDE_INT
1073 decrement_power (gimple stmt)
1075 REAL_VALUE_TYPE c, cint;
1076 HOST_WIDE_INT power;
1077 tree arg1;
1079 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1081 CASE_FLT_FN (BUILT_IN_POW):
1082 arg1 = gimple_call_arg (stmt, 1);
1083 c = TREE_REAL_CST (arg1);
1084 power = real_to_integer (&c) - 1;
1085 real_from_integer (&cint, VOIDmode, power, 0, 0);
1086 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1087 return power;
1089 CASE_FLT_FN (BUILT_IN_POWI):
1090 arg1 = gimple_call_arg (stmt, 1);
1091 power = TREE_INT_CST_LOW (arg1) - 1;
1092 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1093 return power;
1095 default:
1096 gcc_unreachable ();
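
/* For instance (a sketch), for a statement t = __builtin_powi (x, 5)
   this rewrites the call in place to __builtin_powi (x, 4) and returns
   4.  zero_one_operation below relies on this when it has to strip a
   single factor of x out of a multiplication chain.  */
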
1100 /* Find the single immediate use of STMT's LHS, and replace it
1101 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1102 replace *DEF with OP as well. */
1104 static void
1105 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1107 tree lhs;
1108 gimple use_stmt;
1109 use_operand_p use;
1110 gimple_stmt_iterator gsi;
1112 if (is_gimple_call (stmt))
1113 lhs = gimple_call_lhs (stmt);
1114 else
1115 lhs = gimple_assign_lhs (stmt);
1117 gcc_assert (has_single_use (lhs));
1118 single_imm_use (lhs, &use, &use_stmt);
1119 if (lhs == *def)
1120 *def = op;
1121 SET_USE (use, op);
1122 if (TREE_CODE (op) != SSA_NAME)
1123 update_stmt (use_stmt);
1124 gsi = gsi_for_stmt (stmt);
1125 unlink_stmt_vdef (stmt);
1126 gsi_remove (&gsi, true);
1127 release_defs (stmt);
1130 /* Walks the linear chain with result *DEF searching for an operation
1131 with operand OP and code OPCODE removing that from the chain. *DEF
1132 is updated if there is only one operand but no operation left. */
1134 static void
1135 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1137 gimple stmt = SSA_NAME_DEF_STMT (*def);
1141 tree name;
1143 if (opcode == MULT_EXPR
1144 && stmt_is_power_of_op (stmt, op))
1146 if (decrement_power (stmt) == 1)
1147 propagate_op_to_single_use (op, stmt, def);
1148 return;
1151 name = gimple_assign_rhs1 (stmt);
1153 /* If this is the operation we look for and one of the operands
1154 is ours simply propagate the other operand into the stmts
1155 single use. */
1156 if (gimple_assign_rhs_code (stmt) == opcode
1157 && (name == op
1158 || gimple_assign_rhs2 (stmt) == op))
1160 if (name == op)
1161 name = gimple_assign_rhs2 (stmt);
1162 propagate_op_to_single_use (name, stmt, def);
1163 return;
1166 /* We might have a multiply of two __builtin_pow* calls, and
1167 the operand might be hiding in the rightmost one. */
1168 if (opcode == MULT_EXPR
1169 && gimple_assign_rhs_code (stmt) == opcode
1170 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1171 && has_single_use (gimple_assign_rhs2 (stmt)))
1173 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1174 if (stmt_is_power_of_op (stmt2, op))
1176 if (decrement_power (stmt2) == 1)
1177 propagate_op_to_single_use (op, stmt2, def);
1178 return;
1182 /* Continue walking the chain. */
1183 gcc_assert (name != op
1184 && TREE_CODE (name) == SSA_NAME);
1185 stmt = SSA_NAME_DEF_STMT (name);
1187 while (1);
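
/* A small sketch of zero_one_operation on a MULT_EXPR chain (names
   invented):

     t1 = a * b;   t2 = t1 * c;   with *DEF == t2 and OP == b

   The walk finds b as an operand of the t1 statement, forwards the
   other operand a into t1's single use and removes t1, leaving

     t2 = a * c;

   If the factor is instead hidden in a __builtin_pow* call, its
   exponent is decremented rather than removing a statement.  */
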
1190 /* Returns true if statement S1 dominates statement S2. Like
1191 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1193 static bool
1194 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1196 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1198 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1199 SSA_NAME. Assume it lives at the beginning of function and
1200 thus dominates everything. */
1201 if (!bb1 || s1 == s2)
1202 return true;
1204 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1205 if (!bb2)
1206 return false;
1208 if (bb1 == bb2)
1210 /* PHIs in the same basic block are assumed to be
1211 executed all in parallel, if only one stmt is a PHI,
1212 it dominates the other stmt in the same basic block. */
1213 if (gimple_code (s1) == GIMPLE_PHI)
1214 return true;
1216 if (gimple_code (s2) == GIMPLE_PHI)
1217 return false;
1219 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1221 if (gimple_uid (s1) < gimple_uid (s2))
1222 return true;
1224 if (gimple_uid (s1) > gimple_uid (s2))
1225 return false;
1227 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1228 unsigned int uid = gimple_uid (s1);
1229 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1231 gimple s = gsi_stmt (gsi);
1232 if (gimple_uid (s) != uid)
1233 break;
1234 if (s == s2)
1235 return true;
1238 return false;
1241 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1244 /* Insert STMT after INSERT_POINT. */
1246 static void
1247 insert_stmt_after (gimple stmt, gimple insert_point)
1249 gimple_stmt_iterator gsi;
1250 basic_block bb;
1252 if (gimple_code (insert_point) == GIMPLE_PHI)
1253 bb = gimple_bb (insert_point);
1254 else if (!stmt_ends_bb_p (insert_point))
1256 gsi = gsi_for_stmt (insert_point);
1257 gimple_set_uid (stmt, gimple_uid (insert_point));
1258 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1259 return;
1261 else
1262 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1263 thus if it must end a basic block, it should be a call that can
1264 throw, or some assignment that can throw. If it throws, the LHS
1265 of it will not be initialized though, so only valid places using
1266 the SSA_NAME should be dominated by the fallthru edge. */
1267 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1268 gsi = gsi_after_labels (bb);
1269 if (gsi_end_p (gsi))
1271 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1272 gimple_set_uid (stmt,
1273 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1275 else
1276 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1277 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1280 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1281 the result. Places the statement after the definition of either
1282 OP1 or OP2. Returns the new statement. */
1284 static gimple
1285 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1287 gimple op1def = NULL, op2def = NULL;
1288 gimple_stmt_iterator gsi;
1289 tree op;
1290 gimple sum;
1292 /* Create the addition statement. */
1293 op = make_ssa_name (type, NULL);
1294 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1296 /* Find an insertion place and insert. */
1297 if (TREE_CODE (op1) == SSA_NAME)
1298 op1def = SSA_NAME_DEF_STMT (op1);
1299 if (TREE_CODE (op2) == SSA_NAME)
1300 op2def = SSA_NAME_DEF_STMT (op2);
1301 if ((!op1def || gimple_nop_p (op1def))
1302 && (!op2def || gimple_nop_p (op2def)))
1304 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1305 if (gsi_end_p (gsi))
1307 gimple_stmt_iterator gsi2
1308 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1309 gimple_set_uid (sum,
1310 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1312 else
1313 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1314 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1316 else
1318 gimple insert_point;
1319 if ((!op1def || gimple_nop_p (op1def))
1320 || (op2def && !gimple_nop_p (op2def)
1321 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1322 insert_point = op2def;
1323 else
1324 insert_point = op1def;
1325 insert_stmt_after (sum, insert_point);
1327 update_stmt (sum);
1329 return sum;
/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
        second candidate bitmap noting all multiplication/division chains
        that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
        candidates with one (operand, code) operation removed from their
        multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
        multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
        processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b ) * x * y in one invocation of the reassociation pass.  */
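
/* A small example of the transform (a sketch; the temporaries are
   invented):

     ops = { a * x, b * x, c }           PLUS_EXPR chain

   The pair (x, MULT_EXPR) occurs in two candidate chains, so one factor
   of x is stripped from each and the stripped chains are summed first:

     t = a + b;    ops = { t * x, 0, c }

   which the main reassociation pass then rewrites to the equivalent of
   (a + b) * x + c.  */
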
1367 static bool
1368 undistribute_ops_list (enum tree_code opcode,
1369 vec<operand_entry_t> *ops, struct loop *loop)
1371 unsigned int length = ops->length ();
1372 operand_entry_t oe1;
1373 unsigned i, j;
1374 sbitmap candidates, candidates2;
1375 unsigned nr_candidates, nr_candidates2;
1376 sbitmap_iterator sbi0;
1377 vec<operand_entry_t> *subops;
1378 hash_table <oecount_hasher> ctable;
1379 bool changed = false;
1380 int next_oecount_id = 0;
1382 if (length <= 1
1383 || opcode != PLUS_EXPR)
1384 return false;
1386 /* Build a list of candidates to process. */
1387 candidates = sbitmap_alloc (length);
1388 bitmap_clear (candidates);
1389 nr_candidates = 0;
1390 FOR_EACH_VEC_ELT (*ops, i, oe1)
1392 enum tree_code dcode;
1393 gimple oe1def;
1395 if (TREE_CODE (oe1->op) != SSA_NAME)
1396 continue;
1397 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1398 if (!is_gimple_assign (oe1def))
1399 continue;
1400 dcode = gimple_assign_rhs_code (oe1def);
1401 if ((dcode != MULT_EXPR
1402 && dcode != RDIV_EXPR)
1403 || !is_reassociable_op (oe1def, dcode, loop))
1404 continue;
1406 bitmap_set_bit (candidates, i);
1407 nr_candidates++;
1410 if (nr_candidates < 2)
1412 sbitmap_free (candidates);
1413 return false;
1416 if (dump_file && (dump_flags & TDF_DETAILS))
1418 fprintf (dump_file, "searching for un-distribute opportunities ");
1419 print_generic_expr (dump_file,
1420 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1421 fprintf (dump_file, " %d\n", nr_candidates);
1424 /* Build linearized sub-operand lists and the counting table. */
1425 cvec.create (0);
1426 ctable.create (15);
1427 /* ??? Macro arguments cannot have multi-argument template types in
1428 them. This typedef is needed to workaround that limitation. */
1429 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1430 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1431 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1433 gimple oedef;
1434 enum tree_code oecode;
1435 unsigned j;
1437 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1438 oecode = gimple_assign_rhs_code (oedef);
1439 linearize_expr_tree (&subops[i], oedef,
1440 associative_tree_code (oecode), false);
1442 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1444 oecount c;
1445 void **slot;
1446 size_t idx;
1447 c.oecode = oecode;
1448 c.cnt = 1;
1449 c.id = next_oecount_id++;
1450 c.op = oe1->op;
1451 cvec.safe_push (c);
1452 idx = cvec.length () + 41;
1453 slot = ctable.find_slot ((void *)idx, INSERT);
1454 if (!*slot)
1456 *slot = (void *)idx;
1458 else
1460 cvec.pop ();
1461 cvec[(size_t)*slot - 42].cnt++;
1465 ctable.dispose ();
1467 /* Sort the counting table. */
1468 cvec.qsort (oecount_cmp);
1470 if (dump_file && (dump_flags & TDF_DETAILS))
1472 oecount *c;
1473 fprintf (dump_file, "Candidates:\n");
1474 FOR_EACH_VEC_ELT (cvec, j, c)
1476 fprintf (dump_file, " %u %s: ", c->cnt,
1477 c->oecode == MULT_EXPR
1478 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1479 print_generic_expr (dump_file, c->op, 0);
1480 fprintf (dump_file, "\n");
1484 /* Process the (operand, code) pairs in order of most occurrence. */
1485 candidates2 = sbitmap_alloc (length);
1486 while (!cvec.is_empty ())
1488 oecount *c = &cvec.last ();
1489 if (c->cnt < 2)
1490 break;
1492 /* Now collect the operands in the outer chain that contain
1493 the common operand in their inner chain. */
1494 bitmap_clear (candidates2);
1495 nr_candidates2 = 0;
1496 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1498 gimple oedef;
1499 enum tree_code oecode;
1500 unsigned j;
1501 tree op = (*ops)[i]->op;
1503 /* If we undistributed in this chain already this may be
1504 a constant. */
1505 if (TREE_CODE (op) != SSA_NAME)
1506 continue;
1508 oedef = SSA_NAME_DEF_STMT (op);
1509 oecode = gimple_assign_rhs_code (oedef);
1510 if (oecode != c->oecode)
1511 continue;
1513 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1515 if (oe1->op == c->op)
1517 bitmap_set_bit (candidates2, i);
1518 ++nr_candidates2;
1519 break;
1524 if (nr_candidates2 >= 2)
1526 operand_entry_t oe1, oe2;
1527 gimple prod;
1528 int first = bitmap_first_set_bit (candidates2);
1530 /* Build the new addition chain. */
1531 oe1 = (*ops)[first];
1532 if (dump_file && (dump_flags & TDF_DETAILS))
1534 fprintf (dump_file, "Building (");
1535 print_generic_expr (dump_file, oe1->op, 0);
1537 zero_one_operation (&oe1->op, c->oecode, c->op);
1538 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1540 gimple sum;
1541 oe2 = (*ops)[i];
1542 if (dump_file && (dump_flags & TDF_DETAILS))
1544 fprintf (dump_file, " + ");
1545 print_generic_expr (dump_file, oe2->op, 0);
1547 zero_one_operation (&oe2->op, c->oecode, c->op);
1548 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1549 oe1->op, oe2->op, opcode);
1550 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1551 oe2->rank = 0;
1552 oe1->op = gimple_get_lhs (sum);
1555 /* Apply the multiplication/division. */
1556 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1557 oe1->op, c->op, c->oecode);
1558 if (dump_file && (dump_flags & TDF_DETAILS))
1560 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1561 print_generic_expr (dump_file, c->op, 0);
1562 fprintf (dump_file, "\n");
1565 /* Record it in the addition chain and disable further
1566 undistribution with this op. */
1567 oe1->op = gimple_assign_lhs (prod);
1568 oe1->rank = get_rank (oe1->op);
1569 subops[first].release ();
1571 changed = true;
1574 cvec.pop ();
1577 for (i = 0; i < ops->length (); ++i)
1578 subops[i].release ();
1579 free (subops);
1580 cvec.release ();
1581 sbitmap_free (candidates);
1582 sbitmap_free (candidates2);
1584 return changed;
1587 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1588 expression, examine the other OPS to see if any of them are comparisons
1589 of the same values, which we may be able to combine or eliminate.
1590 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1592 static bool
1593 eliminate_redundant_comparison (enum tree_code opcode,
1594 vec<operand_entry_t> *ops,
1595 unsigned int currindex,
1596 operand_entry_t curr)
1598 tree op1, op2;
1599 enum tree_code lcode, rcode;
1600 gimple def1, def2;
1601 int i;
1602 operand_entry_t oe;
1604 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1605 return false;
1607 /* Check that CURR is a comparison. */
1608 if (TREE_CODE (curr->op) != SSA_NAME)
1609 return false;
1610 def1 = SSA_NAME_DEF_STMT (curr->op);
1611 if (!is_gimple_assign (def1))
1612 return false;
1613 lcode = gimple_assign_rhs_code (def1);
1614 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1615 return false;
1616 op1 = gimple_assign_rhs1 (def1);
1617 op2 = gimple_assign_rhs2 (def1);
1619 /* Now look for a similar comparison in the remaining OPS. */
1620 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1622 tree t;
1624 if (TREE_CODE (oe->op) != SSA_NAME)
1625 continue;
1626 def2 = SSA_NAME_DEF_STMT (oe->op);
1627 if (!is_gimple_assign (def2))
1628 continue;
1629 rcode = gimple_assign_rhs_code (def2);
1630 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1631 continue;
1633 /* If we got here, we have a match. See if we can combine the
1634 two comparisons. */
1635 if (opcode == BIT_IOR_EXPR)
1636 t = maybe_fold_or_comparisons (lcode, op1, op2,
1637 rcode, gimple_assign_rhs1 (def2),
1638 gimple_assign_rhs2 (def2));
1639 else
1640 t = maybe_fold_and_comparisons (lcode, op1, op2,
1641 rcode, gimple_assign_rhs1 (def2),
1642 gimple_assign_rhs2 (def2));
1643 if (!t)
1644 continue;
1646 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1647 always give us a boolean_type_node value back. If the original
1648 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1649 we need to convert. */
1650 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1651 t = fold_convert (TREE_TYPE (curr->op), t);
1653 if (TREE_CODE (t) != INTEGER_CST
1654 && !operand_equal_p (t, curr->op, 0))
1656 enum tree_code subcode;
1657 tree newop1, newop2;
1658 if (!COMPARISON_CLASS_P (t))
1659 continue;
1660 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1661 STRIP_USELESS_TYPE_CONVERSION (newop1);
1662 STRIP_USELESS_TYPE_CONVERSION (newop2);
1663 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1664 continue;
1667 if (dump_file && (dump_flags & TDF_DETAILS))
1669 fprintf (dump_file, "Equivalence: ");
1670 print_generic_expr (dump_file, curr->op, 0);
1671 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1672 print_generic_expr (dump_file, oe->op, 0);
1673 fprintf (dump_file, " -> ");
1674 print_generic_expr (dump_file, t, 0);
1675 fprintf (dump_file, "\n");
1678 /* Now we can delete oe, as it has been subsumed by the new combined
1679 expression t. */
1680 ops->ordered_remove (i);
1681 reassociate_stats.ops_eliminated ++;
1683 /* If t is the same as curr->op, we're done. Otherwise we must
1684 replace curr->op with t. Special case is if we got a constant
1685 back, in which case we add it to the end instead of in place of
1686 the current entry. */
1687 if (TREE_CODE (t) == INTEGER_CST)
1689 ops->ordered_remove (currindex);
1690 add_to_ops_vec (ops, t);
1692 else if (!operand_equal_p (t, curr->op, 0))
1694 gimple sum;
1695 enum tree_code subcode;
1696 tree newop1;
1697 tree newop2;
1698 gcc_assert (COMPARISON_CLASS_P (t));
1699 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1700 STRIP_USELESS_TYPE_CONVERSION (newop1);
1701 STRIP_USELESS_TYPE_CONVERSION (newop2);
1702 gcc_checking_assert (is_gimple_val (newop1)
1703 && is_gimple_val (newop2));
1704 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1705 curr->op = gimple_get_lhs (sum);
1707 return true;
1710 return false;
1713 /* Perform various identities and other optimizations on the list of
1714 operand entries, stored in OPS. The tree code for the binary
1715 operation between all the operands is OPCODE. */
1717 static void
1718 optimize_ops_list (enum tree_code opcode,
1719 vec<operand_entry_t> *ops)
1721 unsigned int length = ops->length ();
1722 unsigned int i;
1723 operand_entry_t oe;
1724 operand_entry_t oelast = NULL;
1725 bool iterate = false;
1727 if (length == 1)
1728 return;
1730 oelast = ops->last ();
1732 /* If the last two are constants, pop the constants off, merge them
1733 and try the next two. */
1734 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1736 operand_entry_t oelm1 = (*ops)[length - 2];
1738 if (oelm1->rank == 0
1739 && is_gimple_min_invariant (oelm1->op)
1740 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1741 TREE_TYPE (oelast->op)))
1743 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1744 oelm1->op, oelast->op);
1746 if (folded && is_gimple_min_invariant (folded))
1748 if (dump_file && (dump_flags & TDF_DETAILS))
1749 fprintf (dump_file, "Merging constants\n");
1751 ops->pop ();
1752 ops->pop ();
1754 add_to_ops_vec (ops, folded);
1755 reassociate_stats.constants_eliminated++;
1757 optimize_ops_list (opcode, ops);
1758 return;
1763 eliminate_using_constants (opcode, ops);
1764 oelast = NULL;
1766 for (i = 0; ops->iterate (i, &oe);)
1768 bool done = false;
1770 if (eliminate_not_pairs (opcode, ops, i, oe))
1771 return;
1772 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1773 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1774 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1776 if (done)
1777 return;
1778 iterate = true;
1779 oelast = NULL;
1780 continue;
1782 oelast = oe;
1783 i++;
1786 length = ops->length ();
1787 oelast = ops->last ();
1789 if (iterate)
1790 optimize_ops_list (opcode, ops);
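
/* For example (a sketch), for the PLUS_EXPR operand list { a, b, 3, 5 }
   the two trailing constants fold into 8, the list becomes { a, b, 8 },
   and the elimination helpers above are re-run on the shorter list in
   case the merge exposed further opportunities.  */
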
/* The following functions are subroutines to optimize_range_tests and allow
   it to try to change a logical combination of comparisons into a range
   test.

   For example, both
	X == 2 || X == 5 || X == 3 || X == 4
   and
	X >= 2 && X <= 5
   are converted to
	(unsigned) (X - 2) <= 3

   For more information see comments above fold_test_range in fold-const.c,
   this implementation is for GIMPLE.  */

struct range_entry
{
  tree exp;
  tree low;
  tree high;
  bool in_p;
  bool strict_overflow_p;
  unsigned int idx, next;
};
1817 /* This is similar to make_range in fold-const.c, but on top of
1818 GIMPLE instead of trees. If EXP is non-NULL, it should be
1819 an SSA_NAME and STMT argument is ignored, otherwise STMT
1820 argument should be a GIMPLE_COND. */
1822 static void
1823 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1825 int in_p;
1826 tree low, high;
1827 bool is_bool, strict_overflow_p;
1829 r->exp = NULL_TREE;
1830 r->in_p = false;
1831 r->strict_overflow_p = false;
1832 r->low = NULL_TREE;
1833 r->high = NULL_TREE;
1834 if (exp != NULL_TREE
1835 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1836 return;
1838 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1839 and see if we can refine the range. Some of the cases below may not
1840 happen, but it doesn't seem worth worrying about this. We "continue"
1841 the outer loop when we've changed something; otherwise we "break"
1842 the switch, which will "break" the while. */
1843 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1844 high = low;
1845 in_p = 0;
1846 strict_overflow_p = false;
1847 is_bool = false;
1848 if (exp == NULL_TREE)
1849 is_bool = true;
1850 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1852 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1853 is_bool = true;
1854 else
1855 return;
1857 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1858 is_bool = true;
1860 while (1)
1862 enum tree_code code;
1863 tree arg0, arg1, exp_type;
1864 tree nexp;
1865 location_t loc;
1867 if (exp != NULL_TREE)
1869 if (TREE_CODE (exp) != SSA_NAME
1870 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1871 break;
1873 stmt = SSA_NAME_DEF_STMT (exp);
1874 if (!is_gimple_assign (stmt))
1875 break;
1877 code = gimple_assign_rhs_code (stmt);
1878 arg0 = gimple_assign_rhs1 (stmt);
1879 arg1 = gimple_assign_rhs2 (stmt);
1880 exp_type = TREE_TYPE (exp);
1882 else
1884 code = gimple_cond_code (stmt);
1885 arg0 = gimple_cond_lhs (stmt);
1886 arg1 = gimple_cond_rhs (stmt);
1887 exp_type = boolean_type_node;
1890 if (TREE_CODE (arg0) != SSA_NAME)
1891 break;
1892 loc = gimple_location (stmt);
1893 switch (code)
1895 case BIT_NOT_EXPR:
1896 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1897 /* Ensure the range is either +[-,0], +[0,0],
1898 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1899 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1900 or similar expression of unconditional true or
1901 false, it should not be negated. */
1902 && ((high && integer_zerop (high))
1903 || (low && integer_onep (low))))
1905 in_p = !in_p;
1906 exp = arg0;
1907 continue;
1909 break;
1910 case SSA_NAME:
1911 exp = arg0;
1912 continue;
1913 CASE_CONVERT:
1914 if (is_bool)
1915 goto do_default;
1916 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1918 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1919 is_bool = true;
1920 else
1921 return;
1923 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1924 is_bool = true;
1925 goto do_default;
1926 case EQ_EXPR:
1927 case NE_EXPR:
1928 case LT_EXPR:
1929 case LE_EXPR:
1930 case GE_EXPR:
1931 case GT_EXPR:
1932 is_bool = true;
1933 /* FALLTHRU */
1934 default:
1935 if (!is_bool)
1936 return;
1937 do_default:
1938 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1939 &low, &high, &in_p,
1940 &strict_overflow_p);
1941 if (nexp != NULL_TREE)
1943 exp = nexp;
1944 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1945 continue;
1947 break;
1949 break;
1951 if (is_bool)
1953 r->exp = exp;
1954 r->in_p = in_p;
1955 r->low = low;
1956 r->high = high;
1957 r->strict_overflow_p = strict_overflow_p;
1961 /* Comparison function for qsort. Sort entries
1962 without SSA_NAME exp first, then with SSA_NAMEs sorted
1963 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1964 by increasing ->low and if ->low is the same, by increasing
1965 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1966 maximum. */
1968 static int
1969 range_entry_cmp (const void *a, const void *b)
1971 const struct range_entry *p = (const struct range_entry *) a;
1972 const struct range_entry *q = (const struct range_entry *) b;
1974 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
1976 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
1978 /* Group range_entries for the same SSA_NAME together. */
1979 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
1980 return -1;
1981 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
1982 return 1;
1983 /* If ->low is different, NULL low goes first, then by
1984 ascending low. */
1985 if (p->low != NULL_TREE)
1987 if (q->low != NULL_TREE)
1989 tree tem = fold_binary (LT_EXPR, boolean_type_node,
1990 p->low, q->low);
1991 if (tem && integer_onep (tem))
1992 return -1;
1993 tem = fold_binary (GT_EXPR, boolean_type_node,
1994 p->low, q->low);
1995 if (tem && integer_onep (tem))
1996 return 1;
1998 else
1999 return 1;
2001 else if (q->low != NULL_TREE)
2002 return -1;
2003 /* If ->high is different, NULL high goes last, before that by
2004 ascending high. */
2005 if (p->high != NULL_TREE)
2007 if (q->high != NULL_TREE)
2009 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2010 p->high, q->high);
2011 if (tem && integer_onep (tem))
2012 return -1;
2013 tem = fold_binary (GT_EXPR, boolean_type_node,
2014 p->high, q->high);
2015 if (tem && integer_onep (tem))
2016 return 1;
2018 else
2019 return -1;
2021 else if (p->high != NULL_TREE)
2022 return 1;
2023 /* If both ranges are the same, sort below by ascending idx. */
2025 else
2026 return 1;
2028 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2029 return -1;
2031 if (p->idx < q->idx)
2032 return -1;
2033 else
2035 gcc_checking_assert (p->idx > q->idx);
2036 return 1;
2040 /* Helper routine of optimize_range_test.
2041 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2042 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2043 OPCODE and OPS are arguments of optimize_range_tests. Return
2044 true if the range merge has been successful.
2045 If OPCODE is ERROR_MARK, this is called from within
2046 maybe_optimize_range_tests and is performing inter-bb range optimization.
2047 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2048 oe->rank. */
2050 static bool
2051 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2052 unsigned int count, enum tree_code opcode,
2053 vec<operand_entry_t> *ops, tree exp, bool in_p,
2054 tree low, tree high, bool strict_overflow_p)
2056 operand_entry_t oe = (*ops)[range->idx];
2057 tree op = oe->op;
2058 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2059 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2060 location_t loc = gimple_location (stmt);
2061 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2062 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
2063 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2064 gimple_stmt_iterator gsi;
2066 if (tem == NULL_TREE)
2067 return false;
2069 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2070 warning_at (loc, OPT_Wstrict_overflow,
2071 "assuming signed overflow does not occur "
2072 "when simplifying range test");
2074 if (dump_file && (dump_flags & TDF_DETAILS))
2076 struct range_entry *r;
2077 fprintf (dump_file, "Optimizing range tests ");
2078 print_generic_expr (dump_file, range->exp, 0);
2079 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2080 print_generic_expr (dump_file, range->low, 0);
2081 fprintf (dump_file, ", ");
2082 print_generic_expr (dump_file, range->high, 0);
2083 fprintf (dump_file, "]");
2084 for (r = otherrange; r < otherrange + count; r++)
2086 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2087 print_generic_expr (dump_file, r->low, 0);
2088 fprintf (dump_file, ", ");
2089 print_generic_expr (dump_file, r->high, 0);
2090 fprintf (dump_file, "]");
2092 fprintf (dump_file, "\n into ");
2093 print_generic_expr (dump_file, tem, 0);
2094 fprintf (dump_file, "\n");
2097 if (opcode == BIT_IOR_EXPR
2098 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2099 tem = invert_truthvalue_loc (loc, tem);
2101 tem = fold_convert_loc (loc, optype, tem);
2102 gsi = gsi_for_stmt (stmt);
2103 /* In rare cases range->exp can be equal to lhs of stmt.
2104    In that case we have to insert after the stmt rather than before
2105 it. */
2106 if (op == range->exp)
2107 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2108 GSI_CONTINUE_LINKING);
2109 else
2111 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2112 GSI_SAME_STMT);
2113 gsi_prev (&gsi);
2115 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2116 if (gimple_uid (gsi_stmt (gsi)))
2117 break;
2118 else
2119 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2121 oe->op = tem;
2122 range->exp = exp;
2123 range->low = low;
2124 range->high = high;
2125 range->in_p = in_p;
2126 range->strict_overflow_p = false;
2128 for (range = otherrange; range < otherrange + count; range++)
2130 oe = (*ops)[range->idx];
2131 /* Now change all the other range test immediate uses, so that
2132 those tests will be optimized away. */
2133 if (opcode == ERROR_MARK)
2135 if (oe->op)
2136 oe->op = build_int_cst (TREE_TYPE (oe->op),
2137 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2138 else
2139 oe->op = (oe->rank == BIT_IOR_EXPR
2140 ? boolean_false_node : boolean_true_node);
2142 else
2143 oe->op = error_mark_node;
2144 range->exp = NULL_TREE;
2146 return true;
2149 /* Optimize X == CST1 || X == CST2
2150 if popcount (CST1 ^ CST2) == 1 into
2151 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2152 Similarly for ranges. E.g.
2153 X != 2 && X != 3 && X != 10 && X != 11
2154 will be transformed by the previous optimization into
2155 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2156 and this loop can transform that into
2157 !(((X & ~8) - 2U) <= 1U). */
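/* As a standalone sanity check of the identity above (illustrative only;
   the program below is not part of this pass and its names are made up),
   one can verify exhaustively over a small domain that, when
   popcount (CST1 ^ CST2) == 1, the two equality tests collapse into a
   single masked comparison:  */
#if 0
#include <assert.h>

/* Return nonzero iff the rewritten form agrees with the original test.  */
static int
range_test_xor_equiv (unsigned x, unsigned cst1, unsigned cst2)
{
  unsigned diff = cst1 ^ cst2;
  int original, rewritten;

  /* Precondition of the transform: exactly one differing bit.  */
  assert (__builtin_popcount (diff) == 1);
  original = (x == cst1) || (x == cst2);
  rewritten = (x & ~diff) == (cst1 & ~diff);
  return original == rewritten;
}

int
main (void)
{
  unsigned x;
  for (x = 0; x < 256; x++)
    assert (range_test_xor_equiv (x, 43u, 47u));  /* 43 ^ 47 == 4.  */
  return 0;
}
#endif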
2159 static bool
2160 optimize_range_tests_xor (enum tree_code opcode, tree type,
2161 tree lowi, tree lowj, tree highi, tree highj,
2162 vec<operand_entry_t> *ops,
2163 struct range_entry *rangei,
2164 struct range_entry *rangej)
2166 tree lowxor, highxor, tem, exp;
2167   /* Check lowi ^ lowj == highi ^ highj and
2168      popcount (lowi ^ lowj) == 1.  */
2169 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2170 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2171 return false;
2172 if (tree_log2 (lowxor) < 0)
2173 return false;
2174 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2175 if (!tree_int_cst_equal (lowxor, highxor))
2176 return false;
2178 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2179 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2180 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2181 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2182 if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2183 rangei->in_p, lowj, highj,
2184 rangei->strict_overflow_p
2185 || rangej->strict_overflow_p))
2186 return true;
2187 return false;
2190 /* Optimize X == CST1 || X == CST2
2191 if popcount (CST2 - CST1) == 1 into
2192 ((X - CST1) & ~(CST2 - CST1)) == 0.
2193 Similarly for ranges. E.g.
2194 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2195 || X == 75 || X == 45
2196 will be transformed by the previous optimization into
2197 (X - 43U) <= 3U || (X - 75U) <= 3U
2198 and this loop can transform that into
2199 ((X - 43U) & ~(75U - 43U)) <= 3U. */
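/* Likewise, a standalone sanity check (illustrative only, not part of the
   pass) that when popcount (CST2 - CST1) == 1 both the equality form and
   the range form above are equivalent to the masked tests shown:  */
#if 0
#include <assert.h>

int
main (void)
{
  const unsigned cst1 = 43u, cst2 = 75u;  /* 75 - 43 == 32, a power of two.  */
  unsigned x;

  for (x = 0; x < 512; x++)
    {
      int orig_eq = (x == cst1) || (x == cst2);
      int new_eq = ((x - cst1) & ~(cst2 - cst1)) == 0;
      int orig_range = (x - 43u) <= 3u || (x - 75u) <= 3u;
      int new_range = ((x - 43u) & ~(75u - 43u)) <= 3u;

      assert (orig_eq == new_eq);
      assert (orig_range == new_range);
    }
  return 0;
}
#endif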
2200 static bool
2201 optimize_range_tests_diff (enum tree_code opcode, tree type,
2202 tree lowi, tree lowj, tree highi, tree highj,
2203 vec<operand_entry_t> *ops,
2204 struct range_entry *rangei,
2205 struct range_entry *rangej)
2207 tree tem1, tem2, mask;
2208 /* Check highi - lowi == highj - lowj. */
2209 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2210 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2211 return false;
2212 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2213 if (!tree_int_cst_equal (tem1, tem2))
2214 return false;
2215 /* Check popcount (lowj - lowi) == 1. */
2216 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2217 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2218 return false;
2219 if (tree_log2 (tem1) < 0)
2220 return false;
2222 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2223 tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
2224 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2225 lowj = build_int_cst (type, 0);
2226 if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2227 rangei->in_p, lowj, tem2,
2228 rangei->strict_overflow_p
2229 || rangej->strict_overflow_p))
2230 return true;
2231 return false;
2234 /* Perform checks common to optimize_range_tests_xor and
2235    optimize_range_tests_diff.
2236 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2237 Else it calls optimize_range_tests_diff. */
2239 static bool
2240 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2241 bool optimize_xor, vec<operand_entry_t> *ops,
2242 struct range_entry *ranges)
2244 int i, j;
2245 bool any_changes = false;
2246 for (i = first; i < length; i++)
2248 tree lowi, highi, lowj, highj, type, tem;
2250 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2251 continue;
2252 type = TREE_TYPE (ranges[i].exp);
2253 if (!INTEGRAL_TYPE_P (type))
2254 continue;
2255 lowi = ranges[i].low;
2256 if (lowi == NULL_TREE)
2257 lowi = TYPE_MIN_VALUE (type);
2258 highi = ranges[i].high;
2259 if (highi == NULL_TREE)
2260 continue;
2261 for (j = i + 1; j < length && j < i + 64; j++)
2263 bool changes;
2264 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2265 continue;
2266 lowj = ranges[j].low;
2267 if (lowj == NULL_TREE)
2268 continue;
2269 highj = ranges[j].high;
2270 if (highj == NULL_TREE)
2271 highj = TYPE_MAX_VALUE (type);
2272 /* Check lowj > highi. */
2273 tem = fold_binary (GT_EXPR, boolean_type_node,
2274 lowj, highi);
2275 if (tem == NULL_TREE || !integer_onep (tem))
2276 continue;
2277 if (optimize_xor)
2278 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2279 highi, highj, ops,
2280 ranges + i, ranges + j);
2281 else
2282 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2283 highi, highj, ops,
2284 ranges + i, ranges + j);
2285 if (changes)
2287 any_changes = true;
2288 break;
2292 return any_changes;
2295 /* Optimize range tests, similarly to how fold_range_test optimizes
2296    them on trees.  The tree code for the binary
2297 operation between all the operands is OPCODE.
2298 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2299 maybe_optimize_range_tests for inter-bb range optimization.
2300 In that case if oe->op is NULL, oe->id is bb->index whose
2301 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2302 the actual opcode. */
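/* For example (illustrative only), two tests of the same SSA_NAME such as
   x_1 == 0 || x_1 == 1 become the ranges +[0, 0] and +[1, 1]; merge_ranges
   combines them into the single range +[0, 1], and update_range_test then
   replaces both operands with one range check along the lines of
   x_1 <= 1.  */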
2304 static bool
2305 optimize_range_tests (enum tree_code opcode,
2306 vec<operand_entry_t> *ops)
2308 unsigned int length = ops->length (), i, j, first;
2309 operand_entry_t oe;
2310 struct range_entry *ranges;
2311 bool any_changes = false;
2313 if (length == 1)
2314 return false;
2316 ranges = XNEWVEC (struct range_entry, length);
2317 for (i = 0; i < length; i++)
2319 oe = (*ops)[i];
2320 ranges[i].idx = i;
2321 init_range_entry (ranges + i, oe->op,
2322 oe->op ? NULL :
2323 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2324 /* For | invert it now, we will invert it again before emitting
2325 the optimized expression. */
2326 if (opcode == BIT_IOR_EXPR
2327 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2328 ranges[i].in_p = !ranges[i].in_p;
2331 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2332 for (i = 0; i < length; i++)
2333 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2334 break;
2336 /* Try to merge ranges. */
2337 for (first = i; i < length; i++)
2339 tree low = ranges[i].low;
2340 tree high = ranges[i].high;
2341 int in_p = ranges[i].in_p;
2342 bool strict_overflow_p = ranges[i].strict_overflow_p;
2343 int update_fail_count = 0;
2345 for (j = i + 1; j < length; j++)
2347 if (ranges[i].exp != ranges[j].exp)
2348 break;
2349 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2350 ranges[j].in_p, ranges[j].low, ranges[j].high))
2351 break;
2352 strict_overflow_p |= ranges[j].strict_overflow_p;
2355 if (j == i + 1)
2356 continue;
2358 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2359 ops, ranges[i].exp, in_p, low, high,
2360 strict_overflow_p))
2362 i = j - 1;
2363 any_changes = true;
2365 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2366 while update_range_test would fail. */
2367 else if (update_fail_count == 64)
2368 i = j - 1;
2369 else
2370 ++update_fail_count;
2373 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2374 ops, ranges);
2376 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2377 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2378 ops, ranges);
2380 if (any_changes && opcode != ERROR_MARK)
2382 j = 0;
2383 FOR_EACH_VEC_ELT (*ops, i, oe)
2385 if (oe->op == error_mark_node)
2386 continue;
2387 else if (i != j)
2388 (*ops)[j] = oe;
2389 j++;
2391 ops->truncate (j);
2394 XDELETEVEC (ranges);
2395 return any_changes;
2398 /* Return true if STMT is a cast like:
2399 <bb N>:
2401 _123 = (int) _234;
2403 <bb M>:
2404 # _345 = PHI <_123(N), 1(...), 1(...)>
2405 where _234 has bool type, _123 has single use and
2406 bb N has a single successor M. This is commonly used in
2407 the last block of a range test. */
2409 static bool
2410 final_range_test_p (gimple stmt)
2412 basic_block bb, rhs_bb;
2413 edge e;
2414 tree lhs, rhs;
2415 use_operand_p use_p;
2416 gimple use_stmt;
2418 if (!gimple_assign_cast_p (stmt))
2419 return false;
2420 bb = gimple_bb (stmt);
2421 if (!single_succ_p (bb))
2422 return false;
2423 e = single_succ_edge (bb);
2424 if (e->flags & EDGE_COMPLEX)
2425 return false;
2427 lhs = gimple_assign_lhs (stmt);
2428 rhs = gimple_assign_rhs1 (stmt);
2429 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2430 || TREE_CODE (rhs) != SSA_NAME
2431 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2432 return false;
2434 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2435 if (!single_imm_use (lhs, &use_p, &use_stmt))
2436 return false;
2438 if (gimple_code (use_stmt) != GIMPLE_PHI
2439 || gimple_bb (use_stmt) != e->dest)
2440 return false;
2442 /* And that the rhs is defined in the same loop. */
2443 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2444 if (rhs_bb == NULL
2445 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2446 return false;
2448 return true;
2451 /* Return true if BB is a suitable basic block for inter-bb range test
2452    optimization.  If BACKWARD is true, BB should be the only predecessor
2453    of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2454    or compared against to find a common basic block to which all conditions
2455    branch if true resp. false.  If BACKWARD is false, TEST_BB should
2456 be the only predecessor of BB. */
2458 static bool
2459 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2460 bool backward)
2462 edge_iterator ei, ei2;
2463 edge e, e2;
2464 gimple stmt;
2465 gimple_stmt_iterator gsi;
2466 bool other_edge_seen = false;
2467 bool is_cond;
2469 if (test_bb == bb)
2470 return false;
2471 /* Check last stmt first. */
2472 stmt = last_stmt (bb);
2473 if (stmt == NULL
2474 || (gimple_code (stmt) != GIMPLE_COND
2475 && (backward || !final_range_test_p (stmt)))
2476 || gimple_visited_p (stmt)
2477 || stmt_could_throw_p (stmt)
2478 || *other_bb == bb)
2479 return false;
2480 is_cond = gimple_code (stmt) == GIMPLE_COND;
2481 if (is_cond)
2483 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2484 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2485 to *OTHER_BB (if not set yet, try to find it out). */
2486 if (EDGE_COUNT (bb->succs) != 2)
2487 return false;
2488 FOR_EACH_EDGE (e, ei, bb->succs)
2490 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2491 return false;
2492 if (e->dest == test_bb)
2494 if (backward)
2495 continue;
2496 else
2497 return false;
2499 if (e->dest == bb)
2500 return false;
2501 if (*other_bb == NULL)
2503 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2504 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2505 return false;
2506 else if (e->dest == e2->dest)
2507 *other_bb = e->dest;
2508 if (*other_bb == NULL)
2509 return false;
2511 if (e->dest == *other_bb)
2512 other_edge_seen = true;
2513 else if (backward)
2514 return false;
2516 if (*other_bb == NULL || !other_edge_seen)
2517 return false;
2519 else if (single_succ (bb) != *other_bb)
2520 return false;
2522 /* Now check all PHIs of *OTHER_BB. */
2523 e = find_edge (bb, *other_bb);
2524 e2 = find_edge (test_bb, *other_bb);
2525 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2527 gimple phi = gsi_stmt (gsi);
2528 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2529 corresponding to BB and TEST_BB predecessor must be the same. */
2530 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2531 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2533 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2534 one of the PHIs should have the lhs of the last stmt in
2535 that block as PHI arg and that PHI should have 0 or 1
2536 corresponding to it in all other range test basic blocks
2537 considered. */
2538 if (!is_cond)
2540 if (gimple_phi_arg_def (phi, e->dest_idx)
2541 == gimple_assign_lhs (stmt)
2542 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2543 || integer_onep (gimple_phi_arg_def (phi,
2544 e2->dest_idx))))
2545 continue;
2547 else
2549 gimple test_last = last_stmt (test_bb);
2550 if (gimple_code (test_last) != GIMPLE_COND
2551 && gimple_phi_arg_def (phi, e2->dest_idx)
2552 == gimple_assign_lhs (test_last)
2553 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2554 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2555 continue;
2558 return false;
2561 return true;
2564 /* Return true if BB doesn't have side-effects that would disallow
2565 range test optimization, all SSA_NAMEs set in the bb are consumed
2566 in the bb and there are no PHIs. */
2568 static bool
2569 no_side_effect_bb (basic_block bb)
2571 gimple_stmt_iterator gsi;
2572 gimple last;
2574 if (!gimple_seq_empty_p (phi_nodes (bb)))
2575 return false;
2576 last = last_stmt (bb);
2577 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2579 gimple stmt = gsi_stmt (gsi);
2580 tree lhs;
2581 imm_use_iterator imm_iter;
2582 use_operand_p use_p;
2584 if (is_gimple_debug (stmt))
2585 continue;
2586 if (gimple_has_side_effects (stmt))
2587 return false;
2588 if (stmt == last)
2589 return true;
2590 if (!is_gimple_assign (stmt))
2591 return false;
2592 lhs = gimple_assign_lhs (stmt);
2593 if (TREE_CODE (lhs) != SSA_NAME)
2594 return false;
2595 if (gimple_assign_rhs_could_trap_p (stmt))
2596 return false;
2597 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2599 gimple use_stmt = USE_STMT (use_p);
2600 if (is_gimple_debug (use_stmt))
2601 continue;
2602 if (gimple_bb (use_stmt) != bb)
2603 return false;
2606 return false;
2609 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2610 return true and fill in *OPS recursively. */
2612 static bool
2613 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2614 struct loop *loop)
2616 gimple stmt = SSA_NAME_DEF_STMT (var);
2617 tree rhs[2];
2618 int i;
2620 if (!is_reassociable_op (stmt, code, loop))
2621 return false;
2623 rhs[0] = gimple_assign_rhs1 (stmt);
2624 rhs[1] = gimple_assign_rhs2 (stmt);
2625 gimple_set_visited (stmt, true);
2626 for (i = 0; i < 2; i++)
2627 if (TREE_CODE (rhs[i]) == SSA_NAME
2628 && !get_ops (rhs[i], code, ops, loop)
2629 && has_single_use (rhs[i]))
2631 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2633 oe->op = rhs[i];
2634 oe->rank = code;
2635 oe->id = 0;
2636 oe->count = 1;
2637 ops->safe_push (oe);
2639 return true;
2642 /* Find the ops that were added by get_ops starting from VAR, see if
2643 they were changed during update_range_test and if yes, create new
2644 stmts. */
2646 static tree
2647 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2648 unsigned int *pidx, struct loop *loop)
2650 gimple stmt = SSA_NAME_DEF_STMT (var);
2651 tree rhs[4];
2652 int i;
2654 if (!is_reassociable_op (stmt, code, loop))
2655 return NULL;
2657 rhs[0] = gimple_assign_rhs1 (stmt);
2658 rhs[1] = gimple_assign_rhs2 (stmt);
2659 rhs[2] = rhs[0];
2660 rhs[3] = rhs[1];
2661 for (i = 0; i < 2; i++)
2662 if (TREE_CODE (rhs[i]) == SSA_NAME)
2664 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2665 if (rhs[2 + i] == NULL_TREE)
2667 if (has_single_use (rhs[i]))
2668 rhs[2 + i] = ops[(*pidx)++]->op;
2669 else
2670 rhs[2 + i] = rhs[i];
2673 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2674 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2676 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2677 var = make_ssa_name (TREE_TYPE (var), NULL);
2678 gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2679 var, rhs[2], rhs[3]);
2680 gimple_set_uid (g, gimple_uid (stmt));
2681 gimple_set_visited (g, true);
2682 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2684 return var;
2687 /* Structure to track the initial value passed to get_ops and
2688 the range in the ops vector for each basic block. */
2690 struct inter_bb_range_test_entry
2692 tree op;
2693 unsigned int first_idx, last_idx;
2696 /* Inter-bb range test optimization. */
2698 static void
2699 maybe_optimize_range_tests (gimple stmt)
2701 basic_block first_bb = gimple_bb (stmt);
2702 basic_block last_bb = first_bb;
2703 basic_block other_bb = NULL;
2704 basic_block bb;
2705 edge_iterator ei;
2706 edge e;
2707 auto_vec<operand_entry_t> ops;
2708 auto_vec<inter_bb_range_test_entry> bbinfo;
2709 bool any_changes = false;
2711 /* Consider only basic blocks that end with GIMPLE_COND or
2712 a cast statement satisfying final_range_test_p. All
2713 but the last bb in the first_bb .. last_bb range
2714 should end with GIMPLE_COND. */
2715 if (gimple_code (stmt) == GIMPLE_COND)
2717 if (EDGE_COUNT (first_bb->succs) != 2)
2718 return;
2720 else if (final_range_test_p (stmt))
2721 other_bb = single_succ (first_bb);
2722 else
2723 return;
2725 if (stmt_could_throw_p (stmt))
2726 return;
2728 /* As relative ordering of post-dominator sons isn't fixed,
2729 maybe_optimize_range_tests can be called first on any
2730 bb in the range we want to optimize. So, start searching
2731      backwards, checking whether first_bb can be set to a predecessor.  */
2732 while (single_pred_p (first_bb))
2734 basic_block pred_bb = single_pred (first_bb);
2735 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2736 break;
2737 if (!no_side_effect_bb (first_bb))
2738 break;
2739 first_bb = pred_bb;
2741 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2742 Before starting forward search in last_bb successors, find
2743 out the other_bb. */
2744 if (first_bb == last_bb)
2746 other_bb = NULL;
2747      /* As a non-GIMPLE_COND last stmt always terminates the range,
2748         forward search would not discover anything either, so just give up.  */
2749 if (gimple_code (stmt) != GIMPLE_COND)
2750 return;
2751 /* Look at both successors. Either it ends with a GIMPLE_COND
2752 and satisfies suitable_cond_bb, or ends with a cast and
2753 other_bb is that cast's successor. */
2754 FOR_EACH_EDGE (e, ei, first_bb->succs)
2755 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2756 || e->dest == first_bb)
2757 return;
2758 else if (single_pred_p (e->dest))
2760 stmt = last_stmt (e->dest);
2761 if (stmt
2762 && gimple_code (stmt) == GIMPLE_COND
2763 && EDGE_COUNT (e->dest->succs) == 2)
2765 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2766 break;
2767 else
2768 other_bb = NULL;
2770 else if (stmt
2771 && final_range_test_p (stmt)
2772 && find_edge (first_bb, single_succ (e->dest)))
2774 other_bb = single_succ (e->dest);
2775 if (other_bb == first_bb)
2776 other_bb = NULL;
2779 if (other_bb == NULL)
2780 return;
2782 /* Now do the forward search, moving last_bb to successor bbs
2783 that aren't other_bb. */
2784 while (EDGE_COUNT (last_bb->succs) == 2)
2786 FOR_EACH_EDGE (e, ei, last_bb->succs)
2787 if (e->dest != other_bb)
2788 break;
2789 if (e == NULL)
2790 break;
2791 if (!single_pred_p (e->dest))
2792 break;
2793 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2794 break;
2795 if (!no_side_effect_bb (e->dest))
2796 break;
2797 last_bb = e->dest;
2799 if (first_bb == last_bb)
2800 return;
2801 /* Here basic blocks first_bb through last_bb's predecessor
2802 end with GIMPLE_COND, all of them have one of the edges to
2803 other_bb and another to another block in the range,
2804 all blocks except first_bb don't have side-effects and
2805 last_bb ends with either GIMPLE_COND, or cast satisfying
2806 final_range_test_p. */
2807 for (bb = last_bb; ; bb = single_pred (bb))
2809 enum tree_code code;
2810 tree lhs, rhs;
2811 inter_bb_range_test_entry bb_ent;
2813 bb_ent.op = NULL_TREE;
2814 bb_ent.first_idx = ops.length ();
2815 bb_ent.last_idx = bb_ent.first_idx;
2816 e = find_edge (bb, other_bb);
2817 stmt = last_stmt (bb);
2818 gimple_set_visited (stmt, true);
2819 if (gimple_code (stmt) != GIMPLE_COND)
2821 use_operand_p use_p;
2822 gimple phi;
2823 edge e2;
2824 unsigned int d;
2826 lhs = gimple_assign_lhs (stmt);
2827 rhs = gimple_assign_rhs1 (stmt);
2828 gcc_assert (bb == last_bb);
2830 /* stmt is
2831 _123 = (int) _234;
2833 followed by:
2834 <bb M>:
2835 # _345 = PHI <_123(N), 1(...), 1(...)>
2837 or 0 instead of 1. If it is 0, the _234
2838 range test is anded together with all the
2839 other range tests, if it is 1, it is ored with
2840 them. */
2841 single_imm_use (lhs, &use_p, &phi);
2842 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2843 e2 = find_edge (first_bb, other_bb);
2844 d = e2->dest_idx;
2845 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2846 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2847 code = BIT_AND_EXPR;
2848 else
2850 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2851 code = BIT_IOR_EXPR;
2854 /* If _234 SSA_NAME_DEF_STMT is
2855 _234 = _567 | _789;
2856         (or &, corresponding to 1/0 in the phi arguments),
2857 push into ops the individual range test arguments
2858 of the bitwise or resp. and, recursively. */
2859 if (!get_ops (rhs, code, &ops,
2860 loop_containing_stmt (stmt))
2861 && has_single_use (rhs))
2863 /* Otherwise, push the _234 range test itself. */
2864 operand_entry_t oe
2865 = (operand_entry_t) pool_alloc (operand_entry_pool);
2867 oe->op = rhs;
2868 oe->rank = code;
2869 oe->id = 0;
2870 oe->count = 1;
2871 ops.safe_push (oe);
2872 bb_ent.last_idx++;
2874 else
2875 bb_ent.last_idx = ops.length ();
2876 bb_ent.op = rhs;
2877 bbinfo.safe_push (bb_ent);
2878 continue;
2880 /* Otherwise stmt is GIMPLE_COND. */
2881 code = gimple_cond_code (stmt);
2882 lhs = gimple_cond_lhs (stmt);
2883 rhs = gimple_cond_rhs (stmt);
2884 if (TREE_CODE (lhs) == SSA_NAME
2885 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2886 && ((code != EQ_EXPR && code != NE_EXPR)
2887 || rhs != boolean_false_node
2888 /* Either push into ops the individual bitwise
2889 or resp. and operands, depending on which
2890 edge is other_bb. */
2891 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2892 ^ (code == EQ_EXPR))
2893 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2894 loop_containing_stmt (stmt))))
2896 /* Or push the GIMPLE_COND stmt itself. */
2897 operand_entry_t oe
2898 = (operand_entry_t) pool_alloc (operand_entry_pool);
2900 oe->op = NULL;
2901 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2902 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2903          /* oe->op = NULL signals that there is no SSA_NAME
2904             for the range test, and oe->id instead is the
2905             number of the basic block at whose end the
2906             GIMPLE_COND is.  */
2907 oe->id = bb->index;
2908 oe->count = 1;
2909 ops.safe_push (oe);
2910 bb_ent.op = NULL;
2911 bb_ent.last_idx++;
2913 else if (ops.length () > bb_ent.first_idx)
2915 bb_ent.op = lhs;
2916 bb_ent.last_idx = ops.length ();
2918 bbinfo.safe_push (bb_ent);
2919 if (bb == first_bb)
2920 break;
2922 if (ops.length () > 1)
2923 any_changes = optimize_range_tests (ERROR_MARK, &ops);
2924 if (any_changes)
2926 unsigned int idx;
2927 /* update_ops relies on has_single_use predicates returning the
2928 same values as it did during get_ops earlier. Additionally it
2929 never removes statements, only adds new ones and it should walk
2930 from the single imm use and check the predicate already before
2931 making those changes.
2932         On the other hand, the handling of GIMPLE_COND directly can turn
2933 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
2934 it needs to be done in a separate loop afterwards. */
2935 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2937 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2938 && bbinfo[idx].op != NULL_TREE)
2940 tree new_op;
2942 stmt = last_stmt (bb);
2943 new_op = update_ops (bbinfo[idx].op,
2944 (enum tree_code)
2945 ops[bbinfo[idx].first_idx]->rank,
2946 ops, &bbinfo[idx].first_idx,
2947 loop_containing_stmt (stmt));
2948 if (new_op == NULL_TREE)
2950 gcc_assert (bb == last_bb);
2951 new_op = ops[bbinfo[idx].first_idx++]->op;
2953 if (bbinfo[idx].op != new_op)
2955 imm_use_iterator iter;
2956 use_operand_p use_p;
2957 gimple use_stmt, cast_stmt = NULL;
2959 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
2960 if (is_gimple_debug (use_stmt))
2961 continue;
2962 else if (gimple_code (use_stmt) == GIMPLE_COND
2963 || gimple_code (use_stmt) == GIMPLE_PHI)
2964 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2965 SET_USE (use_p, new_op);
2966 else if (gimple_assign_cast_p (use_stmt))
2967 cast_stmt = use_stmt;
2968 else
2969 gcc_unreachable ();
2970 if (cast_stmt)
2972 gcc_assert (bb == last_bb);
2973 tree lhs = gimple_assign_lhs (cast_stmt);
2974 tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
2975 enum tree_code rhs_code
2976 = gimple_assign_rhs_code (cast_stmt);
2977 gimple g;
2978 if (is_gimple_min_invariant (new_op))
2980 new_op = fold_convert (TREE_TYPE (lhs), new_op);
2981 g = gimple_build_assign (new_lhs, new_op);
2983 else
2984 g = gimple_build_assign_with_ops (rhs_code, new_lhs,
2985 new_op, NULL_TREE);
2986 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
2987 gimple_set_uid (g, gimple_uid (cast_stmt));
2988 gimple_set_visited (g, true);
2989 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2990 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
2991 if (is_gimple_debug (use_stmt))
2992 continue;
2993 else if (gimple_code (use_stmt) == GIMPLE_COND
2994 || gimple_code (use_stmt) == GIMPLE_PHI)
2995 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2996 SET_USE (use_p, new_lhs);
2997 else
2998 gcc_unreachable ();
3002 if (bb == first_bb)
3003 break;
3005 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3007 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3008 && bbinfo[idx].op == NULL_TREE
3009 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3011 stmt = last_stmt (bb);
3012 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3013 gimple_cond_make_false (stmt);
3014 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3015 gimple_cond_make_true (stmt);
3016 else
3018 gimple_cond_set_code (stmt, NE_EXPR);
3019 gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
3020 gimple_cond_set_rhs (stmt, boolean_false_node);
3022 update_stmt (stmt);
3024 if (bb == first_bb)
3025 break;
3030 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3031    of STMT in its operands.  This is also known as a "destructive
3032 update" operation. */
3034 static bool
3035 is_phi_for_stmt (gimple stmt, tree operand)
3037 gimple def_stmt;
3038 tree lhs;
3039 use_operand_p arg_p;
3040 ssa_op_iter i;
3042 if (TREE_CODE (operand) != SSA_NAME)
3043 return false;
3045 lhs = gimple_assign_lhs (stmt);
3047 def_stmt = SSA_NAME_DEF_STMT (operand);
3048 if (gimple_code (def_stmt) != GIMPLE_PHI)
3049 return false;
3051 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3052 if (lhs == USE_FROM_PTR (arg_p))
3053 return true;
3054 return false;
3057 /* Remove def stmt of VAR if VAR has zero uses and recurse
3058 on rhs1 operand if so. */
3060 static void
3061 remove_visited_stmt_chain (tree var)
3063 gimple stmt;
3064 gimple_stmt_iterator gsi;
3066 while (1)
3068 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3069 return;
3070 stmt = SSA_NAME_DEF_STMT (var);
3071 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3073 var = gimple_assign_rhs1 (stmt);
3074 gsi = gsi_for_stmt (stmt);
3075 gsi_remove (&gsi, true);
3076 release_defs (stmt);
3078 else
3079 return;
3083 /* This function checks three consecutive operands in
3084    the passed operand vector OPS starting from OPINDEX and
3085    swaps two operands if it is profitable for the binary operation
3086    consuming operands OPINDEX + 1 and OPINDEX + 2.
3088 We pair ops with the same rank if possible.
3090 The alternative we try is to see if STMT is a destructive
3091 update style statement, which is like:
3092 b = phi (a, ...)
3093 a = c + b;
3094 In that case, we want to use the destructive update form to
3095 expose the possible vectorizer sum reduction opportunity.
3096 In that case, the third operand will be the phi node. This
3097 check is not performed if STMT is null.
3099 We could, of course, try to be better as noted above, and do a
3100 lot of work to try to find these opportunities in >3 operand
3101 cases, but it is unlikely to be worth it. */
3103 static void
3104 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3105 unsigned int opindex, gimple stmt)
3107 operand_entry_t oe1, oe2, oe3;
3109 oe1 = ops[opindex];
3110 oe2 = ops[opindex + 1];
3111 oe3 = ops[opindex + 2];
3113 if ((oe1->rank == oe2->rank
3114 && oe2->rank != oe3->rank)
3115 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3116 && !is_phi_for_stmt (stmt, oe1->op)
3117 && !is_phi_for_stmt (stmt, oe2->op)))
3119 struct operand_entry temp = *oe3;
3120 oe3->op = oe1->op;
3121 oe3->rank = oe1->rank;
3122 oe1->op = temp.op;
3123 oe1->rank= temp.rank;
3125 else if ((oe1->rank == oe3->rank
3126 && oe2->rank != oe3->rank)
3127 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3128 && !is_phi_for_stmt (stmt, oe1->op)
3129 && !is_phi_for_stmt (stmt, oe3->op)))
3131 struct operand_entry temp = *oe2;
3132 oe2->op = oe1->op;
3133 oe2->rank = oe1->rank;
3134 oe1->op = temp.op;
3135 oe1->rank = temp.rank;
3139 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3140 two definitions, otherwise return STMT. */
3142 static inline gimple
3143 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3145 if (TREE_CODE (rhs1) == SSA_NAME
3146 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3147 stmt = SSA_NAME_DEF_STMT (rhs1);
3148 if (TREE_CODE (rhs2) == SSA_NAME
3149 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3150 stmt = SSA_NAME_DEF_STMT (rhs2);
3151 return stmt;
3154 /* Recursively rewrite our linearized statements so that the operators
3155 match those in OPS[OPINDEX], putting the computation in rank
3156 order. Return new lhs. */
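/* Illustrative sketch (not taken from the sources): given the linearized
   chain
     x_5 = a_1 + b_2;
     x_6 = x_5 + c_3;
   and a sorted OPS vector, the recursion below rewrites each statement's
   operands to match OPS, e.g. it might produce
     _7 = c_3 + b_2;
     x_6 = _7 + a_1;
   creating a fresh SSA name (and statement) whenever the set of operands
   feeding a statement changes.  */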
3158 static tree
3159 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3160 vec<operand_entry_t> ops, bool changed)
3162 tree rhs1 = gimple_assign_rhs1 (stmt);
3163 tree rhs2 = gimple_assign_rhs2 (stmt);
3164 tree lhs = gimple_assign_lhs (stmt);
3165 operand_entry_t oe;
3167 /* The final recursion case for this function is that you have
3168 exactly two operations left.
3169      If we had exactly one op in the entire list to start with, we
3170 would have never called this function, and the tail recursion
3171 rewrites them one at a time. */
3172 if (opindex + 2 == ops.length ())
3174 operand_entry_t oe1, oe2;
3176 oe1 = ops[opindex];
3177 oe2 = ops[opindex + 1];
3179 if (rhs1 != oe1->op || rhs2 != oe2->op)
3181 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3182 unsigned int uid = gimple_uid (stmt);
3184 if (dump_file && (dump_flags & TDF_DETAILS))
3186 fprintf (dump_file, "Transforming ");
3187 print_gimple_stmt (dump_file, stmt, 0, 0);
3190 if (changed)
3192 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3193 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3194 stmt
3195 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3196 lhs, oe1->op, oe2->op);
3197 gimple_set_uid (stmt, uid);
3198 gimple_set_visited (stmt, true);
3199 if (insert_point == gsi_stmt (gsi))
3200 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3201 else
3202 insert_stmt_after (stmt, insert_point);
3204 else
3206 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3207 == stmt);
3208 gimple_assign_set_rhs1 (stmt, oe1->op);
3209 gimple_assign_set_rhs2 (stmt, oe2->op);
3210 update_stmt (stmt);
3213 if (rhs1 != oe1->op && rhs1 != oe2->op)
3214 remove_visited_stmt_chain (rhs1);
3216 if (dump_file && (dump_flags & TDF_DETAILS))
3218 fprintf (dump_file, " into ");
3219 print_gimple_stmt (dump_file, stmt, 0, 0);
3222 return lhs;
3225 /* If we hit here, we should have 3 or more ops left. */
3226 gcc_assert (opindex + 2 < ops.length ());
3228 /* Rewrite the next operator. */
3229 oe = ops[opindex];
3231 /* Recurse on the LHS of the binary operator, which is guaranteed to
3232 be the non-leaf side. */
3233 tree new_rhs1
3234 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3235 changed || oe->op != rhs2);
3237 if (oe->op != rhs2 || new_rhs1 != rhs1)
3239 if (dump_file && (dump_flags & TDF_DETAILS))
3241 fprintf (dump_file, "Transforming ");
3242 print_gimple_stmt (dump_file, stmt, 0, 0);
3245 /* If changed is false, this is either opindex == 0
3246 or all outer rhs2's were equal to corresponding oe->op,
3247 and powi_result is NULL.
3248 That means lhs is equivalent before and after reassociation.
3249 Otherwise ensure the old lhs SSA_NAME is not reused and
3250 create a new stmt as well, so that any debug stmts will be
3251 properly adjusted. */
3252 if (changed)
3254 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3255 unsigned int uid = gimple_uid (stmt);
3256 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3258 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3259 stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3260 lhs, new_rhs1, oe->op);
3261 gimple_set_uid (stmt, uid);
3262 gimple_set_visited (stmt, true);
3263 if (insert_point == gsi_stmt (gsi))
3264 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3265 else
3266 insert_stmt_after (stmt, insert_point);
3268 else
3270 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3271 == stmt);
3272 gimple_assign_set_rhs1 (stmt, new_rhs1);
3273 gimple_assign_set_rhs2 (stmt, oe->op);
3274 update_stmt (stmt);
3277 if (dump_file && (dump_flags & TDF_DETAILS))
3279 fprintf (dump_file, " into ");
3280 print_gimple_stmt (dump_file, stmt, 0, 0);
3283 return lhs;
3286 /* Find out how many cycles we need to compute the statement chain.
3287    OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is the
3288    maximum number of independent statements we may execute per cycle.  */
3290 static int
3291 get_required_cycles (int ops_num, int cpu_width)
3293 int res;
3294 int elog;
3295 unsigned int rest;
3297 /* While we have more than 2 * cpu_width operands
3298      we may reduce the number of operands by cpu_width
3299 per cycle. */
3300 res = ops_num / (2 * cpu_width);
3302   /* The remaining operand count may be halved each cycle
3303      until we have only one operand.  */
3304 rest = (unsigned)(ops_num - res * cpu_width);
3305 elog = exact_log2 (rest);
3306 if (elog >= 0)
3307 res += elog;
3308 else
3309 res += floor_log2 (rest) + 1;
3311 return res;
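/* For instance, ops_num == 5 with cpu_width == 1 gives res = 5 / 2 = 2,
   rest = 3, floor_log2 (3) + 1 = 2, i.e. 4 cycles -- the serial chain of
   four additions.  ops_num == 8 with cpu_width == 4 gives res = 1, rest = 4,
   exact_log2 (4) = 2, i.e. 3 cycles -- a balanced tree of depth three.  */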
3314 /* Returns an optimal number of registers to use for computation of the
3315    given statements.  */
3317 static int
3318 get_reassociation_width (int ops_num, enum tree_code opc,
3319 enum machine_mode mode)
3321 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3322 int width;
3323 int width_min;
3324 int cycles_best;
3326 if (param_width > 0)
3327 width = param_width;
3328 else
3329 width = targetm.sched.reassociation_width (opc, mode);
3331 if (width == 1)
3332 return width;
3334 /* Get the minimal time required for sequence computation. */
3335 cycles_best = get_required_cycles (ops_num, width);
3337   /* Check if we may use less width and still compute the sequence in
3338      the same time.  This will allow us to reduce register usage.
3339 get_required_cycles is monotonically increasing with lower width
3340 so we can perform a binary search for the minimal width that still
3341 results in the optimal cycle count. */
3342 width_min = 1;
3343 while (width > width_min)
3345 int width_mid = (width + width_min) / 2;
3347 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3348 width = width_mid;
3349 else if (width_min < width_mid)
3350 width_min = width_mid;
3351 else
3352 break;
3355 return width;
3358 /* Recursively rewrite our linearized statements so that the operators
3359 match those in OPS[OPINDEX], putting the computation in rank
3360 order and trying to allow operations to be executed in
3361 parallel. */
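/* Illustrative sketch (not taken from the sources): with five operands
   op1 .. op5 and width 2, the loop below would emit roughly
     _t1 = op5 + op4;
     _t2 = op3 + op2;
     _t3 = _t1 + _t2;
     lhs = _t3 + op1;
   so that _t1 and _t2 are independent and can execute in the same cycle.  */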
3363 static void
3364 rewrite_expr_tree_parallel (gimple stmt, int width,
3365 vec<operand_entry_t> ops)
3367 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3368 int op_num = ops.length ();
3369 int stmt_num = op_num - 1;
3370 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3371 int op_index = op_num - 1;
3372 int stmt_index = 0;
3373 int ready_stmts_end = 0;
3374 int i = 0;
3375 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3377 /* We start expression rewriting from the top statements.
3378 So, in this loop we create a full list of statements
3379 we will work with. */
3380 stmts[stmt_num - 1] = stmt;
3381 for (i = stmt_num - 2; i >= 0; i--)
3382 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3384 for (i = 0; i < stmt_num; i++)
3386 tree op1, op2;
3388 /* Determine whether we should use results of
3389 already handled statements or not. */
3390 if (ready_stmts_end == 0
3391 && (i - stmt_index >= width || op_index < 1))
3392 ready_stmts_end = i;
3394       /* Now we choose operands for the next statement.  A nonzero
3395          value in ready_stmts_end means that we should use
3396          the result of already generated statements as a new operand.  */
3397 if (ready_stmts_end > 0)
3399 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3400 if (ready_stmts_end > stmt_index)
3401 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3402 else if (op_index >= 0)
3403 op2 = ops[op_index--]->op;
3404 else
3406 gcc_assert (stmt_index < i);
3407 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3410 if (stmt_index >= ready_stmts_end)
3411 ready_stmts_end = 0;
3413 else
3415 if (op_index > 1)
3416 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3417 op2 = ops[op_index--]->op;
3418 op1 = ops[op_index--]->op;
3421 /* If we emit the last statement then we should put
3422 operands into the last statement. It will also
3423 break the loop. */
3424 if (op_index < 0 && stmt_index == i)
3425 i = stmt_num - 1;
3427 if (dump_file && (dump_flags & TDF_DETAILS))
3429 fprintf (dump_file, "Transforming ");
3430 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3433 /* We keep original statement only for the last one. All
3434 others are recreated. */
3435 if (i == stmt_num - 1)
3437 gimple_assign_set_rhs1 (stmts[i], op1);
3438 gimple_assign_set_rhs2 (stmts[i], op2);
3439 update_stmt (stmts[i]);
3441 else
3442 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3444 if (dump_file && (dump_flags & TDF_DETAILS))
3446 fprintf (dump_file, " into ");
3447 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3451 remove_visited_stmt_chain (last_rhs1);
3454 /* Transform STMT, which is really (A + B) + (C + D), into the
3455    left-linear form ((A+B)+C)+D.
3456 Recurse on D if necessary. */
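/* Illustrative sketch (exact operand order may differ): for
     x_1 = a + b;  y_2 = c + d;  z_3 = x_1 + y_2;
   the rewrite below produces a left-linear chain such as
     x_1 = a + b;  _4 = x_1 + d;  z_3 = _4 + c;  */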
3458 static void
3459 linearize_expr (gimple stmt)
3461 gimple_stmt_iterator gsi;
3462 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3463 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3464 gimple oldbinrhs = binrhs;
3465 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3466 gimple newbinrhs = NULL;
3467 struct loop *loop = loop_containing_stmt (stmt);
3468 tree lhs = gimple_assign_lhs (stmt);
3470 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3471 && is_reassociable_op (binrhs, rhscode, loop));
3473 gsi = gsi_for_stmt (stmt);
3475 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3476 binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3477 make_ssa_name (TREE_TYPE (lhs), NULL),
3478 gimple_assign_lhs (binlhs),
3479 gimple_assign_rhs2 (binrhs));
3480 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3481 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3482 gimple_set_uid (binrhs, gimple_uid (stmt));
3484 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3485 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3487 if (dump_file && (dump_flags & TDF_DETAILS))
3489 fprintf (dump_file, "Linearized: ");
3490 print_gimple_stmt (dump_file, stmt, 0, 0);
3493 reassociate_stats.linearized++;
3494 update_stmt (stmt);
3496 gsi = gsi_for_stmt (oldbinrhs);
3497 gsi_remove (&gsi, true);
3498 release_defs (oldbinrhs);
3500 gimple_set_visited (stmt, true);
3501 gimple_set_visited (binlhs, true);
3502 gimple_set_visited (binrhs, true);
3504 /* Tail recurse on the new rhs if it still needs reassociation. */
3505 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3506 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3507 want to change the algorithm while converting to tuples. */
3508 linearize_expr (stmt);
3511 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3512 it. Otherwise, return NULL. */
3514 static gimple
3515 get_single_immediate_use (tree lhs)
3517 use_operand_p immuse;
3518 gimple immusestmt;
3520 if (TREE_CODE (lhs) == SSA_NAME
3521 && single_imm_use (lhs, &immuse, &immusestmt)
3522 && is_gimple_assign (immusestmt))
3523 return immusestmt;
3525 return NULL;
3528 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3529 representing the negated value. Insertions of any necessary
3530 instructions go before GSI.
3531 This function is recursive in that, if you hand it "a_5" as the
3532 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3533 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3535 static tree
3536 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3538 gimple negatedefstmt = NULL;
3539 tree resultofnegate;
3540 gimple_stmt_iterator gsi;
3541 unsigned int uid;
3543 /* If we are trying to negate a name, defined by an add, negate the
3544 add operands instead. */
3545 if (TREE_CODE (tonegate) == SSA_NAME)
3546 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3547 if (TREE_CODE (tonegate) == SSA_NAME
3548 && is_gimple_assign (negatedefstmt)
3549 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3550 && has_single_use (gimple_assign_lhs (negatedefstmt))
3551 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3553 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3554 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3555 tree lhs = gimple_assign_lhs (negatedefstmt);
3556 gimple g;
3558 gsi = gsi_for_stmt (negatedefstmt);
3559 rhs1 = negate_value (rhs1, &gsi);
3561 gsi = gsi_for_stmt (negatedefstmt);
3562 rhs2 = negate_value (rhs2, &gsi);
3564 gsi = gsi_for_stmt (negatedefstmt);
3565 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3566 gimple_set_visited (negatedefstmt, true);
3567 g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3568 gimple_set_uid (g, gimple_uid (negatedefstmt));
3569 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3570 return lhs;
3573 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3574 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3575 NULL_TREE, true, GSI_SAME_STMT);
3576 gsi = *gsip;
3577 uid = gimple_uid (gsi_stmt (gsi));
3578 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3580 gimple stmt = gsi_stmt (gsi);
3581 if (gimple_uid (stmt) != 0)
3582 break;
3583 gimple_set_uid (stmt, uid);
3585 return resultofnegate;
3588 /* Return true if we should break up the subtract in STMT into an add
3589    with negate.  This is true when the subtract operands are really
3590 adds, or the subtract itself is used in an add expression. In
3591 either case, breaking up the subtract into an add with negate
3592 exposes the adds to reassociation. */
3594 static bool
3595 should_break_up_subtract (gimple stmt)
3597 tree lhs = gimple_assign_lhs (stmt);
3598 tree binlhs = gimple_assign_rhs1 (stmt);
3599 tree binrhs = gimple_assign_rhs2 (stmt);
3600 gimple immusestmt;
3601 struct loop *loop = loop_containing_stmt (stmt);
3603 if (TREE_CODE (binlhs) == SSA_NAME
3604 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3605 return true;
3607 if (TREE_CODE (binrhs) == SSA_NAME
3608 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3609 return true;
3611 if (TREE_CODE (lhs) == SSA_NAME
3612 && (immusestmt = get_single_immediate_use (lhs))
3613 && is_gimple_assign (immusestmt)
3614 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3615 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3616 return true;
3617 return false;
3620 /* Transform STMT from A - B into A + -B. */
3622 static void
3623 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3625 tree rhs1 = gimple_assign_rhs1 (stmt);
3626 tree rhs2 = gimple_assign_rhs2 (stmt);
3628 if (dump_file && (dump_flags & TDF_DETAILS))
3630 fprintf (dump_file, "Breaking up subtract ");
3631 print_gimple_stmt (dump_file, stmt, 0, 0);
3634 rhs2 = negate_value (rhs2, gsip);
3635 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3636 update_stmt (stmt);
3639 /* Determine whether STMT is a builtin call that raises an SSA name
3640 to an integer power and has only one use. If so, and this is early
3641 reassociation and unsafe math optimizations are permitted, place
3642 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3643 If any of these conditions does not hold, return FALSE. */
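/* For example, _1 = __builtin_pow (x_2, 3.0) with a singly used lhs yields
   *BASE = x_2 and *EXPONENT = 3, whereas __builtin_pow (x_2, 2.5) is
   rejected because 2.5 does not convert exactly to an integer.  */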
3645 static bool
3646 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3648 tree fndecl, arg1;
3649 REAL_VALUE_TYPE c, cint;
3651 if (!first_pass_instance
3652 || !flag_unsafe_math_optimizations
3653 || !is_gimple_call (stmt)
3654 || !has_single_use (gimple_call_lhs (stmt)))
3655 return false;
3657 fndecl = gimple_call_fndecl (stmt);
3659 if (!fndecl
3660 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3661 return false;
3663 switch (DECL_FUNCTION_CODE (fndecl))
3665 CASE_FLT_FN (BUILT_IN_POW):
3666 *base = gimple_call_arg (stmt, 0);
3667 arg1 = gimple_call_arg (stmt, 1);
3669 if (TREE_CODE (arg1) != REAL_CST)
3670 return false;
3672 c = TREE_REAL_CST (arg1);
3674 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3675 return false;
3677 *exponent = real_to_integer (&c);
3678 real_from_integer (&cint, VOIDmode, *exponent,
3679 *exponent < 0 ? -1 : 0, 0);
3680 if (!real_identical (&c, &cint))
3681 return false;
3683 break;
3685 CASE_FLT_FN (BUILT_IN_POWI):
3686 *base = gimple_call_arg (stmt, 0);
3687 arg1 = gimple_call_arg (stmt, 1);
3689 if (!tree_fits_shwi_p (arg1))
3690 return false;
3692 *exponent = tree_to_shwi (arg1);
3693 break;
3695 default:
3696 return false;
3699 /* Expanding negative exponents is generally unproductive, so we don't
3700 complicate matters with those. Exponents of zero and one should
3701 have been handled by expression folding. */
3702 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3703 return false;
3705 return true;
3708 /* Recursively linearize a binary expression that is the RHS of STMT.
3709 Place the operands of the expression tree in the vector named OPS. */
3711 static void
3712 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3713 bool is_associative, bool set_visited)
3715 tree binlhs = gimple_assign_rhs1 (stmt);
3716 tree binrhs = gimple_assign_rhs2 (stmt);
3717 gimple binlhsdef = NULL, binrhsdef = NULL;
3718 bool binlhsisreassoc = false;
3719 bool binrhsisreassoc = false;
3720 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3721 struct loop *loop = loop_containing_stmt (stmt);
3722 tree base = NULL_TREE;
3723 HOST_WIDE_INT exponent = 0;
3725 if (set_visited)
3726 gimple_set_visited (stmt, true);
3728 if (TREE_CODE (binlhs) == SSA_NAME)
3730 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3731 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3732 && !stmt_could_throw_p (binlhsdef));
3735 if (TREE_CODE (binrhs) == SSA_NAME)
3737 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3738 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3739 && !stmt_could_throw_p (binrhsdef));
3742 /* If the LHS is not reassociable, but the RHS is, we need to swap
3743 them. If neither is reassociable, there is nothing we can do, so
3744 just put them in the ops vector. If the LHS is reassociable,
3745 linearize it. If both are reassociable, then linearize the RHS
3746 and the LHS. */
3748 if (!binlhsisreassoc)
3750 tree temp;
3752       /* If this is not an associative operation, like division, give up.  */
3753 if (!is_associative)
3755 add_to_ops_vec (ops, binrhs);
3756 return;
3759 if (!binrhsisreassoc)
3761 if (rhscode == MULT_EXPR
3762 && TREE_CODE (binrhs) == SSA_NAME
3763 && acceptable_pow_call (binrhsdef, &base, &exponent))
3765 add_repeat_to_ops_vec (ops, base, exponent);
3766 gimple_set_visited (binrhsdef, true);
3768 else
3769 add_to_ops_vec (ops, binrhs);
3771 if (rhscode == MULT_EXPR
3772 && TREE_CODE (binlhs) == SSA_NAME
3773 && acceptable_pow_call (binlhsdef, &base, &exponent))
3775 add_repeat_to_ops_vec (ops, base, exponent);
3776 gimple_set_visited (binlhsdef, true);
3778 else
3779 add_to_ops_vec (ops, binlhs);
3781 return;
3784 if (dump_file && (dump_flags & TDF_DETAILS))
3786 fprintf (dump_file, "swapping operands of ");
3787 print_gimple_stmt (dump_file, stmt, 0, 0);
3790 swap_ssa_operands (stmt,
3791 gimple_assign_rhs1_ptr (stmt),
3792 gimple_assign_rhs2_ptr (stmt));
3793 update_stmt (stmt);
3795 if (dump_file && (dump_flags & TDF_DETAILS))
3797 fprintf (dump_file, " is now ");
3798 print_gimple_stmt (dump_file, stmt, 0, 0);
3801 /* We want to make it so the lhs is always the reassociative op,
3802 so swap. */
3803 temp = binlhs;
3804 binlhs = binrhs;
3805 binrhs = temp;
3807 else if (binrhsisreassoc)
3809 linearize_expr (stmt);
3810 binlhs = gimple_assign_rhs1 (stmt);
3811 binrhs = gimple_assign_rhs2 (stmt);
3814 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3815 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3816 rhscode, loop));
3817 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3818 is_associative, set_visited);
3820 if (rhscode == MULT_EXPR
3821 && TREE_CODE (binrhs) == SSA_NAME
3822 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3824 add_repeat_to_ops_vec (ops, base, exponent);
3825 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3827 else
3828 add_to_ops_vec (ops, binrhs);
3831 /* Repropagate the negates back into subtracts, since no other pass
3832 currently does it. */
3834 static void
3835 repropagate_negates (void)
3837 unsigned int i = 0;
3838 tree negate;
3840 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3842 gimple user = get_single_immediate_use (negate);
3844 if (!user || !is_gimple_assign (user))
3845 continue;
3847 /* The negate operand can be either operand of a PLUS_EXPR
3848 (it can be the LHS if the RHS is a constant for example).
3850 Force the negate operand to the RHS of the PLUS_EXPR, then
3851 transform the PLUS_EXPR into a MINUS_EXPR. */
3852 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3854 /* If the negated operand appears on the LHS of the
3855 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3856 to force the negated operand to the RHS of the PLUS_EXPR. */
3857 if (gimple_assign_rhs1 (user) == negate)
3859 swap_ssa_operands (user,
3860 gimple_assign_rhs1_ptr (user),
3861 gimple_assign_rhs2_ptr (user));
3864 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3865 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3866 if (gimple_assign_rhs2 (user) == negate)
3868 tree rhs1 = gimple_assign_rhs1 (user);
3869 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3870 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3871 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3872 update_stmt (user);
3875 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3877 if (gimple_assign_rhs1 (user) == negate)
3879 /* We have
3880 x = -a
3881 y = x - b
3882 which we transform into
3883 x = a + b
3884 y = -x .
3885 This pushes down the negate which we possibly can merge
3886 into some other operation, hence insert it into the
3887 plus_negates vector. */
3888 gimple feed = SSA_NAME_DEF_STMT (negate);
3889 tree a = gimple_assign_rhs1 (feed);
3890 tree b = gimple_assign_rhs2 (user);
3891 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
3892 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
3893 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
3894 gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
3895 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3896 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
3897 user = gsi_stmt (gsi2);
3898 update_stmt (user);
3899 gsi_remove (&gsi, true);
3900 release_defs (feed);
3901 plus_negates.safe_push (gimple_assign_lhs (user));
3903 else
3905 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3906 rid of one operation. */
3907 gimple feed = SSA_NAME_DEF_STMT (negate);
3908 tree a = gimple_assign_rhs1 (feed);
3909 tree rhs1 = gimple_assign_rhs1 (user);
3910 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3911 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3912 update_stmt (gsi_stmt (gsi));
3918 /* Returns true if OP is of a type for which we can do reassociation.
3919 That is for integral or non-saturating fixed-point types, and for
3920    floating-point types when associative-math is enabled.  */
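/* For example, operands of type unsigned int (wrapping overflow) or of a
   non-saturating fixed-point type qualify, as does double when
   -fassociative-math is in effect; a plain signed int whose overflow is
   undefined does not.  */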
3922 static bool
3923 can_reassociate_p (tree op)
3925 tree type = TREE_TYPE (op);
3926 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3927 || NON_SAT_FIXED_POINT_TYPE_P (type)
3928 || (flag_associative_math && FLOAT_TYPE_P (type)))
3929 return true;
3930 return false;
3933 /* Break up subtract operations in block BB.
3935 We do this top down because we don't know whether the subtract is
3936 part of a possible chain of reassociation except at the top.
3938 IE given
3939 d = f + g
3940 c = a + e
3941 b = c - d
3942 q = b - r
3943 k = t - q
3945 we want to break up k = t - q, but we won't until we've transformed q
3946 = b - r, which won't be broken up until we transform b = c - d.
3948 En passant, clear the GIMPLE visited flag on every statement
3949 and set UIDs within each basic block. */
3951 static void
3952 break_up_subtract_bb (basic_block bb)
3954 gimple_stmt_iterator gsi;
3955 basic_block son;
3956 unsigned int uid = 1;
3958 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3960 gimple stmt = gsi_stmt (gsi);
3961 gimple_set_visited (stmt, false);
3962 gimple_set_uid (stmt, uid++);
3964 if (!is_gimple_assign (stmt)
3965 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3966 continue;
3968 /* Look for simple gimple subtract operations. */
3969 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
3971 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
3972 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
3973 continue;
3975 /* Check for a subtract used only in an addition. If this
3976 is the case, transform it into add of a negate for better
3977 reassociation. IE transform C = A-B into C = A + -B if C
3978 is only used in an addition. */
3979 if (should_break_up_subtract (stmt))
3980 break_up_subtract (stmt, &gsi);
3982 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
3983 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
3984 plus_negates.safe_push (gimple_assign_lhs (stmt));
3986 for (son = first_dom_son (CDI_DOMINATORS, bb);
3987 son;
3988 son = next_dom_son (CDI_DOMINATORS, son))
3989 break_up_subtract_bb (son);
3992 /* Used for repeated factor analysis. */
3993 struct repeat_factor_d
3995 /* An SSA name that occurs in a multiply chain. */
3996 tree factor;
3998 /* Cached rank of the factor. */
3999 unsigned rank;
4001 /* Number of occurrences of the factor in the chain. */
4002 HOST_WIDE_INT count;
4004 /* An SSA name representing the product of this factor and
4005 all factors appearing later in the repeated factor vector. */
4006 tree repr;
4009 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4010 typedef const struct repeat_factor_d *const_repeat_factor_t;
4013 static vec<repeat_factor> repeat_factor_vec;
4015 /* Used for sorting the repeat factor vector. Sort primarily by
4016 ascending occurrence count, secondarily by descending rank. */
4018 static int
4019 compare_repeat_factors (const void *x1, const void *x2)
4021 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4022 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4024 if (rf1->count != rf2->count)
4025 return rf1->count - rf2->count;
4027 return rf2->rank - rf1->rank;
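/* For instance, the factors of x * x * y * y * y * z * z * z * z sort
   as { x (count 2), y (count 3), z (count 4) }; factors with equal
   counts are ordered by decreasing rank.  */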
4030 /* Look for repeated operands in OPS in the multiply tree rooted at
4031 STMT. Replace them with an optimal sequence of multiplies and powi
4032 builtin calls, and remove the used operands from OPS. Return an
4033 SSA name representing the value of the replacement sequence. */
4035 static tree
4036 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4038 unsigned i, j, vec_len;
4039 int ii;
4040 operand_entry_t oe;
4041 repeat_factor_t rf1, rf2;
4042 repeat_factor rfnew;
4043 tree result = NULL_TREE;
4044 tree target_ssa, iter_result;
4045 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4046 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4047 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4048 gimple mul_stmt, pow_stmt;
4050 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4051 target. */
4052 if (!powi_fndecl)
4053 return NULL_TREE;
4055 /* Allocate the repeated factor vector. */
4056 repeat_factor_vec.create (10);
4058 /* Scan the OPS vector for all SSA names in the product and build
4059 up a vector of occurrence counts for each factor. */
4060 FOR_EACH_VEC_ELT (*ops, i, oe)
4062 if (TREE_CODE (oe->op) == SSA_NAME)
4064 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4066 if (rf1->factor == oe->op)
4068 rf1->count += oe->count;
4069 break;
4073 if (j >= repeat_factor_vec.length ())
4075 rfnew.factor = oe->op;
4076 rfnew.rank = oe->rank;
4077 rfnew.count = oe->count;
4078 rfnew.repr = NULL_TREE;
4079 repeat_factor_vec.safe_push (rfnew);
4084 /* Sort the repeated factor vector by (a) increasing occurrence count,
4085 and (b) decreasing rank. */
4086 repeat_factor_vec.qsort (compare_repeat_factors);
4088 /* It is generally best to combine as many base factors as possible
4089 into a product before applying __builtin_powi to the result.
4090 However, the sort order chosen for the repeated factor vector
4091 allows us to cache partial results for the product of the base
4092 factors for subsequent use. When we already have a cached partial
4093 result from a previous iteration, it is best to make use of it
4094 before looking for another __builtin_powi opportunity.
4096 As an example, consider x * x * y * y * y * z * z * z * z.
4097 We want to first compose the product x * y * z, raise it to the
4098 second power, then multiply this by y * z, and finally multiply
4099 by z. This can be done in 5 multiplies provided we cache y * z
4100 for use in both expressions:
4102 t1 = y * z
4103 t2 = t1 * x
4104 t3 = t2 * t2
4105 t4 = t1 * t3
4106 result = t4 * z
4108 If we instead ignored the cached y * z and first multiplied by
4109 the __builtin_powi opportunity z * z, we would get the inferior:
4111 t1 = y * z
4112 t2 = t1 * x
4113 t3 = t2 * t2
4114 t4 = z * z
4115 t5 = t3 * t4
4116 result = t5 * y */
4118 vec_len = repeat_factor_vec.length ();
4120 /* Repeatedly look for opportunities to create a builtin_powi call. */
4121 while (true)
4123 HOST_WIDE_INT power;
4125 /* First look for the largest cached product of factors from
4126 preceding iterations. If found, create a builtin_powi for
4127 it if the minimum occurrence count for its factors is at
4128 least 2, or just use this cached product as our next
4129 multiplicand if the minimum occurrence count is 1. */
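/* In the example above, after the first __builtin_powi call the cached
   product y * z still has an occurrence count of 1, so it is simply
   multiplied into the result rather than raised to a power again.  */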
4130 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4132 if (rf1->repr && rf1->count > 0)
4133 break;
4136 if (j < vec_len)
4138 power = rf1->count;
4140 if (power == 1)
4142 iter_result = rf1->repr;
4144 if (dump_file && (dump_flags & TDF_DETAILS))
4146 unsigned elt;
4147 repeat_factor_t rf;
4148 fputs ("Multiplying by cached product ", dump_file);
4149 for (elt = j; elt < vec_len; elt++)
4151 rf = &repeat_factor_vec[elt];
4152 print_generic_expr (dump_file, rf->factor, 0);
4153 if (elt < vec_len - 1)
4154 fputs (" * ", dump_file);
4156 fputs ("\n", dump_file);
4159 else
4161 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4162 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4163 build_int_cst (integer_type_node,
4164 power));
4165 gimple_call_set_lhs (pow_stmt, iter_result);
4166 gimple_set_location (pow_stmt, gimple_location (stmt));
4167 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4169 if (dump_file && (dump_flags & TDF_DETAILS))
4171 unsigned elt;
4172 repeat_factor_t rf;
4173 fputs ("Building __builtin_pow call for cached product (",
4174 dump_file);
4175 for (elt = j; elt < vec_len; elt++)
4177 rf = &repeat_factor_vec[elt];
4178 print_generic_expr (dump_file, rf->factor, 0);
4179 if (elt < vec_len - 1)
4180 fputs (" * ", dump_file);
4182 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4183 power);
4187 else
4189 /* Otherwise, find the first factor in the repeated factor
4190 vector whose occurrence count is at least 2. If no such
4191 factor exists, there are no builtin_powi opportunities
4192 remaining. */
4193 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4195 if (rf1->count >= 2)
4196 break;
4199 if (j >= vec_len)
4200 break;
4202 power = rf1->count;
4204 if (dump_file && (dump_flags & TDF_DETAILS))
4206 unsigned elt;
4207 repeat_factor_t rf;
4208 fputs ("Building __builtin_pow call for (", dump_file);
4209 for (elt = j; elt < vec_len; elt++)
4211 rf = &repeat_factor_vec[elt];
4212 print_generic_expr (dump_file, rf->factor, 0);
4213 if (elt < vec_len - 1)
4214 fputs (" * ", dump_file);
4216 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4219 reassociate_stats.pows_created++;
4221 /* Visit each element of the vector in reverse order (so that
4222 high-occurrence elements are visited first, and within the
4223 same occurrence count, lower-ranked elements are visited
4224 first). Form a linear product of all elements in this order
4225 whose occurrence count is at least that of element J.
4226 Record the SSA name representing the product of each element
4227 with all subsequent elements in the vector. */
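/* For the example above this sets z's representative to z itself, then
   builds t1 = y * z and t2 = t1 * x, caching each partial product in
   the corresponding repr field.  */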
4228 if (j == vec_len - 1)
4229 rf1->repr = rf1->factor;
4230 else
4232 for (ii = vec_len - 2; ii >= (int)j; ii--)
4234 tree op1, op2;
4236 rf1 = &repeat_factor_vec[ii];
4237 rf2 = &repeat_factor_vec[ii + 1];
4239 /* Init the last factor's representative to be itself. */
4240 if (!rf2->repr)
4241 rf2->repr = rf2->factor;
4243 op1 = rf1->factor;
4244 op2 = rf2->repr;
4246 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4247 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4248 target_ssa,
4249 op1, op2);
4250 gimple_set_location (mul_stmt, gimple_location (stmt));
4251 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4252 rf1->repr = target_ssa;
4254 /* Don't reprocess the multiply we just introduced. */
4255 gimple_set_visited (mul_stmt, true);
4259 /* Form a call to __builtin_powi for the maximum product
4260 just formed, raised to the power obtained earlier. */
4261 rf1 = &repeat_factor_vec[j];
4262 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4263 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4264 build_int_cst (integer_type_node,
4265 power));
4266 gimple_call_set_lhs (pow_stmt, iter_result);
4267 gimple_set_location (pow_stmt, gimple_location (stmt));
4268 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4271 /* If we previously formed at least one other builtin_powi call,
4272 form the product of this one and those others. */
4273 if (result)
4275 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4276 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4277 result, iter_result);
4278 gimple_set_location (mul_stmt, gimple_location (stmt));
4279 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4280 gimple_set_visited (mul_stmt, true);
4281 result = new_result;
4283 else
4284 result = iter_result;
4286 /* Decrement the occurrence count of each element in the product
4287 by the count found above, and remove this many copies of each
4288 factor from OPS. */
4289 for (i = j; i < vec_len; i++)
4291 unsigned k = power;
4292 unsigned n;
4294 rf1 = &repeat_factor_vec[i];
4295 rf1->count -= power;
4297 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4299 if (oe->op == rf1->factor)
4301 if (oe->count <= k)
4303 ops->ordered_remove (n);
4304 k -= oe->count;
4306 if (k == 0)
4307 break;
4309 else
4311 oe->count -= k;
4312 break;
4319 /* At this point all elements in the repeated factor vector have a
4320 remaining occurrence count of 0 or 1, and those with a count of 1
4321 don't have cached representatives. Re-sort the ops vector and
4322 clean up. */
4323 ops->qsort (sort_by_operand_rank);
4324 repeat_factor_vec.release ();
4326 /* Return the final product computed herein. Note that there may
4327 still be some elements with single occurrence count left in OPS;
4328 those will be handled by the normal reassociation logic. */
4329 return result;
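/* A minimal sketch (not part of the pass) of the statement sequence the
   code above produces for x * x * y * y * y * z * z * z * z, assuming
   __builtin_powi is available for the type; the temporary names are
   hypothetical stand-ins for the "reassocpow" SSA names.  */
#if 0
static double
attempt_builtin_powi_example (double x, double y, double z)
{
  double t1 = y * z;                  /* cached partial product */
  double t2 = t1 * x;                 /* x * y * z */
  double t3 = __builtin_powi (t2, 2); /* (x*y*z)^2 */
  double t4 = t3 * t1;                /* reuse the cached y * z */
  return t4 * z;                      /* final factor of z */
}
#endif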
4332 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4334 static void
4335 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4337 tree rhs1;
4339 if (dump_file && (dump_flags & TDF_DETAILS))
4341 fprintf (dump_file, "Transforming ");
4342 print_gimple_stmt (dump_file, stmt, 0, 0);
4345 rhs1 = gimple_assign_rhs1 (stmt);
4346 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4347 update_stmt (stmt);
4348 remove_visited_stmt_chain (rhs1);
4350 if (dump_file && (dump_flags & TDF_DETAILS))
4352 fprintf (dump_file, " into ");
4353 print_gimple_stmt (dump_file, stmt, 0, 0);
4357 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4359 static void
4360 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4361 tree rhs1, tree rhs2)
4363 if (dump_file && (dump_flags & TDF_DETAILS))
4365 fprintf (dump_file, "Transforming ");
4366 print_gimple_stmt (dump_file, stmt, 0, 0);
4369 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4370 update_stmt (gsi_stmt (*gsi));
4371 remove_visited_stmt_chain (rhs1);
4373 if (dump_file && (dump_flags & TDF_DETAILS))
4375 fprintf (dump_file, " into ");
4376 print_gimple_stmt (dump_file, stmt, 0, 0);
4380 /* Reassociate expressions in basic block BB and in its children in
4381 the post-dominator tree. */
4383 static void
4384 reassociate_bb (basic_block bb)
4386 gimple_stmt_iterator gsi;
4387 basic_block son;
4388 gimple stmt = last_stmt (bb);
4390 if (stmt && !gimple_visited_p (stmt))
4391 maybe_optimize_range_tests (stmt);
4393 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4395 stmt = gsi_stmt (gsi);
4397 if (is_gimple_assign (stmt)
4398 && !stmt_could_throw_p (stmt))
4400 tree lhs, rhs1, rhs2;
4401 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4403 /* If this is not a gimple binary expression, there is
4404 nothing for us to do with it. */
4405 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4406 continue;
4408 /* If this was part of an already processed statement,
4409 we don't need to touch it again. */
4410 if (gimple_visited_p (stmt))
4412 /* This statement might have become dead because of previous
4413 reassociations. */
4414 if (has_zero_uses (gimple_get_lhs (stmt)))
4416 gsi_remove (&gsi, true);
4417 release_defs (stmt);
4418 /* We might have removed the last stmt above, which
4419 leaves the iterator at the end of the sequence.
4420 In that case reset it to the last stmt of the block;
4421 if the block is now empty because we removed its only
4422 statement, the iterator is still at the end and we
4423 need to bail out. */
4424 if (gsi_end_p (gsi))
4426 gsi = gsi_last_bb (bb);
4427 if (gsi_end_p (gsi))
4428 break;
4431 continue;
4434 lhs = gimple_assign_lhs (stmt);
4435 rhs1 = gimple_assign_rhs1 (stmt);
4436 rhs2 = gimple_assign_rhs2 (stmt);
4438 /* For non-bit or min/max operations we can't associate
4439 all types. Verify that here. */
4440 if (rhs_code != BIT_IOR_EXPR
4441 && rhs_code != BIT_AND_EXPR
4442 && rhs_code != BIT_XOR_EXPR
4443 && rhs_code != MIN_EXPR
4444 && rhs_code != MAX_EXPR
4445 && (!can_reassociate_p (lhs)
4446 || !can_reassociate_p (rhs1)
4447 || !can_reassociate_p (rhs2)))
4448 continue;
4450 if (associative_tree_code (rhs_code))
4452 auto_vec<operand_entry_t> ops;
4453 tree powi_result = NULL_TREE;
4455 /* There may be no immediate uses left by the time we
4456 get here because we may have eliminated them all. */
4457 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4458 continue;
4460 gimple_set_visited (stmt, true);
4461 linearize_expr_tree (&ops, stmt, true, true);
4462 ops.qsort (sort_by_operand_rank);
4463 optimize_ops_list (rhs_code, &ops);
4464 if (undistribute_ops_list (rhs_code, &ops,
4465 loop_containing_stmt (stmt)))
4467 ops.qsort (sort_by_operand_rank);
4468 optimize_ops_list (rhs_code, &ops);
4471 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4472 optimize_range_tests (rhs_code, &ops);
4474 if (first_pass_instance
4475 && rhs_code == MULT_EXPR
4476 && flag_unsafe_math_optimizations)
4477 powi_result = attempt_builtin_powi (stmt, &ops);
4479 /* If the operand vector is now empty, all operands were
4480 consumed by the __builtin_powi optimization. */
4481 if (ops.length () == 0)
4482 transform_stmt_to_copy (&gsi, stmt, powi_result);
4483 else if (ops.length () == 1)
4485 tree last_op = ops.last ()->op;
4487 if (powi_result)
4488 transform_stmt_to_multiply (&gsi, stmt, last_op,
4489 powi_result);
4490 else
4491 transform_stmt_to_copy (&gsi, stmt, last_op);
4493 else
4495 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4496 int ops_num = ops.length ();
4497 int width = get_reassociation_width (ops_num, rhs_code, mode);
4498 tree new_lhs = lhs;
4500 if (dump_file && (dump_flags & TDF_DETAILS))
4501 fprintf (dump_file,
4502 "Width = %d was chosen for reassociation\n", width);
4504 if (width > 1
4505 && ops.length () > 3)
4506 rewrite_expr_tree_parallel (stmt, width, ops);
4507 else
4509 /* When there are three operands left, we want
4510 to make sure the ones that get the double
4511 binary op are chosen wisely. */
4512 int len = ops.length ();
4513 if (len >= 3)
4514 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4516 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4517 powi_result != NULL);
4520 /* If we combined some repeated factors into a
4521 __builtin_powi call, multiply that result by the
4522 reassociated operands. */
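/* That is, the rewritten statement computes the reassociated product
   into a fresh SSA name, and a new multiply of the __builtin_powi
   result by that name is emitted to define the original LHS.  */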
4523 if (powi_result)
4525 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4526 tree type = TREE_TYPE (lhs);
4527 tree target_ssa = make_temp_ssa_name (type, NULL,
4528 "reassocpow");
4529 gimple_set_lhs (lhs_stmt, target_ssa);
4530 update_stmt (lhs_stmt);
4531 if (lhs != new_lhs)
4532 target_ssa = new_lhs;
4533 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4534 powi_result,
4535 target_ssa);
4536 gimple_set_location (mul_stmt, gimple_location (stmt));
4537 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4543 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4544 son;
4545 son = next_dom_son (CDI_POST_DOMINATORS, son))
4546 reassociate_bb (son);
4549 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4550 void debug_ops_vector (vec<operand_entry_t> ops);
4552 /* Dump the operand entry vector OPS to FILE. */
4554 void
4555 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4557 operand_entry_t oe;
4558 unsigned int i;
4560 FOR_EACH_VEC_ELT (ops, i, oe)
4562 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4563 print_generic_expr (file, oe->op, 0);
4567 /* Dump the operand entry vector OPS to STDERR. */
4569 DEBUG_FUNCTION void
4570 debug_ops_vector (vec<operand_entry_t> ops)
4572 dump_ops_vector (stderr, ops);
4575 static void
4576 do_reassoc (void)
4578 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4579 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4582 /* Initialize the reassociation pass. */
4584 static void
4585 init_reassoc (void)
4587 int i;
4588 long rank = 2;
4589 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4591 /* Find the loops, so that we can prevent moving calculations in
4592 them. */
4593 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4595 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4597 operand_entry_pool = create_alloc_pool ("operand entry pool",
4598 sizeof (struct operand_entry), 30);
4599 next_operand_entry_id = 0;
4601 /* Reverse RPO (Reverse Post Order) will give us something where
4602 deeper loops come later. */
4603 pre_and_rev_post_order_compute (NULL, bbs, false);
4604 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4605 operand_rank = pointer_map_create ();
4607 /* Give each default definition a distinct rank. This includes
4608 parameters and the static chain. Walk backwards over all
4609 SSA names so that we get proper rank ordering according
4610 to tree_swap_operands_p. */
4611 for (i = num_ssa_names - 1; i > 0; --i)
4613 tree name = ssa_name (i);
4614 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4615 insert_operand_rank (name, ++rank);
4618 /* Set up rank for each BB */
4619 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
4620 bb_rank[bbs[i]] = ++rank << 16;
4622 free (bbs);
4623 calculate_dominance_info (CDI_POST_DOMINATORS);
4624 plus_negates = vNULL;
4627 /* Cleanup after the reassociation pass, and print stats if
4628 requested. */
4630 static void
4631 fini_reassoc (void)
4633 statistics_counter_event (cfun, "Linearized",
4634 reassociate_stats.linearized);
4635 statistics_counter_event (cfun, "Constants eliminated",
4636 reassociate_stats.constants_eliminated);
4637 statistics_counter_event (cfun, "Ops eliminated",
4638 reassociate_stats.ops_eliminated);
4639 statistics_counter_event (cfun, "Statements rewritten",
4640 reassociate_stats.rewritten);
4641 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4642 reassociate_stats.pows_encountered);
4643 statistics_counter_event (cfun, "Built-in powi calls created",
4644 reassociate_stats.pows_created);
4646 pointer_map_destroy (operand_rank);
4647 free_alloc_pool (operand_entry_pool);
4648 free (bb_rank);
4649 plus_negates.release ();
4650 free_dominance_info (CDI_POST_DOMINATORS);
4651 loop_optimizer_finalize ();
4654 /* Gate and execute functions for Reassociation. */
4656 static unsigned int
4657 execute_reassoc (void)
4659 init_reassoc ();
4661 do_reassoc ();
4662 repropagate_negates ();
4664 fini_reassoc ();
4665 return 0;
4668 static bool
4669 gate_tree_ssa_reassoc (void)
4671 return flag_tree_reassoc != 0;
4674 namespace {
4676 const pass_data pass_data_reassoc =
4678 GIMPLE_PASS, /* type */
4679 "reassoc", /* name */
4680 OPTGROUP_NONE, /* optinfo_flags */
4681 true, /* has_gate */
4682 true, /* has_execute */
4683 TV_TREE_REASSOC, /* tv_id */
4684 ( PROP_cfg | PROP_ssa ), /* properties_required */
4685 0, /* properties_provided */
4686 0, /* properties_destroyed */
4687 0, /* todo_flags_start */
4688 ( TODO_verify_ssa
4689 | TODO_update_ssa_only_virtuals
4690 | TODO_verify_flow ), /* todo_flags_finish */
4693 class pass_reassoc : public gimple_opt_pass
4695 public:
4696 pass_reassoc (gcc::context *ctxt)
4697 : gimple_opt_pass (pass_data_reassoc, ctxt)
4700 /* opt_pass methods: */
4701 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4702 bool gate () { return gate_tree_ssa_reassoc (); }
4703 unsigned int execute () { return execute_reassoc (); }
4705 }; // class pass_reassoc
4707 } // anon namespace
4709 gimple_opt_pass *
4710 make_pass_reassoc (gcc::context *ctxt)
4712 return new pass_reassoc (ctxt);