[official-gcc.git] / gcc / tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2014 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-inline.h"
33 #include "hash-map.h"
34 #include "tree-ssa-alias.h"
35 #include "internal-fn.h"
36 #include "gimple-fold.h"
37 #include "tree-eh.h"
38 #include "gimple-expr.h"
39 #include "is-a.h"
40 #include "gimple.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "gimple-ssa.h"
44 #include "tree-cfg.h"
45 #include "tree-phinodes.h"
46 #include "ssa-iterators.h"
47 #include "stringpool.h"
48 #include "tree-ssanames.h"
49 #include "tree-ssa-loop-niter.h"
50 #include "tree-ssa-loop.h"
51 #include "expr.h"
52 #include "tree-dfa.h"
53 #include "tree-ssa.h"
54 #include "tree-iterator.h"
55 #include "tree-pass.h"
56 #include "alloc-pool.h"
57 #include "langhooks.h"
58 #include "cfgloop.h"
59 #include "flags.h"
60 #include "target.h"
61 #include "params.h"
62 #include "diagnostic-core.h"
63 #include "builtins.h"
65 /* This is a simple global reassociation pass. It is, in part, based
66 on the LLVM pass of the same name (They do some things more/less
67 than we do, in different orders, etc).
69 It consists of five steps:
71 1. Breaking up subtract operations into addition + negate, where
72 it would promote the reassociation of adds.
74 2. Left linearization of the expression trees, so that (A+B)+(C+D)
75 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
76 During linearization, we place the operands of the binary
77 expressions into a vector of operand_entry_t.
79 3. Optimization of the operand lists, eliminating things like a +
80 -a, a & a, etc.
82 3a. Combine repeated factors with the same occurrence counts
83 into a __builtin_powi call that will later be optimized into
84 an optimal number of multiplies.
86 4. Rewrite the expression trees we linearized and optimized so
87 they are in proper rank order.
89 5. Repropagate negates, as nothing else will clean it up ATM.
91 A bit of theory on #4, since nobody seems to write anything down
92 about why it makes sense to do it the way they do it:
94 We could do this much nicer theoretically, but don't (for reasons
95 explained after how to do it theoretically nice :P).
97 In order to promote the most redundancy elimination, you want
98 binary expressions whose operands are the same rank (or
99 preferably, the same value) exposed to the redundancy eliminator,
100 for possible elimination.
102 So the way to do this if we really cared, is to build the new op
103 tree from the leaves to the roots, merging as you go, and putting the
104 new op on the end of the worklist, until you are left with one
105 thing on the worklist.
107 IE if you have to rewrite the following set of operands (listed with
108 rank in parentheses), with opcode PLUS_EXPR:
110 a (1), b (1), c (1), d (2), e (2)
113 We start with our merge worklist empty, and the ops list with all of
114 those on it.
116 You want to first merge all leaves of the same rank, as much as
117 possible.
119 So first build a binary op of
121 mergetmp = a + b, and put "mergetmp" on the merge worklist.
123 Because there is no three operand form of PLUS_EXPR, c is not going to
124 be exposed to redundancy elimination as a rank 1 operand.
126 So you might as well throw it on the merge worklist (you could also
127 consider it to now be a rank two operand, and merge it with d and e,
128 but in this case, you then have evicted e from a binary op. So at
129 least in this situation, you can't win.)
131 Then build a binary op of d + e
132 mergetmp2 = d + e
134 and put mergetmp2 on the merge worklist.
136 so merge worklist = {mergetmp, c, mergetmp2}
138 Continue building binary ops of these operations until you have only
139 one operation left on the worklist.
141 So we have
143 build binary op
144 mergetmp3 = mergetmp + c
146 worklist = {mergetmp2, mergetmp3}
148 mergetmp4 = mergetmp2 + mergetmp3
150 worklist = {mergetmp4}
152 because we have one operation left, we can now just set the original
153 statement equal to the result of that operation.
155 This will at least expose a + b and d + e to redundancy elimination
156 as binary operations.
158 For extra points, you can reuse the old statements to build the
159 mergetmps, since you shouldn't run out.
161 So why don't we do this?
163 Because it's expensive, and rarely will help. Most trees we are
164 reassociating have 3 or fewer ops. If they have 2 ops, they already
165 will be written into a nice single binary op. If you have 3 ops, a
166 single simple check suffices to tell you whether the first two are of the
167 same rank. If so, you know to order it
169 mergetmp = op1 + op2
170 newstmt = mergetmp + op3
172 instead of
173 mergetmp = op2 + op3
174 newstmt = mergetmp + op1
176 If all three are of the same rank, you can't expose them all in a
177 single binary operator anyway, so the above is *still* the best you
178 can do.
180 Thus, this is what we do. When we have three ops left, we check to see
181 what order to put them in, and call it a day. As a nod to vector sum
182 reduction, we check if any of the ops are really a phi node that is a
183 destructive update for the associating op, and keep the destructive
184 update together for vector sum reduction recognition. */
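/* A minimal standalone sketch (not part of this file) of the textbook
   worklist scheme described above: take two entries off the front of the
   worklist, emit a merge, push the result on the back, and stop once a
   single entry remains.  Rank bookkeeping is ignored for brevity; the
   operand names are the hypothetical a..e from the example above.  */
#include <stdio.h>

int
main (void)
{
  const char *worklist[16] = { "a", "b", "c", "d", "e" };
  char names[16][16];
  int n = 5, next_tmp = 0;

  while (n > 1)
    {
      /* Merge the first two entries and append the result.  */
      snprintf (names[next_tmp], sizeof names[next_tmp],
		"mergetmp%d", next_tmp);
      printf ("%s = %s + %s\n", names[next_tmp], worklist[0], worklist[1]);
      for (int i = 2; i < n; i++)
	worklist[i - 2] = worklist[i];
      worklist[n - 2] = names[next_tmp];
      n--;
      next_tmp++;
    }
  printf ("final result is %s\n", worklist[0]);
  return 0;
}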
187 /* Statistics */
188 static struct
190 int linearized;
191 int constants_eliminated;
192 int ops_eliminated;
193 int rewritten;
194 int pows_encountered;
195 int pows_created;
196 } reassociate_stats;
198 /* Operator, rank pair. */
199 typedef struct operand_entry
201 unsigned int rank;
202 int id;
203 tree op;
204 unsigned int count;
205 } *operand_entry_t;
207 static alloc_pool operand_entry_pool;
209 /* This is used to assign a unique ID to each struct operand_entry
210 so that qsort results are identical on different hosts. */
211 static int next_operand_entry_id;
213 /* Starting rank number for a given basic block, so that we can rank
214 operations using unmovable instructions in that BB based on the bb
215 depth. */
216 static long *bb_rank;
218 /* Operand->rank hashtable. */
219 static hash_map<tree, long> *operand_rank;
221 /* Forward decls. */
222 static long get_rank (tree);
223 static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
225 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
226 possibly added by gsi_remove. */
228 bool
229 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
231 gimple stmt = gsi_stmt (*gsi);
233 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
234 return gsi_remove (gsi, true);
236 gimple_stmt_iterator prev = *gsi;
237 gsi_prev (&prev);
238 unsigned uid = gimple_uid (stmt);
239 basic_block bb = gimple_bb (stmt);
240 bool ret = gsi_remove (gsi, true);
241 if (!gsi_end_p (prev))
242 gsi_next (&prev);
243 else
244 prev = gsi_start_bb (bb);
245 gimple end_stmt = gsi_stmt (*gsi);
246 while ((stmt = gsi_stmt (prev)) != end_stmt)
248 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
249 gimple_set_uid (stmt, uid);
250 gsi_next (&prev);
252 return ret;
255 /* Bias amount for loop-carried phis. We want this to be larger than
256 the depth of any reassociation tree we can see, but not larger than
257 the rank difference between two blocks. */
258 #define PHI_LOOP_BIAS (1 << 15)
260 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
261 an innermost loop, and the phi has only a single use which is inside
262 the loop, then the rank is the block rank of the loop latch plus an
263 extra bias for the loop-carried dependence. This causes expressions
264 calculated into an accumulator variable to be independent for each
265 iteration of the loop. If STMT is some other phi, the rank is the
266 block rank of its containing block. */
267 static long
268 phi_rank (gimple stmt)
270 basic_block bb = gimple_bb (stmt);
271 struct loop *father = bb->loop_father;
272 tree res;
273 unsigned i;
274 use_operand_p use;
275 gimple use_stmt;
277 /* We only care about real loops (those with a latch). */
278 if (!father->latch)
279 return bb_rank[bb->index];
281 /* Interesting phis must be in headers of innermost loops. */
282 if (bb != father->header
283 || father->inner)
284 return bb_rank[bb->index];
286 /* Ignore virtual SSA_NAMEs. */
287 res = gimple_phi_result (stmt);
288 if (virtual_operand_p (res))
289 return bb_rank[bb->index];
291 /* The phi definition must have a single use, and that use must be
292 within the loop. Otherwise this isn't an accumulator pattern. */
293 if (!single_imm_use (res, &use, &use_stmt)
294 || gimple_bb (use_stmt)->loop_father != father)
295 return bb_rank[bb->index];
297 /* Look for phi arguments from within the loop. If found, bias this phi. */
298 for (i = 0; i < gimple_phi_num_args (stmt); i++)
300 tree arg = gimple_phi_arg_def (stmt, i);
301 if (TREE_CODE (arg) == SSA_NAME
302 && !SSA_NAME_IS_DEFAULT_DEF (arg))
304 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
305 if (gimple_bb (def_stmt)->loop_father == father)
306 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
310 /* Must be an uninteresting phi. */
311 return bb_rank[bb->index];
314 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
315 loop-carried dependence of an innermost loop, return TRUE; else
316 return FALSE. */
317 static bool
318 loop_carried_phi (tree exp)
320 gimple phi_stmt;
321 long block_rank;
323 if (TREE_CODE (exp) != SSA_NAME
324 || SSA_NAME_IS_DEFAULT_DEF (exp))
325 return false;
327 phi_stmt = SSA_NAME_DEF_STMT (exp);
329 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
330 return false;
332 /* Non-loop-carried phis have block rank. Loop-carried phis have
333 an additional bias added in. If this phi doesn't have block rank,
334 it's biased and should not be propagated. */
335 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
337 if (phi_rank (phi_stmt) != block_rank)
338 return true;
340 return false;
343 /* Return the maximum of RANK and the rank that should be propagated
344 from expression OP. For most operands, this is just the rank of OP.
345 For loop-carried phis, the value is zero to avoid undoing the bias
346 in favor of the phi. */
347 static long
348 propagate_rank (long rank, tree op)
350 long op_rank;
352 if (loop_carried_phi (op))
353 return rank;
355 op_rank = get_rank (op);
357 return MAX (rank, op_rank);
360 /* Look up the operand rank structure for expression E. */
362 static inline long
363 find_operand_rank (tree e)
365 long *slot = operand_rank->get (e);
366 return slot ? *slot : -1;
369 /* Insert {E,RANK} into the operand rank hashtable. */
371 static inline void
372 insert_operand_rank (tree e, long rank)
374 gcc_assert (rank > 0);
375 gcc_assert (!operand_rank->put (e, rank));
378 /* Given an expression E, return the rank of the expression. */
380 static long
381 get_rank (tree e)
383 /* Constants have rank 0. */
384 if (is_gimple_min_invariant (e))
385 return 0;
387 /* SSA_NAME's have the rank of the expression they are the result
388 of.
389 For globals and uninitialized values, the rank is 0.
390 For function arguments, use the pre-setup rank.
391 For PHI nodes, stores, asm statements, etc, we use the rank of
392 the BB.
393 For simple operations, the rank is the maximum rank of any of
394 its operands, or the bb_rank, whichever is less.
395 I make no claims that this is optimal, however, it gives good
396 results. */
398 /* We make an exception to the normal ranking system to break
399 dependences of accumulator variables in loops. Suppose we
400 have a simple one-block loop containing:
402 x_1 = phi(x_0, x_2)
403 b = a + x_1
404 c = b + d
405 x_2 = c + e
407 As shown, each iteration of the calculation into x is fully
408 dependent upon the iteration before it. We would prefer to
409 see this in the form:
411 x_1 = phi(x_0, x_2)
412 b = a + d
413 c = b + e
414 x_2 = c + x_1
416 If the loop is unrolled, the calculations of b and c from
417 different iterations can be interleaved.
419 To obtain this result during reassociation, we bias the rank
420 of the phi definition x_1 upward, when it is recognized as an
421 accumulator pattern. The artificial rank causes it to be
422 added last, providing the desired independence. */
424 if (TREE_CODE (e) == SSA_NAME)
426 gimple stmt;
427 long rank;
428 int i, n;
429 tree op;
431 if (SSA_NAME_IS_DEFAULT_DEF (e))
432 return find_operand_rank (e);
434 stmt = SSA_NAME_DEF_STMT (e);
435 if (gimple_code (stmt) == GIMPLE_PHI)
436 return phi_rank (stmt);
438 if (!is_gimple_assign (stmt)
439 || gimple_vdef (stmt))
440 return bb_rank[gimple_bb (stmt)->index];
442 /* If we already have a rank for this expression, use that. */
443 rank = find_operand_rank (e);
444 if (rank != -1)
445 return rank;
447 /* Otherwise, find the maximum rank for the operands. As an
448 exception, remove the bias from loop-carried phis when propagating
449 the rank so that dependent operations are not also biased. */
450 rank = 0;
451 if (gimple_assign_single_p (stmt))
453 tree rhs = gimple_assign_rhs1 (stmt);
454 n = TREE_OPERAND_LENGTH (rhs);
455 if (n == 0)
456 rank = propagate_rank (rank, rhs);
457 else
459 for (i = 0; i < n; i++)
461 op = TREE_OPERAND (rhs, i);
463 if (op != NULL_TREE)
464 rank = propagate_rank (rank, op);
468 else
470 n = gimple_num_ops (stmt);
471 for (i = 1; i < n; i++)
473 op = gimple_op (stmt, i);
474 gcc_assert (op);
475 rank = propagate_rank (rank, op);
479 if (dump_file && (dump_flags & TDF_DETAILS))
481 fprintf (dump_file, "Rank for ");
482 print_generic_expr (dump_file, e, 0);
483 fprintf (dump_file, " is %ld\n", (rank + 1));
486 /* Note the rank in the hashtable so we don't recompute it. */
487 insert_operand_rank (e, (rank + 1));
488 return (rank + 1);
491 /* Globals, etc, are rank 0 */
492 return 0;
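/* An illustration (not part of this file) of the accumulator situation
   described in the comment above, at the source level.  All names here
   are made up.  Every addition in the loop body feeds the loop-carried
   variable x, so biasing the rank of x's phi makes the rewritten chain
   add x last: x + ((a[i] + d[i]) + e[i]) instead of
   ((x + a[i]) + d[i]) + e[i], which breaks the cross-iteration
   dependence on all but the final add.  */
static int
accumulate (const int *a, const int *d, const int *e, int n)
{
  int x = 0;
  for (int i = 0; i < n; i++)
    x = x + a[i] + d[i] + e[i];
  return x;
}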
496 /* We want integer ones to end up last no matter what, since they are
497 the ones we can do the most with. */
498 #define INTEGER_CONST_TYPE 1 << 3
499 #define FLOAT_CONST_TYPE 1 << 2
500 #define OTHER_CONST_TYPE 1 << 1
502 /* Classify an invariant tree into integer, float, or other, so that
503 we can sort them to be near other constants of the same type. */
504 static inline int
505 constant_type (tree t)
507 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
508 return INTEGER_CONST_TYPE;
509 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
510 return FLOAT_CONST_TYPE;
511 else
512 return OTHER_CONST_TYPE;
515 /* qsort comparison function to sort operand entries PA and PB by rank
516 so that the sorted array is ordered by rank in decreasing order. */
517 static int
518 sort_by_operand_rank (const void *pa, const void *pb)
520 const operand_entry_t oea = *(const operand_entry_t *)pa;
521 const operand_entry_t oeb = *(const operand_entry_t *)pb;
523 /* It's nicer for optimize_expression if constants that are likely
524 to fold when added/multiplied/whatever are put next to each
525 other. Since all constants have rank 0, order them by type. */
526 if (oeb->rank == 0 && oea->rank == 0)
528 if (constant_type (oeb->op) != constant_type (oea->op))
529 return constant_type (oeb->op) - constant_type (oea->op);
530 else
531 /* To make sorting result stable, we use unique IDs to determine
532 order. */
533 return oeb->id - oea->id;
536 /* Lastly, make sure the versions that are the same go next to each
537 other. */
538 if ((oeb->rank - oea->rank == 0)
539 && TREE_CODE (oea->op) == SSA_NAME
540 && TREE_CODE (oeb->op) == SSA_NAME)
542 /* As SSA_NAME_VERSION is assigned pretty randomly (we reuse versions
543 of removed SSA_NAMEs), prefer, if possible, to sort based on the
544 basic block and gimple_uid of the SSA_NAME_DEF_STMT.
545 See PR60418. */
546 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
547 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
548 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
550 gimple stmta = SSA_NAME_DEF_STMT (oea->op);
551 gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
552 basic_block bba = gimple_bb (stmta);
553 basic_block bbb = gimple_bb (stmtb);
554 if (bbb != bba)
556 if (bb_rank[bbb->index] != bb_rank[bba->index])
557 return bb_rank[bbb->index] - bb_rank[bba->index];
559 else
561 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
562 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
563 if (da != db)
564 return da ? 1 : -1;
568 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
569 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
570 else
571 return oeb->id - oea->id;
574 if (oeb->rank != oea->rank)
575 return oeb->rank - oea->rank;
576 else
577 return oeb->id - oea->id;
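/* A standalone sketch (not part of this file) of the tie-breaking idea
   used above: when two entries compare equal on the primary key, fall
   back to a unique id handed out at creation time, so qsort, which is
   not stable, still yields the same order on every host.  The struct
   and values below are illustrative only.  */
#include <stdio.h>
#include <stdlib.h>

struct entry { int rank; int id; };

static int
cmp_entry (const void *pa, const void *pb)
{
  const struct entry *a = (const struct entry *) pa;
  const struct entry *b = (const struct entry *) pb;
  if (b->rank != a->rank)
    return b->rank - a->rank;	/* Decreasing rank, as above.  */
  return b->id - a->id;		/* Unique ids decide the tie.  */
}

int
main (void)
{
  struct entry e[4] = { { 1, 0 }, { 3, 1 }, { 1, 2 }, { 3, 3 } };
  qsort (e, 4, sizeof e[0], cmp_entry);
  for (int i = 0; i < 4; i++)
    printf ("rank %d id %d\n", e[i].rank, e[i].id);
  return 0;
}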
580 /* Add an operand entry to *OPS for the tree operand OP. */
582 static void
583 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
585 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
587 oe->op = op;
588 oe->rank = get_rank (op);
589 oe->id = next_operand_entry_id++;
590 oe->count = 1;
591 ops->safe_push (oe);
594 /* Add an operand entry to *OPS for the tree operand OP with repeat
595 count REPEAT. */
597 static void
598 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
599 HOST_WIDE_INT repeat)
601 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
603 oe->op = op;
604 oe->rank = get_rank (op);
605 oe->id = next_operand_entry_id++;
606 oe->count = repeat;
607 ops->safe_push (oe);
609 reassociate_stats.pows_encountered++;
612 /* Return true if STMT is a reassociable operation containing a binary
613 operation with tree code CODE, and is inside LOOP. */
615 static bool
616 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
618 basic_block bb = gimple_bb (stmt);
620 if (gimple_bb (stmt) == NULL)
621 return false;
623 if (!flow_bb_inside_loop_p (loop, bb))
624 return false;
626 if (is_gimple_assign (stmt)
627 && gimple_assign_rhs_code (stmt) == code
628 && has_single_use (gimple_assign_lhs (stmt)))
629 return true;
631 return false;
635 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
636 operand of that operation. Otherwise, return NULL. */
638 static tree
639 get_unary_op (tree name, enum tree_code opcode)
641 gimple stmt = SSA_NAME_DEF_STMT (name);
643 if (!is_gimple_assign (stmt))
644 return NULL_TREE;
646 if (gimple_assign_rhs_code (stmt) == opcode)
647 return gimple_assign_rhs1 (stmt);
648 return NULL_TREE;
651 /* If CURR and LAST are a pair of ops that OPCODE allows us to
652 eliminate through equivalences, do so, remove them from OPS, and
653 return true. Otherwise, return false. */
655 static bool
656 eliminate_duplicate_pair (enum tree_code opcode,
657 vec<operand_entry_t> *ops,
658 bool *all_done,
659 unsigned int i,
660 operand_entry_t curr,
661 operand_entry_t last)
664 /* If we have two of the same op, and the opcode is &, |, min, or max,
665 we can eliminate one of them.
666 If we have two of the same op, and the opcode is ^, we can
667 eliminate both of them. */
669 if (last && last->op == curr->op)
671 switch (opcode)
673 case MAX_EXPR:
674 case MIN_EXPR:
675 case BIT_IOR_EXPR:
676 case BIT_AND_EXPR:
677 if (dump_file && (dump_flags & TDF_DETAILS))
679 fprintf (dump_file, "Equivalence: ");
680 print_generic_expr (dump_file, curr->op, 0);
681 fprintf (dump_file, " [&|minmax] ");
682 print_generic_expr (dump_file, last->op, 0);
683 fprintf (dump_file, " -> ");
684 print_generic_stmt (dump_file, last->op, 0);
687 ops->ordered_remove (i);
688 reassociate_stats.ops_eliminated ++;
690 return true;
692 case BIT_XOR_EXPR:
693 if (dump_file && (dump_flags & TDF_DETAILS))
695 fprintf (dump_file, "Equivalence: ");
696 print_generic_expr (dump_file, curr->op, 0);
697 fprintf (dump_file, " ^ ");
698 print_generic_expr (dump_file, last->op, 0);
699 fprintf (dump_file, " -> nothing\n");
702 reassociate_stats.ops_eliminated += 2;
704 if (ops->length () == 2)
706 ops->create (0);
707 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
708 *all_done = true;
710 else
712 ops->ordered_remove (i-1);
713 ops->ordered_remove (i-1);
716 return true;
718 default:
719 break;
722 return false;
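/* Illustrative only: the source-level shape of the duplicates handled
   above.  With the bitwise opcodes one copy is dropped; with xor both
   copies cancel.  */
static int
duplicate_pair_demo (int a, int rest)
{
  int x = rest & a & a;		/* a & a == a, one copy is removed */
  int y = rest ^ a ^ a;		/* a ^ a == 0, both copies are removed */
  return x + y;
}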
725 static vec<tree> plus_negates;
727 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
728 expression, look in OPS for a corresponding positive operation to cancel
729 it out. If we find one, remove the other from OPS, replace
730 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
731 return false. */
733 static bool
734 eliminate_plus_minus_pair (enum tree_code opcode,
735 vec<operand_entry_t> *ops,
736 unsigned int currindex,
737 operand_entry_t curr)
739 tree negateop;
740 tree notop;
741 unsigned int i;
742 operand_entry_t oe;
744 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
745 return false;
747 negateop = get_unary_op (curr->op, NEGATE_EXPR);
748 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
749 if (negateop == NULL_TREE && notop == NULL_TREE)
750 return false;
752 /* Any non-negated version will have a rank that is one less than
753 the current rank. So once we hit those ranks, if we don't find
754 one, we can stop. */
756 for (i = currindex + 1;
757 ops->iterate (i, &oe)
758 && oe->rank >= curr->rank - 1 ;
759 i++)
761 if (oe->op == negateop)
764 if (dump_file && (dump_flags & TDF_DETAILS))
766 fprintf (dump_file, "Equivalence: ");
767 print_generic_expr (dump_file, negateop, 0);
768 fprintf (dump_file, " + -");
769 print_generic_expr (dump_file, oe->op, 0);
770 fprintf (dump_file, " -> 0\n");
773 ops->ordered_remove (i);
774 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
775 ops->ordered_remove (currindex);
776 reassociate_stats.ops_eliminated ++;
778 return true;
780 else if (oe->op == notop)
782 tree op_type = TREE_TYPE (oe->op);
784 if (dump_file && (dump_flags & TDF_DETAILS))
786 fprintf (dump_file, "Equivalence: ");
787 print_generic_expr (dump_file, notop, 0);
788 fprintf (dump_file, " + ~");
789 print_generic_expr (dump_file, oe->op, 0);
790 fprintf (dump_file, " -> -1\n");
793 ops->ordered_remove (i);
794 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
795 ops->ordered_remove (currindex);
796 reassociate_stats.ops_eliminated ++;
798 return true;
802 /* CURR->OP is a negate expr in a plus expr: save it for later
803 inspection in repropagate_negates(). */
804 if (negateop != NULL_TREE)
805 plus_negates.safe_push (curr->op);
807 return false;
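/* Illustrative only: the cancellations searched for above.  In a
   PLUS_EXPR chain t + (-t) collapses to 0 and u + ~u collapses to -1
   (since ~u == -u - 1).  */
static int
plus_minus_demo (int t, int u, int rest)
{
  int a = rest + t + (-t);	/* becomes rest */
  int b = rest + u + ~u;	/* becomes rest + -1 */
  return a + b;
}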
810 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
811 bitwise not expression, look in OPS for a corresponding operand to
812 cancel it out. If we find one, remove the other from OPS, replace
813 OPS[CURRINDEX] with 0, and return true. Otherwise, return
814 false. */
816 static bool
817 eliminate_not_pairs (enum tree_code opcode,
818 vec<operand_entry_t> *ops,
819 unsigned int currindex,
820 operand_entry_t curr)
822 tree notop;
823 unsigned int i;
824 operand_entry_t oe;
826 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
827 || TREE_CODE (curr->op) != SSA_NAME)
828 return false;
830 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
831 if (notop == NULL_TREE)
832 return false;
834 /* Any non-not version will have a rank that is one less than
835 the current rank. So once we hit those ranks, if we don't find
836 one, we can stop. */
838 for (i = currindex + 1;
839 ops->iterate (i, &oe)
840 && oe->rank >= curr->rank - 1;
841 i++)
843 if (oe->op == notop)
845 if (dump_file && (dump_flags & TDF_DETAILS))
847 fprintf (dump_file, "Equivalence: ");
848 print_generic_expr (dump_file, notop, 0);
849 if (opcode == BIT_AND_EXPR)
850 fprintf (dump_file, " & ~");
851 else if (opcode == BIT_IOR_EXPR)
852 fprintf (dump_file, " | ~");
853 print_generic_expr (dump_file, oe->op, 0);
854 if (opcode == BIT_AND_EXPR)
855 fprintf (dump_file, " -> 0\n");
856 else if (opcode == BIT_IOR_EXPR)
857 fprintf (dump_file, " -> -1\n");
860 if (opcode == BIT_AND_EXPR)
861 oe->op = build_zero_cst (TREE_TYPE (oe->op));
862 else if (opcode == BIT_IOR_EXPR)
863 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
865 reassociate_stats.ops_eliminated += ops->length () - 1;
866 ops->truncate (0);
867 ops->quick_push (oe);
868 return true;
872 return false;
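/* Illustrative only: the not-pairs handled above wipe out the whole
   chain, because x & ~x is 0 and x | ~x is all ones.  */
static int
not_pairs_demo (int x, int rest)
{
  int a = rest & x & ~x;	/* becomes 0 */
  int b = rest | x | ~x;	/* becomes -1 */
  return a ^ b;
}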
875 /* Use constant value that may be present in OPS to try to eliminate
876 operands. Note that this function is only really used when we've
877 eliminated ops for other reasons, or merged constants. Across
878 single statements, fold already does all of this, plus more. There
879 is little point in duplicating logic, so I've only included the
880 identities that I could ever construct testcases to trigger. */
882 static void
883 eliminate_using_constants (enum tree_code opcode,
884 vec<operand_entry_t> *ops)
886 operand_entry_t oelast = ops->last ();
887 tree type = TREE_TYPE (oelast->op);
889 if (oelast->rank == 0
890 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
892 switch (opcode)
894 case BIT_AND_EXPR:
895 if (integer_zerop (oelast->op))
897 if (ops->length () != 1)
899 if (dump_file && (dump_flags & TDF_DETAILS))
900 fprintf (dump_file, "Found & 0, removing all other ops\n");
902 reassociate_stats.ops_eliminated += ops->length () - 1;
904 ops->truncate (0);
905 ops->quick_push (oelast);
906 return;
909 else if (integer_all_onesp (oelast->op))
911 if (ops->length () != 1)
913 if (dump_file && (dump_flags & TDF_DETAILS))
914 fprintf (dump_file, "Found & -1, removing\n");
915 ops->pop ();
916 reassociate_stats.ops_eliminated++;
919 break;
920 case BIT_IOR_EXPR:
921 if (integer_all_onesp (oelast->op))
923 if (ops->length () != 1)
925 if (dump_file && (dump_flags & TDF_DETAILS))
926 fprintf (dump_file, "Found | -1, removing all other ops\n");
928 reassociate_stats.ops_eliminated += ops->length () - 1;
930 ops->truncate (0);
931 ops->quick_push (oelast);
932 return;
935 else if (integer_zerop (oelast->op))
937 if (ops->length () != 1)
939 if (dump_file && (dump_flags & TDF_DETAILS))
940 fprintf (dump_file, "Found | 0, removing\n");
941 ops->pop ();
942 reassociate_stats.ops_eliminated++;
945 break;
946 case MULT_EXPR:
947 if (integer_zerop (oelast->op)
948 || (FLOAT_TYPE_P (type)
949 && !HONOR_NANS (TYPE_MODE (type))
950 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
951 && real_zerop (oelast->op)))
953 if (ops->length () != 1)
955 if (dump_file && (dump_flags & TDF_DETAILS))
956 fprintf (dump_file, "Found * 0, removing all other ops\n");
958 reassociate_stats.ops_eliminated += ops->length () - 1;
959 ops->truncate (1);
960 ops->quick_push (oelast);
961 return;
964 else if (integer_onep (oelast->op)
965 || (FLOAT_TYPE_P (type)
966 && !HONOR_SNANS (TYPE_MODE (type))
967 && real_onep (oelast->op)))
969 if (ops->length () != 1)
971 if (dump_file && (dump_flags & TDF_DETAILS))
972 fprintf (dump_file, "Found * 1, removing\n");
973 ops->pop ();
974 reassociate_stats.ops_eliminated++;
975 return;
978 break;
979 case BIT_XOR_EXPR:
980 case PLUS_EXPR:
981 case MINUS_EXPR:
982 if (integer_zerop (oelast->op)
983 || (FLOAT_TYPE_P (type)
984 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
985 && fold_real_zero_addition_p (type, oelast->op,
986 opcode == MINUS_EXPR)))
988 if (ops->length () != 1)
990 if (dump_file && (dump_flags & TDF_DETAILS))
991 fprintf (dump_file, "Found [|^+] 0, removing\n");
992 ops->pop ();
993 reassociate_stats.ops_eliminated++;
994 return;
997 break;
998 default:
999 break;
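/* Illustrative only: the identities handled above once a constant sits
   in the operand list.  */
static int
constants_demo (int x, int y)
{
  int a = x & y & 0;		/* & 0 wipes the chain: 0 */
  int b = x | y | -1;		/* | -1 wipes the chain: -1 */
  int c = x * y * 1;		/* * 1 is simply dropped */
  int d = x + y + 0;		/* + 0 is simply dropped */
  return a + b + c + d;
}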
1005 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
1006 bool, bool);
1008 /* Structure for tracking and counting operands. */
1009 typedef struct oecount_s {
1010 int cnt;
1011 int id;
1012 enum tree_code oecode;
1013 tree op;
1014 } oecount;
1017 /* The heap for the oecount hashtable and the sorted list of operands. */
1018 static vec<oecount> cvec;
1021 /* Oecount hashtable helpers. */
1023 struct oecount_hasher
1025 typedef int value_type;
1026 typedef int compare_type;
1027 typedef int store_values_directly;
1028 static inline hashval_t hash (const value_type &);
1029 static inline bool equal (const value_type &, const compare_type &);
1030 static bool is_deleted (int &v) { return v == 1; }
1031 static void mark_deleted (int &e) { e = 1; }
1032 static bool is_empty (int &v) { return v == 0; }
1033 static void mark_empty (int &e) { e = 0; }
1034 static void remove (int &) {}
1037 /* Hash function for oecount. */
1039 inline hashval_t
1040 oecount_hasher::hash (const value_type &p)
1042 const oecount *c = &cvec[p - 42];
1043 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1046 /* Comparison function for oecount. */
1048 inline bool
1049 oecount_hasher::equal (const value_type &p1, const compare_type &p2)
1051 const oecount *c1 = &cvec[p1 - 42];
1052 const oecount *c2 = &cvec[p2 - 42];
1053 return (c1->oecode == c2->oecode
1054 && c1->op == c2->op);
1057 /* Comparison function for qsort sorting oecount elements by count. */
1059 static int
1060 oecount_cmp (const void *p1, const void *p2)
1062 const oecount *c1 = (const oecount *)p1;
1063 const oecount *c2 = (const oecount *)p2;
1064 if (c1->cnt != c2->cnt)
1065 return c1->cnt - c2->cnt;
1066 else
1067 /* If counts are identical, use unique IDs to stabilize qsort. */
1068 return c1->id - c2->id;
1071 /* Return TRUE iff STMT represents a builtin call that raises OP
1072 to some exponent. */
1074 static bool
1075 stmt_is_power_of_op (gimple stmt, tree op)
1077 tree fndecl;
1079 if (!is_gimple_call (stmt))
1080 return false;
1082 fndecl = gimple_call_fndecl (stmt);
1084 if (!fndecl
1085 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1086 return false;
1088 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1090 CASE_FLT_FN (BUILT_IN_POW):
1091 CASE_FLT_FN (BUILT_IN_POWI):
1092 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1094 default:
1095 return false;
1099 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1100 in place and return the result. Assumes that stmt_is_power_of_op
1101 was previously called for STMT and returned TRUE. */
1103 static HOST_WIDE_INT
1104 decrement_power (gimple stmt)
1106 REAL_VALUE_TYPE c, cint;
1107 HOST_WIDE_INT power;
1108 tree arg1;
1110 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1112 CASE_FLT_FN (BUILT_IN_POW):
1113 arg1 = gimple_call_arg (stmt, 1);
1114 c = TREE_REAL_CST (arg1);
1115 power = real_to_integer (&c) - 1;
1116 real_from_integer (&cint, VOIDmode, power, SIGNED);
1117 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1118 return power;
1120 CASE_FLT_FN (BUILT_IN_POWI):
1121 arg1 = gimple_call_arg (stmt, 1);
1122 power = TREE_INT_CST_LOW (arg1) - 1;
1123 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1124 return power;
1126 default:
1127 gcc_unreachable ();
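/* A standalone sketch (not part of this file) of why funnelling
   repeated factors through a __builtin_powi call pays off: powi is
   expanded along the lines of exponentiation by squaring, so raising x
   to the n-th power costs O(log n) multiplies instead of n - 1.  */
static double
powi_sketch (double x, unsigned int n)
{
  double result = 1.0;
  while (n)
    {
      if (n & 1)
	result *= x;
      x *= x;
      n >>= 1;
    }
  return result;
}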
1131 /* Find the single immediate use of STMT's LHS, and replace it
1132 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1133 replace *DEF with OP as well. */
1135 static void
1136 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1138 tree lhs;
1139 gimple use_stmt;
1140 use_operand_p use;
1141 gimple_stmt_iterator gsi;
1143 if (is_gimple_call (stmt))
1144 lhs = gimple_call_lhs (stmt);
1145 else
1146 lhs = gimple_assign_lhs (stmt);
1148 gcc_assert (has_single_use (lhs));
1149 single_imm_use (lhs, &use, &use_stmt);
1150 if (lhs == *def)
1151 *def = op;
1152 SET_USE (use, op);
1153 if (TREE_CODE (op) != SSA_NAME)
1154 update_stmt (use_stmt);
1155 gsi = gsi_for_stmt (stmt);
1156 unlink_stmt_vdef (stmt);
1157 reassoc_remove_stmt (&gsi);
1158 release_defs (stmt);
1161 /* Walks the linear chain with result *DEF searching for an operation
1162 with operand OP and code OPCODE removing that from the chain. *DEF
1163 is updated if there is only one operand but no operation left. */
1165 static void
1166 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1168 gimple stmt = SSA_NAME_DEF_STMT (*def);
1172 tree name;
1174 if (opcode == MULT_EXPR
1175 && stmt_is_power_of_op (stmt, op))
1177 if (decrement_power (stmt) == 1)
1178 propagate_op_to_single_use (op, stmt, def);
1179 return;
1182 name = gimple_assign_rhs1 (stmt);
1184 /* If this is the operation we look for and one of the operands
1185 is ours simply propagate the other operand into the stmts
1186 single use. */
1187 if (gimple_assign_rhs_code (stmt) == opcode
1188 && (name == op
1189 || gimple_assign_rhs2 (stmt) == op))
1191 if (name == op)
1192 name = gimple_assign_rhs2 (stmt);
1193 propagate_op_to_single_use (name, stmt, def);
1194 return;
1197 /* We might have a multiply of two __builtin_pow* calls, and
1198 the operand might be hiding in the rightmost one. */
1199 if (opcode == MULT_EXPR
1200 && gimple_assign_rhs_code (stmt) == opcode
1201 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1202 && has_single_use (gimple_assign_rhs2 (stmt)))
1204 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1205 if (stmt_is_power_of_op (stmt2, op))
1207 if (decrement_power (stmt2) == 1)
1208 propagate_op_to_single_use (op, stmt2, def);
1209 return;
1213 /* Continue walking the chain. */
1214 gcc_assert (name != op
1215 && TREE_CODE (name) == SSA_NAME);
1216 stmt = SSA_NAME_DEF_STMT (name);
1218 while (1);
1221 /* Returns true if statement S1 dominates statement S2. Like
1222 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1224 static bool
1225 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1227 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1229 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1230 SSA_NAME. Assume it lives at the beginning of the function and
1231 thus dominates everything. */
1232 if (!bb1 || s1 == s2)
1233 return true;
1235 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1236 if (!bb2)
1237 return false;
1239 if (bb1 == bb2)
1241 /* PHIs in the same basic block are assumed to be
1242 executed all in parallel, if only one stmt is a PHI,
1243 it dominates the other stmt in the same basic block. */
1244 if (gimple_code (s1) == GIMPLE_PHI)
1245 return true;
1247 if (gimple_code (s2) == GIMPLE_PHI)
1248 return false;
1250 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1252 if (gimple_uid (s1) < gimple_uid (s2))
1253 return true;
1255 if (gimple_uid (s1) > gimple_uid (s2))
1256 return false;
1258 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1259 unsigned int uid = gimple_uid (s1);
1260 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1262 gimple s = gsi_stmt (gsi);
1263 if (gimple_uid (s) != uid)
1264 break;
1265 if (s == s2)
1266 return true;
1269 return false;
1272 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1275 /* Insert STMT after INSERT_POINT. */
1277 static void
1278 insert_stmt_after (gimple stmt, gimple insert_point)
1280 gimple_stmt_iterator gsi;
1281 basic_block bb;
1283 if (gimple_code (insert_point) == GIMPLE_PHI)
1284 bb = gimple_bb (insert_point);
1285 else if (!stmt_ends_bb_p (insert_point))
1287 gsi = gsi_for_stmt (insert_point);
1288 gimple_set_uid (stmt, gimple_uid (insert_point));
1289 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1290 return;
1292 else
1293 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1294 thus if it must end a basic block, it should be a call that can
1295 throw, or some assignment that can throw. If it throws, the LHS
1296 of it will not be initialized though, so only valid places using
1297 the SSA_NAME should be dominated by the fallthru edge. */
1298 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1299 gsi = gsi_after_labels (bb);
1300 if (gsi_end_p (gsi))
1302 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1303 gimple_set_uid (stmt,
1304 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1306 else
1307 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1308 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1311 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1312 the result. Places the statement after the definition of either
1313 OP1 or OP2. Returns the new statement. */
1315 static gimple
1316 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1318 gimple op1def = NULL, op2def = NULL;
1319 gimple_stmt_iterator gsi;
1320 tree op;
1321 gimple sum;
1323 /* Create the addition statement. */
1324 op = make_ssa_name (type, NULL);
1325 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1327 /* Find an insertion place and insert. */
1328 if (TREE_CODE (op1) == SSA_NAME)
1329 op1def = SSA_NAME_DEF_STMT (op1);
1330 if (TREE_CODE (op2) == SSA_NAME)
1331 op2def = SSA_NAME_DEF_STMT (op2);
1332 if ((!op1def || gimple_nop_p (op1def))
1333 && (!op2def || gimple_nop_p (op2def)))
1335 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1336 if (gsi_end_p (gsi))
1338 gimple_stmt_iterator gsi2
1339 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1340 gimple_set_uid (sum,
1341 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1343 else
1344 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1345 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1347 else
1349 gimple insert_point;
1350 if ((!op1def || gimple_nop_p (op1def))
1351 || (op2def && !gimple_nop_p (op2def)
1352 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1353 insert_point = op2def;
1354 else
1355 insert_point = op1def;
1356 insert_stmt_after (sum, insert_point);
1358 update_stmt (sum);
1360 return sum;
1363 /* Perform un-distribution of divisions and multiplications.
1364 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1365 to (A + B) / X for real X.
1367 The algorithm is organized as follows.
1369 - First we walk the addition chain *OPS looking for summands that
1370 are defined by a multiplication or a real division. This results
1371 in the candidates bitmap with relevant indices into *OPS.
1373 - Second we build the chains of multiplications or divisions for
1374 these candidates, counting the number of occurrences of (operand, code)
1375 pairs in all of the candidates chains.
1377 - Third we sort the (operand, code) pairs by number of occurrence and
1378 process them starting with the pair with the most uses.
1380 * For each such pair we walk the candidates again to build a
1381 second candidate bitmap noting all multiplication/division chains
1382 that have at least one occurrence of (operand, code).
1384 * We build an alternate addition chain only covering these
1385 candidates with one (operand, code) operation removed from their
1386 multiplication/division chain.
1388 * The first candidate gets replaced by the alternate addition chain
1389 multiplied/divided by the operand.
1391 * All candidate chains get disabled for further processing and
1392 processing of (operand, code) pairs continues.
1394 The alternate addition chains built are re-processed by the main
1395 reassociation algorithm which allows optimizing a * x * y + b * y * x
1396 to (a + b ) * x * y in one invocation of the reassociation pass. */
1398 static bool
1399 undistribute_ops_list (enum tree_code opcode,
1400 vec<operand_entry_t> *ops, struct loop *loop)
1402 unsigned int length = ops->length ();
1403 operand_entry_t oe1;
1404 unsigned i, j;
1405 sbitmap candidates, candidates2;
1406 unsigned nr_candidates, nr_candidates2;
1407 sbitmap_iterator sbi0;
1408 vec<operand_entry_t> *subops;
1409 bool changed = false;
1410 int next_oecount_id = 0;
1412 if (length <= 1
1413 || opcode != PLUS_EXPR)
1414 return false;
1416 /* Build a list of candidates to process. */
1417 candidates = sbitmap_alloc (length);
1418 bitmap_clear (candidates);
1419 nr_candidates = 0;
1420 FOR_EACH_VEC_ELT (*ops, i, oe1)
1422 enum tree_code dcode;
1423 gimple oe1def;
1425 if (TREE_CODE (oe1->op) != SSA_NAME)
1426 continue;
1427 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1428 if (!is_gimple_assign (oe1def))
1429 continue;
1430 dcode = gimple_assign_rhs_code (oe1def);
1431 if ((dcode != MULT_EXPR
1432 && dcode != RDIV_EXPR)
1433 || !is_reassociable_op (oe1def, dcode, loop))
1434 continue;
1436 bitmap_set_bit (candidates, i);
1437 nr_candidates++;
1440 if (nr_candidates < 2)
1442 sbitmap_free (candidates);
1443 return false;
1446 if (dump_file && (dump_flags & TDF_DETAILS))
1448 fprintf (dump_file, "searching for un-distribute opportunities ");
1449 print_generic_expr (dump_file,
1450 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1451 fprintf (dump_file, " %d\n", nr_candidates);
1454 /* Build linearized sub-operand lists and the counting table. */
1455 cvec.create (0);
1457 hash_table<oecount_hasher> ctable (15);
1459 /* ??? Macro arguments cannot have multi-argument template types in
1460 them. This typedef is needed to work around that limitation. */
1461 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1462 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1463 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1465 gimple oedef;
1466 enum tree_code oecode;
1467 unsigned j;
1469 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1470 oecode = gimple_assign_rhs_code (oedef);
1471 linearize_expr_tree (&subops[i], oedef,
1472 associative_tree_code (oecode), false);
1474 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1476 oecount c;
1477 int *slot;
1478 int idx;
1479 c.oecode = oecode;
1480 c.cnt = 1;
1481 c.id = next_oecount_id++;
1482 c.op = oe1->op;
1483 cvec.safe_push (c);
1484 idx = cvec.length () + 41;
1485 slot = ctable.find_slot (idx, INSERT);
1486 if (!*slot)
1488 *slot = idx;
1490 else
1492 cvec.pop ();
1493 cvec[*slot - 42].cnt++;
1498 /* Sort the counting table. */
1499 cvec.qsort (oecount_cmp);
1501 if (dump_file && (dump_flags & TDF_DETAILS))
1503 oecount *c;
1504 fprintf (dump_file, "Candidates:\n");
1505 FOR_EACH_VEC_ELT (cvec, j, c)
1507 fprintf (dump_file, " %u %s: ", c->cnt,
1508 c->oecode == MULT_EXPR
1509 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1510 print_generic_expr (dump_file, c->op, 0);
1511 fprintf (dump_file, "\n");
1515 /* Process the (operand, code) pairs in order of most occurrence. */
1516 candidates2 = sbitmap_alloc (length);
1517 while (!cvec.is_empty ())
1519 oecount *c = &cvec.last ();
1520 if (c->cnt < 2)
1521 break;
1523 /* Now collect the operands in the outer chain that contain
1524 the common operand in their inner chain. */
1525 bitmap_clear (candidates2);
1526 nr_candidates2 = 0;
1527 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1529 gimple oedef;
1530 enum tree_code oecode;
1531 unsigned j;
1532 tree op = (*ops)[i]->op;
1534 /* If we undistributed in this chain already this may be
1535 a constant. */
1536 if (TREE_CODE (op) != SSA_NAME)
1537 continue;
1539 oedef = SSA_NAME_DEF_STMT (op);
1540 oecode = gimple_assign_rhs_code (oedef);
1541 if (oecode != c->oecode)
1542 continue;
1544 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1546 if (oe1->op == c->op)
1548 bitmap_set_bit (candidates2, i);
1549 ++nr_candidates2;
1550 break;
1555 if (nr_candidates2 >= 2)
1557 operand_entry_t oe1, oe2;
1558 gimple prod;
1559 int first = bitmap_first_set_bit (candidates2);
1561 /* Build the new addition chain. */
1562 oe1 = (*ops)[first];
1563 if (dump_file && (dump_flags & TDF_DETAILS))
1565 fprintf (dump_file, "Building (");
1566 print_generic_expr (dump_file, oe1->op, 0);
1568 zero_one_operation (&oe1->op, c->oecode, c->op);
1569 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1571 gimple sum;
1572 oe2 = (*ops)[i];
1573 if (dump_file && (dump_flags & TDF_DETAILS))
1575 fprintf (dump_file, " + ");
1576 print_generic_expr (dump_file, oe2->op, 0);
1578 zero_one_operation (&oe2->op, c->oecode, c->op);
1579 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1580 oe1->op, oe2->op, opcode);
1581 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1582 oe2->rank = 0;
1583 oe1->op = gimple_get_lhs (sum);
1586 /* Apply the multiplication/division. */
1587 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1588 oe1->op, c->op, c->oecode);
1589 if (dump_file && (dump_flags & TDF_DETAILS))
1591 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1592 print_generic_expr (dump_file, c->op, 0);
1593 fprintf (dump_file, "\n");
1596 /* Record it in the addition chain and disable further
1597 undistribution with this op. */
1598 oe1->op = gimple_assign_lhs (prod);
1599 oe1->rank = get_rank (oe1->op);
1600 subops[first].release ();
1602 changed = true;
1605 cvec.pop ();
1608 for (i = 0; i < ops->length (); ++i)
1609 subops[i].release ();
1610 free (subops);
1611 cvec.release ();
1612 sbitmap_free (candidates);
1613 sbitmap_free (candidates2);
1615 return changed;
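/* Illustrative only: the source-level shape of the transform performed
   above.  Both summands share the factor x, so the addition chain is
   rebuilt without it and then multiplied by x once.  */
static int
undistribute_demo (int a, int b, int x)
{
  return a * x + b * x;		/* becomes (a + b) * x */
}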
1618 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1619 expression, examine the other OPS to see if any of them are comparisons
1620 of the same values, which we may be able to combine or eliminate.
1621 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1623 static bool
1624 eliminate_redundant_comparison (enum tree_code opcode,
1625 vec<operand_entry_t> *ops,
1626 unsigned int currindex,
1627 operand_entry_t curr)
1629 tree op1, op2;
1630 enum tree_code lcode, rcode;
1631 gimple def1, def2;
1632 int i;
1633 operand_entry_t oe;
1635 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1636 return false;
1638 /* Check that CURR is a comparison. */
1639 if (TREE_CODE (curr->op) != SSA_NAME)
1640 return false;
1641 def1 = SSA_NAME_DEF_STMT (curr->op);
1642 if (!is_gimple_assign (def1))
1643 return false;
1644 lcode = gimple_assign_rhs_code (def1);
1645 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1646 return false;
1647 op1 = gimple_assign_rhs1 (def1);
1648 op2 = gimple_assign_rhs2 (def1);
1650 /* Now look for a similar comparison in the remaining OPS. */
1651 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1653 tree t;
1655 if (TREE_CODE (oe->op) != SSA_NAME)
1656 continue;
1657 def2 = SSA_NAME_DEF_STMT (oe->op);
1658 if (!is_gimple_assign (def2))
1659 continue;
1660 rcode = gimple_assign_rhs_code (def2);
1661 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1662 continue;
1664 /* If we got here, we have a match. See if we can combine the
1665 two comparisons. */
1666 if (opcode == BIT_IOR_EXPR)
1667 t = maybe_fold_or_comparisons (lcode, op1, op2,
1668 rcode, gimple_assign_rhs1 (def2),
1669 gimple_assign_rhs2 (def2));
1670 else
1671 t = maybe_fold_and_comparisons (lcode, op1, op2,
1672 rcode, gimple_assign_rhs1 (def2),
1673 gimple_assign_rhs2 (def2));
1674 if (!t)
1675 continue;
1677 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1678 always give us a boolean_type_node value back. If the original
1679 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1680 we need to convert. */
1681 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1682 t = fold_convert (TREE_TYPE (curr->op), t);
1684 if (TREE_CODE (t) != INTEGER_CST
1685 && !operand_equal_p (t, curr->op, 0))
1687 enum tree_code subcode;
1688 tree newop1, newop2;
1689 if (!COMPARISON_CLASS_P (t))
1690 continue;
1691 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1692 STRIP_USELESS_TYPE_CONVERSION (newop1);
1693 STRIP_USELESS_TYPE_CONVERSION (newop2);
1694 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1695 continue;
1698 if (dump_file && (dump_flags & TDF_DETAILS))
1700 fprintf (dump_file, "Equivalence: ");
1701 print_generic_expr (dump_file, curr->op, 0);
1702 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1703 print_generic_expr (dump_file, oe->op, 0);
1704 fprintf (dump_file, " -> ");
1705 print_generic_expr (dump_file, t, 0);
1706 fprintf (dump_file, "\n");
1709 /* Now we can delete oe, as it has been subsumed by the new combined
1710 expression t. */
1711 ops->ordered_remove (i);
1712 reassociate_stats.ops_eliminated ++;
1714 /* If t is the same as curr->op, we're done. Otherwise we must
1715 replace curr->op with t. Special case is if we got a constant
1716 back, in which case we add it to the end instead of in place of
1717 the current entry. */
1718 if (TREE_CODE (t) == INTEGER_CST)
1720 ops->ordered_remove (currindex);
1721 add_to_ops_vec (ops, t);
1723 else if (!operand_equal_p (t, curr->op, 0))
1725 gimple sum;
1726 enum tree_code subcode;
1727 tree newop1;
1728 tree newop2;
1729 gcc_assert (COMPARISON_CLASS_P (t));
1730 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1731 STRIP_USELESS_TYPE_CONVERSION (newop1);
1732 STRIP_USELESS_TYPE_CONVERSION (newop2);
1733 gcc_checking_assert (is_gimple_val (newop1)
1734 && is_gimple_val (newop2));
1735 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1736 curr->op = gimple_get_lhs (sum);
1738 return true;
1741 return false;
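/* Illustrative only: two comparisons of the same operands in one
   bitwise chain are merged, as in the example quoted above.  */
static int
redundant_comparison_demo (int a, int b)
{
  return (a < b) | (a == b);	/* becomes a <= b */
}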
1744 /* Perform various identities and other optimizations on the list of
1745 operand entries, stored in OPS. The tree code for the binary
1746 operation between all the operands is OPCODE. */
1748 static void
1749 optimize_ops_list (enum tree_code opcode,
1750 vec<operand_entry_t> *ops)
1752 unsigned int length = ops->length ();
1753 unsigned int i;
1754 operand_entry_t oe;
1755 operand_entry_t oelast = NULL;
1756 bool iterate = false;
1758 if (length == 1)
1759 return;
1761 oelast = ops->last ();
1763 /* If the last two are constants, pop the constants off, merge them
1764 and try the next two. */
1765 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1767 operand_entry_t oelm1 = (*ops)[length - 2];
1769 if (oelm1->rank == 0
1770 && is_gimple_min_invariant (oelm1->op)
1771 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1772 TREE_TYPE (oelast->op)))
1774 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1775 oelm1->op, oelast->op);
1777 if (folded && is_gimple_min_invariant (folded))
1779 if (dump_file && (dump_flags & TDF_DETAILS))
1780 fprintf (dump_file, "Merging constants\n");
1782 ops->pop ();
1783 ops->pop ();
1785 add_to_ops_vec (ops, folded);
1786 reassociate_stats.constants_eliminated++;
1788 optimize_ops_list (opcode, ops);
1789 return;
1794 eliminate_using_constants (opcode, ops);
1795 oelast = NULL;
1797 for (i = 0; ops->iterate (i, &oe);)
1799 bool done = false;
1801 if (eliminate_not_pairs (opcode, ops, i, oe))
1802 return;
1803 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1804 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1805 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1807 if (done)
1808 return;
1809 iterate = true;
1810 oelast = NULL;
1811 continue;
1813 oelast = oe;
1814 i++;
1817 length = ops->length ();
1818 oelast = ops->last ();
1820 if (iterate)
1821 optimize_ops_list (opcode, ops);
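/* Illustrative only: when the two lowest-ranked entries in the operand
   list are constants they are folded into a single constant before the
   pairwise eliminations above run.  */
static int
merge_constants_demo (int x)
{
  return x + 3 + 5;		/* the operand list ends up as x, 8 */
}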
1824 /* The following functions are subroutines to optimize_range_tests and allow
1825 it to try to change a logical combination of comparisons into a range
1826 test.
1828 For example, both
1829 X == 2 || X == 5 || X == 3 || X == 4
1830 and
1831 X >= 2 && X <= 5
1832 are converted to
1833 (unsigned) (X - 2) <= 3
1835 For more information see comments above fold_range_test in fold-const.c,
1836 this implementation is for GIMPLE. */
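/* Illustrative only: the source-level shape of the range test.  On an
   unsigned operand both forms below become the single check
   x - 2 <= 3.  */
static int
range_test_demo (unsigned int x)
{
  int a = x == 2 || x == 5 || x == 3 || x == 4;
  int b = x >= 2 && x <= 5;
  return a & b;
}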
1838 struct range_entry
1840 tree exp;
1841 tree low;
1842 tree high;
1843 bool in_p;
1844 bool strict_overflow_p;
1845 unsigned int idx, next;
1848 /* This is similar to make_range in fold-const.c, but on top of
1849 GIMPLE instead of trees. If EXP is non-NULL, it should be
1850 an SSA_NAME and STMT argument is ignored, otherwise STMT
1851 argument should be a GIMPLE_COND. */
1853 static void
1854 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1856 int in_p;
1857 tree low, high;
1858 bool is_bool, strict_overflow_p;
1860 r->exp = NULL_TREE;
1861 r->in_p = false;
1862 r->strict_overflow_p = false;
1863 r->low = NULL_TREE;
1864 r->high = NULL_TREE;
1865 if (exp != NULL_TREE
1866 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1867 return;
1869 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1870 and see if we can refine the range. Some of the cases below may not
1871 happen, but it doesn't seem worth worrying about this. We "continue"
1872 the outer loop when we've changed something; otherwise we "break"
1873 the switch, which will "break" the while. */
1874 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1875 high = low;
1876 in_p = 0;
1877 strict_overflow_p = false;
1878 is_bool = false;
1879 if (exp == NULL_TREE)
1880 is_bool = true;
1881 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1883 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1884 is_bool = true;
1885 else
1886 return;
1888 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1889 is_bool = true;
1891 while (1)
1893 enum tree_code code;
1894 tree arg0, arg1, exp_type;
1895 tree nexp;
1896 location_t loc;
1898 if (exp != NULL_TREE)
1900 if (TREE_CODE (exp) != SSA_NAME
1901 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1902 break;
1904 stmt = SSA_NAME_DEF_STMT (exp);
1905 if (!is_gimple_assign (stmt))
1906 break;
1908 code = gimple_assign_rhs_code (stmt);
1909 arg0 = gimple_assign_rhs1 (stmt);
1910 arg1 = gimple_assign_rhs2 (stmt);
1911 exp_type = TREE_TYPE (exp);
1913 else
1915 code = gimple_cond_code (stmt);
1916 arg0 = gimple_cond_lhs (stmt);
1917 arg1 = gimple_cond_rhs (stmt);
1918 exp_type = boolean_type_node;
1921 if (TREE_CODE (arg0) != SSA_NAME)
1922 break;
1923 loc = gimple_location (stmt);
1924 switch (code)
1926 case BIT_NOT_EXPR:
1927 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1928 /* Ensure the range is either +[-,0], +[0,0],
1929 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1930 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1931 or similar expression of unconditional true or
1932 false, it should not be negated. */
1933 && ((high && integer_zerop (high))
1934 || (low && integer_onep (low))))
1936 in_p = !in_p;
1937 exp = arg0;
1938 continue;
1940 break;
1941 case SSA_NAME:
1942 exp = arg0;
1943 continue;
1944 CASE_CONVERT:
1945 if (is_bool)
1946 goto do_default;
1947 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1949 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1950 is_bool = true;
1951 else
1952 return;
1954 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1955 is_bool = true;
1956 goto do_default;
1957 case EQ_EXPR:
1958 case NE_EXPR:
1959 case LT_EXPR:
1960 case LE_EXPR:
1961 case GE_EXPR:
1962 case GT_EXPR:
1963 is_bool = true;
1964 /* FALLTHRU */
1965 default:
1966 if (!is_bool)
1967 return;
1968 do_default:
1969 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1970 &low, &high, &in_p,
1971 &strict_overflow_p);
1972 if (nexp != NULL_TREE)
1974 exp = nexp;
1975 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1976 continue;
1978 break;
1980 break;
1982 if (is_bool)
1984 r->exp = exp;
1985 r->in_p = in_p;
1986 r->low = low;
1987 r->high = high;
1988 r->strict_overflow_p = strict_overflow_p;
1992 /* Comparison function for qsort. Sort entries
1993 without SSA_NAME exp first, then with SSA_NAMEs sorted
1994 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1995 by increasing ->low and if ->low is the same, by increasing
1996 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1997 maximum. */
1999 static int
2000 range_entry_cmp (const void *a, const void *b)
2002 const struct range_entry *p = (const struct range_entry *) a;
2003 const struct range_entry *q = (const struct range_entry *) b;
2005 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2007 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2009 /* Group range_entries for the same SSA_NAME together. */
2010 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2011 return -1;
2012 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2013 return 1;
2014 /* If ->low is different, NULL low goes first, then by
2015 ascending low. */
2016 if (p->low != NULL_TREE)
2018 if (q->low != NULL_TREE)
2020 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2021 p->low, q->low);
2022 if (tem && integer_onep (tem))
2023 return -1;
2024 tem = fold_binary (GT_EXPR, boolean_type_node,
2025 p->low, q->low);
2026 if (tem && integer_onep (tem))
2027 return 1;
2029 else
2030 return 1;
2032 else if (q->low != NULL_TREE)
2033 return -1;
2034 /* If ->high is different, NULL high goes last, before that by
2035 ascending high. */
2036 if (p->high != NULL_TREE)
2038 if (q->high != NULL_TREE)
2040 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2041 p->high, q->high);
2042 if (tem && integer_onep (tem))
2043 return -1;
2044 tem = fold_binary (GT_EXPR, boolean_type_node,
2045 p->high, q->high);
2046 if (tem && integer_onep (tem))
2047 return 1;
2049 else
2050 return -1;
2052 else if (p->high != NULL_TREE)
2053 return 1;
2054 /* If both ranges are the same, sort below by ascending idx. */
2056 else
2057 return 1;
2059 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2060 return -1;
2062 if (p->idx < q->idx)
2063 return -1;
2064 else
2066 gcc_checking_assert (p->idx > q->idx);
2067 return 1;
2071 /* Helper routine of optimize_range_tests.
2072 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2073 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2074 OPCODE and OPS are arguments of optimize_range_tests. Return
2075 true if the range merge has been successful.
2076 If OPCODE is ERROR_MARK, this is called from within
2077 maybe_optimize_range_tests and is performing inter-bb range optimization.
2078 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2079 oe->rank. */
2081 static bool
2082 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2083 unsigned int count, enum tree_code opcode,
2084 vec<operand_entry_t> *ops, tree exp, bool in_p,
2085 tree low, tree high, bool strict_overflow_p)
2087 operand_entry_t oe = (*ops)[range->idx];
2088 tree op = oe->op;
2089 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2090 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2091 location_t loc = gimple_location (stmt);
2092 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2093 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
2094 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2095 gimple_stmt_iterator gsi;
2097 if (tem == NULL_TREE)
2098 return false;
2100 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2101 warning_at (loc, OPT_Wstrict_overflow,
2102 "assuming signed overflow does not occur "
2103 "when simplifying range test");
2105 if (dump_file && (dump_flags & TDF_DETAILS))
2107 struct range_entry *r;
2108 fprintf (dump_file, "Optimizing range tests ");
2109 print_generic_expr (dump_file, range->exp, 0);
2110 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2111 print_generic_expr (dump_file, range->low, 0);
2112 fprintf (dump_file, ", ");
2113 print_generic_expr (dump_file, range->high, 0);
2114 fprintf (dump_file, "]");
2115 for (r = otherrange; r < otherrange + count; r++)
2117 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2118 print_generic_expr (dump_file, r->low, 0);
2119 fprintf (dump_file, ", ");
2120 print_generic_expr (dump_file, r->high, 0);
2121 fprintf (dump_file, "]");
2123 fprintf (dump_file, "\n into ");
2124 print_generic_expr (dump_file, tem, 0);
2125 fprintf (dump_file, "\n");
2128 if (opcode == BIT_IOR_EXPR
2129 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2130 tem = invert_truthvalue_loc (loc, tem);
2132 tem = fold_convert_loc (loc, optype, tem);
2133 gsi = gsi_for_stmt (stmt);
2134 /* In rare cases range->exp can be equal to lhs of stmt.
2135 In that case we have to insert after the stmt rather than before
2136 it. */
2137 if (op == range->exp)
2138 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2139 GSI_CONTINUE_LINKING);
2140 else
2142 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2143 GSI_SAME_STMT);
2144 gsi_prev (&gsi);
2146 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2147 if (gimple_uid (gsi_stmt (gsi)))
2148 break;
2149 else
2150 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2152 oe->op = tem;
2153 range->exp = exp;
2154 range->low = low;
2155 range->high = high;
2156 range->in_p = in_p;
2157 range->strict_overflow_p = false;
2159 for (range = otherrange; range < otherrange + count; range++)
2161 oe = (*ops)[range->idx];
2162 /* Now change all the other range test immediate uses, so that
2163 those tests will be optimized away. */
2164 if (opcode == ERROR_MARK)
2166 if (oe->op)
2167 oe->op = build_int_cst (TREE_TYPE (oe->op),
2168 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2169 else
2170 oe->op = (oe->rank == BIT_IOR_EXPR
2171 ? boolean_false_node : boolean_true_node);
2173 else
2174 oe->op = error_mark_node;
2175 range->exp = NULL_TREE;
2177 return true;
2180 /* Optimize X == CST1 || X == CST2
2181 if popcount (CST1 ^ CST2) == 1 into
2182 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2183 Similarly for ranges. E.g.
2184 X != 2 && X != 3 && X != 10 && X != 11
2185 will be transformed by the previous optimization into
2186 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2187 and this loop can transform that into
2188 !(((X & ~8) - 2U) <= 1U). */
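/* As a concrete illustrative sketch (x and foo are placeholder names,
   not taken from this file): since 4 ^ 6 == 2 has a single bit set,
   the XOR merge rewrites

     if (x == 4 || x == 6)
       foo ();

   into the equivalent

     if ((x & ~2) == 4)
       foo ();

   because masking away the one differing bit maps both 4 and 6 to 4
   and maps every other value to something different from 4.  */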
2190 static bool
2191 optimize_range_tests_xor (enum tree_code opcode, tree type,
2192 tree lowi, tree lowj, tree highi, tree highj,
2193 vec<operand_entry_t> *ops,
2194 struct range_entry *rangei,
2195 struct range_entry *rangej)
2197 tree lowxor, highxor, tem, exp;
2198 /* Check highi ^ lowi == highj ^ lowj and
2199 popcount (highi ^ lowi) == 1. */
2200 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2201 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2202 return false;
2203 if (tree_log2 (lowxor) < 0)
2204 return false;
2205 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2206 if (!tree_int_cst_equal (lowxor, highxor))
2207 return false;
2209 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2210 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2211 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2212 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2213 if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2214 rangei->in_p, lowj, highj,
2215 rangei->strict_overflow_p
2216 || rangej->strict_overflow_p))
2217 return true;
2218 return false;
2221 /* Optimize X == CST1 || X == CST2
2222 if popcount (CST2 - CST1) == 1 into
2223 ((X - CST1) & ~(CST2 - CST1)) == 0.
2224 Similarly for ranges. E.g.
2225 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2226 || X == 75 || X == 45
2227 will be transformed by the previous optimization into
2228 (X - 43U) <= 3U || (X - 75U) <= 3U
2229 and this loop can transform that into
2230 ((X - 43U) & ~(75U - 43U)) <= 3U. */
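/* A concrete illustrative sketch (x and foo are placeholders): since
   7 - 3 == 4 has a single bit set, the difference-based merge rewrites

     if (x == 3 || x == 7)
       foo ();

   into, computed in the corresponding unsigned type,

     if (((x - 3U) & ~4U) == 0)
       foo ();

   because x - 3U is 0 for x == 3, 4 for x == 7, and for any other
   value x - 3U has at least one bit set other than bit 2.  */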
2231 static bool
2232 optimize_range_tests_diff (enum tree_code opcode, tree type,
2233 tree lowi, tree lowj, tree highi, tree highj,
2234 vec<operand_entry_t> *ops,
2235 struct range_entry *rangei,
2236 struct range_entry *rangej)
2238 tree tem1, tem2, mask;
2239 /* Check highi - lowi == highj - lowj. */
2240 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2241 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2242 return false;
2243 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2244 if (!tree_int_cst_equal (tem1, tem2))
2245 return false;
2246 /* Check popcount (lowj - lowi) == 1. */
2247 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2248 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2249 return false;
2250 if (tree_log2 (tem1) < 0)
2251 return false;
2253 type = unsigned_type_for (type);
2254 tem1 = fold_convert (type, tem1);
2255 tem2 = fold_convert (type, tem2);
2256 lowi = fold_convert (type, lowi);
2257 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2258 tem1 = fold_binary (MINUS_EXPR, type,
2259 fold_convert (type, rangei->exp), lowi);
2260 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2261 lowj = build_int_cst (type, 0);
2262 if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2263 rangei->in_p, lowj, tem2,
2264 rangei->strict_overflow_p
2265 || rangej->strict_overflow_p))
2266 return true;
2267 return false;
2270 /* Perform the checks common to optimize_range_tests_xor and
2271 optimize_range_tests_diff, then dispatch to one of them.
2272 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor.
2273 Else call optimize_range_tests_diff. */
2275 static bool
2276 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2277 bool optimize_xor, vec<operand_entry_t> *ops,
2278 struct range_entry *ranges)
2280 int i, j;
2281 bool any_changes = false;
2282 for (i = first; i < length; i++)
2284 tree lowi, highi, lowj, highj, type, tem;
2286 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2287 continue;
2288 type = TREE_TYPE (ranges[i].exp);
2289 if (!INTEGRAL_TYPE_P (type))
2290 continue;
2291 lowi = ranges[i].low;
2292 if (lowi == NULL_TREE)
2293 lowi = TYPE_MIN_VALUE (type);
2294 highi = ranges[i].high;
2295 if (highi == NULL_TREE)
2296 continue;
2297 for (j = i + 1; j < length && j < i + 64; j++)
2299 bool changes;
2300 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2301 continue;
2302 lowj = ranges[j].low;
2303 if (lowj == NULL_TREE)
2304 continue;
2305 highj = ranges[j].high;
2306 if (highj == NULL_TREE)
2307 highj = TYPE_MAX_VALUE (type);
2308 /* Check lowj > highi. */
2309 tem = fold_binary (GT_EXPR, boolean_type_node,
2310 lowj, highi);
2311 if (tem == NULL_TREE || !integer_onep (tem))
2312 continue;
2313 if (optimize_xor)
2314 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2315 highi, highj, ops,
2316 ranges + i, ranges + j);
2317 else
2318 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2319 highi, highj, ops,
2320 ranges + i, ranges + j);
2321 if (changes)
2323 any_changes = true;
2324 break;
2328 return any_changes;
2331 /* Optimize range tests, similarly to how fold_range_test optimizes
2332 them on trees. The tree code for the binary
2333 operation between all the operands is OPCODE.
2334 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2335 maybe_optimize_range_tests for inter-bb range optimization.
2336 In that case if oe->op is NULL, oe->id is the index of the bb whose
2337 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2338 the actual opcode. */
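/* As an illustrative sketch of the effect (x is a placeholder name):
   a test like

     x == 1 || x == 2 || x == 3 || x == 4

   is recorded as four single-value ranges on the same SSA_NAME;
   merge_ranges collapses them into the single range [1, 4] and
   update_range_test replaces the first operand with the equivalent of

     (unsigned) x - 1 <= 3

   while the remaining operands are neutralized so that they drop out
   of the final expression.  */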
2340 static bool
2341 optimize_range_tests (enum tree_code opcode,
2342 vec<operand_entry_t> *ops)
2344 unsigned int length = ops->length (), i, j, first;
2345 operand_entry_t oe;
2346 struct range_entry *ranges;
2347 bool any_changes = false;
2349 if (length == 1)
2350 return false;
2352 ranges = XNEWVEC (struct range_entry, length);
2353 for (i = 0; i < length; i++)
2355 oe = (*ops)[i];
2356 ranges[i].idx = i;
2357 init_range_entry (ranges + i, oe->op,
2358 oe->op ? NULL :
2359 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2360 /* For | invert it now, we will invert it again before emitting
2361 the optimized expression. */
2362 if (opcode == BIT_IOR_EXPR
2363 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2364 ranges[i].in_p = !ranges[i].in_p;
2367 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2368 for (i = 0; i < length; i++)
2369 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2370 break;
2372 /* Try to merge ranges. */
2373 for (first = i; i < length; i++)
2375 tree low = ranges[i].low;
2376 tree high = ranges[i].high;
2377 int in_p = ranges[i].in_p;
2378 bool strict_overflow_p = ranges[i].strict_overflow_p;
2379 int update_fail_count = 0;
2381 for (j = i + 1; j < length; j++)
2383 if (ranges[i].exp != ranges[j].exp)
2384 break;
2385 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2386 ranges[j].in_p, ranges[j].low, ranges[j].high))
2387 break;
2388 strict_overflow_p |= ranges[j].strict_overflow_p;
2391 if (j == i + 1)
2392 continue;
2394 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2395 ops, ranges[i].exp, in_p, low, high,
2396 strict_overflow_p))
2398 i = j - 1;
2399 any_changes = true;
2401 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2402 while update_range_test would fail. */
2403 else if (update_fail_count == 64)
2404 i = j - 1;
2405 else
2406 ++update_fail_count;
2409 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2410 ops, ranges);
2412 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2413 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2414 ops, ranges);
2416 if (any_changes && opcode != ERROR_MARK)
2418 j = 0;
2419 FOR_EACH_VEC_ELT (*ops, i, oe)
2421 if (oe->op == error_mark_node)
2422 continue;
2423 else if (i != j)
2424 (*ops)[j] = oe;
2425 j++;
2427 ops->truncate (j);
2430 XDELETEVEC (ranges);
2431 return any_changes;
2434 /* Return true if STMT is a cast like:
2435 <bb N>:
2437 _123 = (int) _234;
2439 <bb M>:
2440 # _345 = PHI <_123(N), 1(...), 1(...)>
2441 where _234 has bool type, _123 has a single use and
2442 bb N has a single successor M. This is commonly used in
2443 the last block of a range test. */
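/* This typically comes from the last operand of a short-circuited
   || or && chain, e.g. (a sketch, with x and y placeholder names):

     return x == 1 || x == 3 || (y & 4) != 0;

   where the earlier comparisons become GIMPLE_CONDs in their own
   blocks and the last block computes a boolean _234, casts it via
   _123 = (int) _234 and feeds the PHI merging the overall result.  */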
2445 static bool
2446 final_range_test_p (gimple stmt)
2448 basic_block bb, rhs_bb;
2449 edge e;
2450 tree lhs, rhs;
2451 use_operand_p use_p;
2452 gimple use_stmt;
2454 if (!gimple_assign_cast_p (stmt))
2455 return false;
2456 bb = gimple_bb (stmt);
2457 if (!single_succ_p (bb))
2458 return false;
2459 e = single_succ_edge (bb);
2460 if (e->flags & EDGE_COMPLEX)
2461 return false;
2463 lhs = gimple_assign_lhs (stmt);
2464 rhs = gimple_assign_rhs1 (stmt);
2465 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2466 || TREE_CODE (rhs) != SSA_NAME
2467 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2468 return false;
2470 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2471 if (!single_imm_use (lhs, &use_p, &use_stmt))
2472 return false;
2474 if (gimple_code (use_stmt) != GIMPLE_PHI
2475 || gimple_bb (use_stmt) != e->dest)
2476 return false;
2478 /* And that the rhs is defined in the same loop. */
2479 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2480 if (rhs_bb == NULL
2481 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2482 return false;
2484 return true;
2487 /* Return true if BB is a suitable basic block for inter-bb range test
2488 optimization. If BACKWARD is true, BB should be the only predecessor
2489 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2490 or compared against it to find a common basic block to which all
2491 conditions branch if true resp. false. If BACKWARD is false, TEST_BB should
2492 be the only predecessor of BB. */
2494 static bool
2495 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2496 bool backward)
2498 edge_iterator ei, ei2;
2499 edge e, e2;
2500 gimple stmt;
2501 gimple_stmt_iterator gsi;
2502 bool other_edge_seen = false;
2503 bool is_cond;
2505 if (test_bb == bb)
2506 return false;
2507 /* Check last stmt first. */
2508 stmt = last_stmt (bb);
2509 if (stmt == NULL
2510 || (gimple_code (stmt) != GIMPLE_COND
2511 && (backward || !final_range_test_p (stmt)))
2512 || gimple_visited_p (stmt)
2513 || stmt_could_throw_p (stmt)
2514 || *other_bb == bb)
2515 return false;
2516 is_cond = gimple_code (stmt) == GIMPLE_COND;
2517 if (is_cond)
2519 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2520 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2521 to *OTHER_BB (if not set yet, try to find it out). */
2522 if (EDGE_COUNT (bb->succs) != 2)
2523 return false;
2524 FOR_EACH_EDGE (e, ei, bb->succs)
2526 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2527 return false;
2528 if (e->dest == test_bb)
2530 if (backward)
2531 continue;
2532 else
2533 return false;
2535 if (e->dest == bb)
2536 return false;
2537 if (*other_bb == NULL)
2539 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2540 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2541 return false;
2542 else if (e->dest == e2->dest)
2543 *other_bb = e->dest;
2544 if (*other_bb == NULL)
2545 return false;
2547 if (e->dest == *other_bb)
2548 other_edge_seen = true;
2549 else if (backward)
2550 return false;
2552 if (*other_bb == NULL || !other_edge_seen)
2553 return false;
2555 else if (single_succ (bb) != *other_bb)
2556 return false;
2558 /* Now check all PHIs of *OTHER_BB. */
2559 e = find_edge (bb, *other_bb);
2560 e2 = find_edge (test_bb, *other_bb);
2561 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2563 gimple phi = gsi_stmt (gsi);
2564 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2565 corresponding to BB and TEST_BB predecessor must be the same. */
2566 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2567 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2569 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2570 one of the PHIs should have the lhs of the last stmt in
2571 that block as PHI arg and that PHI should have 0 or 1
2572 corresponding to it in all other range test basic blocks
2573 considered. */
2574 if (!is_cond)
2576 if (gimple_phi_arg_def (phi, e->dest_idx)
2577 == gimple_assign_lhs (stmt)
2578 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2579 || integer_onep (gimple_phi_arg_def (phi,
2580 e2->dest_idx))))
2581 continue;
2583 else
2585 gimple test_last = last_stmt (test_bb);
2586 if (gimple_code (test_last) != GIMPLE_COND
2587 && gimple_phi_arg_def (phi, e2->dest_idx)
2588 == gimple_assign_lhs (test_last)
2589 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2590 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2591 continue;
2594 return false;
2597 return true;
2600 /* Return true if BB doesn't have side-effects that would disallow
2601 range test optimization, all SSA_NAMEs set in the bb are consumed
2602 in the bb and there are no PHIs. */
2604 static bool
2605 no_side_effect_bb (basic_block bb)
2607 gimple_stmt_iterator gsi;
2608 gimple last;
2610 if (!gimple_seq_empty_p (phi_nodes (bb)))
2611 return false;
2612 last = last_stmt (bb);
2613 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2615 gimple stmt = gsi_stmt (gsi);
2616 tree lhs;
2617 imm_use_iterator imm_iter;
2618 use_operand_p use_p;
2620 if (is_gimple_debug (stmt))
2621 continue;
2622 if (gimple_has_side_effects (stmt))
2623 return false;
2624 if (stmt == last)
2625 return true;
2626 if (!is_gimple_assign (stmt))
2627 return false;
2628 lhs = gimple_assign_lhs (stmt);
2629 if (TREE_CODE (lhs) != SSA_NAME)
2630 return false;
2631 if (gimple_assign_rhs_could_trap_p (stmt))
2632 return false;
2633 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2635 gimple use_stmt = USE_STMT (use_p);
2636 if (is_gimple_debug (use_stmt))
2637 continue;
2638 if (gimple_bb (use_stmt) != bb)
2639 return false;
2642 return false;
2645 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2646 return true and fill in *OPS recursively. */
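/* For example (an illustrative sketch), given

     _7 = _3 | _4;
     _8 = _7 | _5;

   a call with VAR = _8 and CODE = BIT_IOR_EXPR pushes _3, _4 and _5
   onto *OPS, assuming _7 and the leaf names each have a single use so
   the chain is reassociable.  */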
2648 static bool
2649 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2650 struct loop *loop)
2652 gimple stmt = SSA_NAME_DEF_STMT (var);
2653 tree rhs[2];
2654 int i;
2656 if (!is_reassociable_op (stmt, code, loop))
2657 return false;
2659 rhs[0] = gimple_assign_rhs1 (stmt);
2660 rhs[1] = gimple_assign_rhs2 (stmt);
2661 gimple_set_visited (stmt, true);
2662 for (i = 0; i < 2; i++)
2663 if (TREE_CODE (rhs[i]) == SSA_NAME
2664 && !get_ops (rhs[i], code, ops, loop)
2665 && has_single_use (rhs[i]))
2667 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2669 oe->op = rhs[i];
2670 oe->rank = code;
2671 oe->id = 0;
2672 oe->count = 1;
2673 ops->safe_push (oe);
2675 return true;
2678 /* Find the ops that were added by get_ops starting from VAR, see if
2679 they were changed during update_range_test and if yes, create new
2680 stmts. */
2682 static tree
2683 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2684 unsigned int *pidx, struct loop *loop)
2686 gimple stmt = SSA_NAME_DEF_STMT (var);
2687 tree rhs[4];
2688 int i;
2690 if (!is_reassociable_op (stmt, code, loop))
2691 return NULL;
2693 rhs[0] = gimple_assign_rhs1 (stmt);
2694 rhs[1] = gimple_assign_rhs2 (stmt);
2695 rhs[2] = rhs[0];
2696 rhs[3] = rhs[1];
2697 for (i = 0; i < 2; i++)
2698 if (TREE_CODE (rhs[i]) == SSA_NAME)
2700 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2701 if (rhs[2 + i] == NULL_TREE)
2703 if (has_single_use (rhs[i]))
2704 rhs[2 + i] = ops[(*pidx)++]->op;
2705 else
2706 rhs[2 + i] = rhs[i];
2709 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2710 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2712 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2713 var = make_ssa_name (TREE_TYPE (var), NULL);
2714 gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2715 var, rhs[2], rhs[3]);
2716 gimple_set_uid (g, gimple_uid (stmt));
2717 gimple_set_visited (g, true);
2718 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2720 return var;
2723 /* Structure to track the initial value passed to get_ops and
2724 the range in the ops vector for each basic block. */
2726 struct inter_bb_range_test_entry
2728 tree op;
2729 unsigned int first_idx, last_idx;
2732 /* Inter-bb range test optimization. */
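/* As an illustrative sketch (x and foo are placeholder names):
   short-circuited source code such as

     if (x == 2 || x == 5 || x == 7)
       foo ();

   is emitted as a chain of basic blocks, each ending in a GIMPLE_COND
   branching to a common destination.  The routine below gathers those
   conditions into one operand vector, lets optimize_range_tests merge
   them, and then rewrites the GIMPLE_CONDs with the merged result.  */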
2734 static void
2735 maybe_optimize_range_tests (gimple stmt)
2737 basic_block first_bb = gimple_bb (stmt);
2738 basic_block last_bb = first_bb;
2739 basic_block other_bb = NULL;
2740 basic_block bb;
2741 edge_iterator ei;
2742 edge e;
2743 auto_vec<operand_entry_t> ops;
2744 auto_vec<inter_bb_range_test_entry> bbinfo;
2745 bool any_changes = false;
2747 /* Consider only basic blocks that end with GIMPLE_COND or
2748 a cast statement satisfying final_range_test_p. All
2749 but the last bb in the first_bb .. last_bb range
2750 should end with GIMPLE_COND. */
2751 if (gimple_code (stmt) == GIMPLE_COND)
2753 if (EDGE_COUNT (first_bb->succs) != 2)
2754 return;
2756 else if (final_range_test_p (stmt))
2757 other_bb = single_succ (first_bb);
2758 else
2759 return;
2761 if (stmt_could_throw_p (stmt))
2762 return;
2764 /* As relative ordering of post-dominator sons isn't fixed,
2765 maybe_optimize_range_tests can be called first on any
2766 bb in the range we want to optimize. So, start searching
2767 backwards, if first_bb can be set to a predecessor. */
2768 while (single_pred_p (first_bb))
2770 basic_block pred_bb = single_pred (first_bb);
2771 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2772 break;
2773 if (!no_side_effect_bb (first_bb))
2774 break;
2775 first_bb = pred_bb;
2777 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2778 Before starting forward search in last_bb successors, find
2779 out the other_bb. */
2780 if (first_bb == last_bb)
2782 other_bb = NULL;
2783 /* As non-GIMPLE_COND last stmt always terminates the range,
2784 if the backward search didn't discover anything, just give up. */
2785 if (gimple_code (stmt) != GIMPLE_COND)
2786 return;
2787 /* Look at both successors. Either it ends with a GIMPLE_COND
2788 and satisfies suitable_cond_bb, or ends with a cast and
2789 other_bb is that cast's successor. */
2790 FOR_EACH_EDGE (e, ei, first_bb->succs)
2791 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2792 || e->dest == first_bb)
2793 return;
2794 else if (single_pred_p (e->dest))
2796 stmt = last_stmt (e->dest);
2797 if (stmt
2798 && gimple_code (stmt) == GIMPLE_COND
2799 && EDGE_COUNT (e->dest->succs) == 2)
2801 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2802 break;
2803 else
2804 other_bb = NULL;
2806 else if (stmt
2807 && final_range_test_p (stmt)
2808 && find_edge (first_bb, single_succ (e->dest)))
2810 other_bb = single_succ (e->dest);
2811 if (other_bb == first_bb)
2812 other_bb = NULL;
2815 if (other_bb == NULL)
2816 return;
2818 /* Now do the forward search, moving last_bb to successor bbs
2819 that aren't other_bb. */
2820 while (EDGE_COUNT (last_bb->succs) == 2)
2822 FOR_EACH_EDGE (e, ei, last_bb->succs)
2823 if (e->dest != other_bb)
2824 break;
2825 if (e == NULL)
2826 break;
2827 if (!single_pred_p (e->dest))
2828 break;
2829 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2830 break;
2831 if (!no_side_effect_bb (e->dest))
2832 break;
2833 last_bb = e->dest;
2835 if (first_bb == last_bb)
2836 return;
2837 /* Here basic blocks first_bb through last_bb's predecessor
2838 end with GIMPLE_COND, all of them have one of the edges to
2839 other_bb and another to another block in the range,
2840 all blocks except first_bb don't have side-effects and
2841 last_bb ends with either GIMPLE_COND, or cast satisfying
2842 final_range_test_p. */
2843 for (bb = last_bb; ; bb = single_pred (bb))
2845 enum tree_code code;
2846 tree lhs, rhs;
2847 inter_bb_range_test_entry bb_ent;
2849 bb_ent.op = NULL_TREE;
2850 bb_ent.first_idx = ops.length ();
2851 bb_ent.last_idx = bb_ent.first_idx;
2852 e = find_edge (bb, other_bb);
2853 stmt = last_stmt (bb);
2854 gimple_set_visited (stmt, true);
2855 if (gimple_code (stmt) != GIMPLE_COND)
2857 use_operand_p use_p;
2858 gimple phi;
2859 edge e2;
2860 unsigned int d;
2862 lhs = gimple_assign_lhs (stmt);
2863 rhs = gimple_assign_rhs1 (stmt);
2864 gcc_assert (bb == last_bb);
2866 /* stmt is
2867 _123 = (int) _234;
2869 followed by:
2870 <bb M>:
2871 # _345 = PHI <_123(N), 1(...), 1(...)>
2873 or 0 instead of 1. If it is 0, the _234
2874 range test is anded together with all the
2875 other range tests, if it is 1, it is ored with
2876 them. */
2877 single_imm_use (lhs, &use_p, &phi);
2878 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2879 e2 = find_edge (first_bb, other_bb);
2880 d = e2->dest_idx;
2881 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2882 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2883 code = BIT_AND_EXPR;
2884 else
2886 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2887 code = BIT_IOR_EXPR;
2890 /* If _234 SSA_NAME_DEF_STMT is
2891 _234 = _567 | _789;
2892 (or &, corresponding to 1/0 in the phi arguments),
2893 push into ops the individual range test arguments
2894 of the bitwise or resp. and, recursively. */
2895 if (!get_ops (rhs, code, &ops,
2896 loop_containing_stmt (stmt))
2897 && has_single_use (rhs))
2899 /* Otherwise, push the _234 range test itself. */
2900 operand_entry_t oe
2901 = (operand_entry_t) pool_alloc (operand_entry_pool);
2903 oe->op = rhs;
2904 oe->rank = code;
2905 oe->id = 0;
2906 oe->count = 1;
2907 ops.safe_push (oe);
2908 bb_ent.last_idx++;
2910 else
2911 bb_ent.last_idx = ops.length ();
2912 bb_ent.op = rhs;
2913 bbinfo.safe_push (bb_ent);
2914 continue;
2916 /* Otherwise stmt is GIMPLE_COND. */
2917 code = gimple_cond_code (stmt);
2918 lhs = gimple_cond_lhs (stmt);
2919 rhs = gimple_cond_rhs (stmt);
2920 if (TREE_CODE (lhs) == SSA_NAME
2921 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2922 && ((code != EQ_EXPR && code != NE_EXPR)
2923 || rhs != boolean_false_node
2924 /* Either push into ops the individual bitwise
2925 or resp. and operands, depending on which
2926 edge is other_bb. */
2927 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2928 ^ (code == EQ_EXPR))
2929 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2930 loop_containing_stmt (stmt))))
2932 /* Or push the GIMPLE_COND stmt itself. */
2933 operand_entry_t oe
2934 = (operand_entry_t) pool_alloc (operand_entry_pool);
2936 oe->op = NULL;
2937 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2938 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2939 /* oe->op = NULL signals that there is no SSA_NAME
2940 for the range test, and oe->id instead is the
2941 number of the basic block at whose end the GIMPLE_COND
2942 is. */
2943 oe->id = bb->index;
2944 oe->count = 1;
2945 ops.safe_push (oe);
2946 bb_ent.op = NULL;
2947 bb_ent.last_idx++;
2949 else if (ops.length () > bb_ent.first_idx)
2951 bb_ent.op = lhs;
2952 bb_ent.last_idx = ops.length ();
2954 bbinfo.safe_push (bb_ent);
2955 if (bb == first_bb)
2956 break;
2958 if (ops.length () > 1)
2959 any_changes = optimize_range_tests (ERROR_MARK, &ops);
2960 if (any_changes)
2962 unsigned int idx;
2963 /* update_ops relies on has_single_use predicates returning the
2964 same values as they did during get_ops earlier. Additionally it
2965 never removes statements, only adds new ones and it should walk
2966 from the single imm use and check the predicate already before
2967 making those changes.
2968 On the other hand, the handling of GIMPLE_COND directly can turn
2969 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
2970 it needs to be done in a separate loop afterwards. */
2971 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2973 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2974 && bbinfo[idx].op != NULL_TREE)
2976 tree new_op;
2978 stmt = last_stmt (bb);
2979 new_op = update_ops (bbinfo[idx].op,
2980 (enum tree_code)
2981 ops[bbinfo[idx].first_idx]->rank,
2982 ops, &bbinfo[idx].first_idx,
2983 loop_containing_stmt (stmt));
2984 if (new_op == NULL_TREE)
2986 gcc_assert (bb == last_bb);
2987 new_op = ops[bbinfo[idx].first_idx++]->op;
2989 if (bbinfo[idx].op != new_op)
2991 imm_use_iterator iter;
2992 use_operand_p use_p;
2993 gimple use_stmt, cast_stmt = NULL;
2995 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
2996 if (is_gimple_debug (use_stmt))
2997 continue;
2998 else if (gimple_code (use_stmt) == GIMPLE_COND
2999 || gimple_code (use_stmt) == GIMPLE_PHI)
3000 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3001 SET_USE (use_p, new_op);
3002 else if (gimple_assign_cast_p (use_stmt))
3003 cast_stmt = use_stmt;
3004 else
3005 gcc_unreachable ();
3006 if (cast_stmt)
3008 gcc_assert (bb == last_bb);
3009 tree lhs = gimple_assign_lhs (cast_stmt);
3010 tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3011 enum tree_code rhs_code
3012 = gimple_assign_rhs_code (cast_stmt);
3013 gimple g;
3014 if (is_gimple_min_invariant (new_op))
3016 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3017 g = gimple_build_assign (new_lhs, new_op);
3019 else
3020 g = gimple_build_assign_with_ops (rhs_code, new_lhs,
3021 new_op, NULL_TREE);
3022 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3023 gimple_set_uid (g, gimple_uid (cast_stmt));
3024 gimple_set_visited (g, true);
3025 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3026 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3027 if (is_gimple_debug (use_stmt))
3028 continue;
3029 else if (gimple_code (use_stmt) == GIMPLE_COND
3030 || gimple_code (use_stmt) == GIMPLE_PHI)
3031 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3032 SET_USE (use_p, new_lhs);
3033 else
3034 gcc_unreachable ();
3038 if (bb == first_bb)
3039 break;
3041 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3043 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3044 && bbinfo[idx].op == NULL_TREE
3045 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3047 stmt = last_stmt (bb);
3048 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3049 gimple_cond_make_false (stmt);
3050 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3051 gimple_cond_make_true (stmt);
3052 else
3054 gimple_cond_set_code (stmt, NE_EXPR);
3055 gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
3056 gimple_cond_set_rhs (stmt, boolean_false_node);
3058 update_stmt (stmt);
3060 if (bb == first_bb)
3061 break;
3066 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3067 of STMT in its operands. This is also known as a "destructive
3068 update" operation. */
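/* A typical instance (an illustrative sketch) is the accumulator of a
   reduction loop such as

     for (i = 0; i < n; i++)
       sum = sum + a[i];

   where the GIMPLE looks roughly like

     sum_1 = PHI <sum_0(preheader), sum_2(latch)>
     sum_2 = sum_1 + _5;

   so is_phi_for_stmt (the addition, sum_1) returns true, and the
   operand swapping below can keep the accumulator in the position
   that preserves the sum-reduction shape for the vectorizer.  */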
3070 static bool
3071 is_phi_for_stmt (gimple stmt, tree operand)
3073 gimple def_stmt;
3074 tree lhs;
3075 use_operand_p arg_p;
3076 ssa_op_iter i;
3078 if (TREE_CODE (operand) != SSA_NAME)
3079 return false;
3081 lhs = gimple_assign_lhs (stmt);
3083 def_stmt = SSA_NAME_DEF_STMT (operand);
3084 if (gimple_code (def_stmt) != GIMPLE_PHI)
3085 return false;
3087 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3088 if (lhs == USE_FROM_PTR (arg_p))
3089 return true;
3090 return false;
3093 /* Remove def stmt of VAR if VAR has zero uses and recurse
3094 on rhs1 operand if so. */
3096 static void
3097 remove_visited_stmt_chain (tree var)
3099 gimple stmt;
3100 gimple_stmt_iterator gsi;
3102 while (1)
3104 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3105 return;
3106 stmt = SSA_NAME_DEF_STMT (var);
3107 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3109 var = gimple_assign_rhs1 (stmt);
3110 gsi = gsi_for_stmt (stmt);
3111 reassoc_remove_stmt (&gsi);
3112 release_defs (stmt);
3114 else
3115 return;
3119 /* This function checks three consecutive operands in
3120 the passed operands vector OPS starting from OPINDEX and
3121 swaps two operands if it is profitable for the binary operation
3122 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3124 We pair ops with the same rank if possible.
3126 The alternative we try is to see if STMT is a destructive
3127 update style statement, which is like:
3128 b = phi (a, ...)
3129 a = c + b;
3130 In that case, we want to use the destructive update form to
3131 expose the possible vectorizer sum reduction opportunity.
3132 In that case, the third operand will be the phi node. This
3133 check is not performed if STMT is null.
3135 We could, of course, try to be better as noted above, and do a
3136 lot of work to try to find these opportunities in >3 operand
3137 cases, but it is unlikely to be worth it. */
3139 static void
3140 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3141 unsigned int opindex, gimple stmt)
3143 operand_entry_t oe1, oe2, oe3;
3145 oe1 = ops[opindex];
3146 oe2 = ops[opindex + 1];
3147 oe3 = ops[opindex + 2];
3149 if ((oe1->rank == oe2->rank
3150 && oe2->rank != oe3->rank)
3151 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3152 && !is_phi_for_stmt (stmt, oe1->op)
3153 && !is_phi_for_stmt (stmt, oe2->op)))
3155 struct operand_entry temp = *oe3;
3156 oe3->op = oe1->op;
3157 oe3->rank = oe1->rank;
3158 oe1->op = temp.op;
3159 oe1->rank= temp.rank;
3161 else if ((oe1->rank == oe3->rank
3162 && oe2->rank != oe3->rank)
3163 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3164 && !is_phi_for_stmt (stmt, oe1->op)
3165 && !is_phi_for_stmt (stmt, oe3->op)))
3167 struct operand_entry temp = *oe2;
3168 oe2->op = oe1->op;
3169 oe2->rank = oe1->rank;
3170 oe1->op = temp.op;
3171 oe1->rank = temp.rank;
3175 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3176 two definitions, otherwise return STMT. */
3178 static inline gimple
3179 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3181 if (TREE_CODE (rhs1) == SSA_NAME
3182 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3183 stmt = SSA_NAME_DEF_STMT (rhs1);
3184 if (TREE_CODE (rhs2) == SSA_NAME
3185 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3186 stmt = SSA_NAME_DEF_STMT (rhs2);
3187 return stmt;
3190 /* Recursively rewrite our linearized statements so that the operators
3191 match those in OPS[OPINDEX], putting the computation in rank
3192 order. Return new lhs. */
3194 static tree
3195 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3196 vec<operand_entry_t> ops, bool changed)
3198 tree rhs1 = gimple_assign_rhs1 (stmt);
3199 tree rhs2 = gimple_assign_rhs2 (stmt);
3200 tree lhs = gimple_assign_lhs (stmt);
3201 operand_entry_t oe;
3203 /* The final recursion case for this function is that you have
3204 exactly two operations left.
3205 If we had exactly one op in the entire list to start with, we
3206 would have never called this function, and the tail recursion
3207 rewrites them one at a time. */
3208 if (opindex + 2 == ops.length ())
3210 operand_entry_t oe1, oe2;
3212 oe1 = ops[opindex];
3213 oe2 = ops[opindex + 1];
3215 if (rhs1 != oe1->op || rhs2 != oe2->op)
3217 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3218 unsigned int uid = gimple_uid (stmt);
3220 if (dump_file && (dump_flags & TDF_DETAILS))
3222 fprintf (dump_file, "Transforming ");
3223 print_gimple_stmt (dump_file, stmt, 0, 0);
3226 if (changed)
3228 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3229 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3230 stmt
3231 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3232 lhs, oe1->op, oe2->op);
3233 gimple_set_uid (stmt, uid);
3234 gimple_set_visited (stmt, true);
3235 if (insert_point == gsi_stmt (gsi))
3236 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3237 else
3238 insert_stmt_after (stmt, insert_point);
3240 else
3242 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3243 == stmt);
3244 gimple_assign_set_rhs1 (stmt, oe1->op);
3245 gimple_assign_set_rhs2 (stmt, oe2->op);
3246 update_stmt (stmt);
3249 if (rhs1 != oe1->op && rhs1 != oe2->op)
3250 remove_visited_stmt_chain (rhs1);
3252 if (dump_file && (dump_flags & TDF_DETAILS))
3254 fprintf (dump_file, " into ");
3255 print_gimple_stmt (dump_file, stmt, 0, 0);
3258 return lhs;
3261 /* If we hit here, we should have 3 or more ops left. */
3262 gcc_assert (opindex + 2 < ops.length ());
3264 /* Rewrite the next operator. */
3265 oe = ops[opindex];
3267 /* Recurse on the LHS of the binary operator, which is guaranteed to
3268 be the non-leaf side. */
3269 tree new_rhs1
3270 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3271 changed || oe->op != rhs2);
3273 if (oe->op != rhs2 || new_rhs1 != rhs1)
3275 if (dump_file && (dump_flags & TDF_DETAILS))
3277 fprintf (dump_file, "Transforming ");
3278 print_gimple_stmt (dump_file, stmt, 0, 0);
3281 /* If changed is false, this is either opindex == 0
3282 or all outer rhs2's were equal to corresponding oe->op,
3283 and powi_result is NULL.
3284 That means lhs is equivalent before and after reassociation.
3285 Otherwise ensure the old lhs SSA_NAME is not reused and
3286 create a new stmt as well, so that any debug stmts will be
3287 properly adjusted. */
3288 if (changed)
3290 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3291 unsigned int uid = gimple_uid (stmt);
3292 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3294 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3295 stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3296 lhs, new_rhs1, oe->op);
3297 gimple_set_uid (stmt, uid);
3298 gimple_set_visited (stmt, true);
3299 if (insert_point == gsi_stmt (gsi))
3300 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3301 else
3302 insert_stmt_after (stmt, insert_point);
3304 else
3306 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3307 == stmt);
3308 gimple_assign_set_rhs1 (stmt, new_rhs1);
3309 gimple_assign_set_rhs2 (stmt, oe->op);
3310 update_stmt (stmt);
3313 if (dump_file && (dump_flags & TDF_DETAILS))
3315 fprintf (dump_file, " into ");
3316 print_gimple_stmt (dump_file, stmt, 0, 0);
3319 return lhs;
3322 /* Find out how many cycles we need to compute the statement chain.
3323 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3324 maximum number of independent statements we may execute per cycle. */
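/* For instance, with OPS_NUM == 8 and CPU_WIDTH == 2 the code below
   computes 8 / (2 * 2) == 2 cycles for the first phase, leaving
   8 - 2 * 2 == 4 operands, and log2 (4) == 2 further cycles for the
   final reduction, so it returns 4.  */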
3326 static int
3327 get_required_cycles (int ops_num, int cpu_width)
3329 int res;
3330 int elog;
3331 unsigned int rest;
3333 /* While we have more than 2 * cpu_width operands
3334 we may reduce the number of operands by cpu_width
3335 per cycle. */
3336 res = ops_num / (2 * cpu_width);
3338 /* The remaining operand count may then be halved each cycle
3339 until we have only one operand. */
3340 rest = (unsigned)(ops_num - res * cpu_width);
3341 elog = exact_log2 (rest);
3342 if (elog >= 0)
3343 res += elog;
3344 else
3345 res += floor_log2 (rest) + 1;
3347 return res;
3350 /* Returns an optimal number of registers to use for computation of
3351 given statements. */
3353 static int
3354 get_reassociation_width (int ops_num, enum tree_code opc,
3355 enum machine_mode mode)
3357 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3358 int width;
3359 int width_min;
3360 int cycles_best;
3362 if (param_width > 0)
3363 width = param_width;
3364 else
3365 width = targetm.sched.reassociation_width (opc, mode);
3367 if (width == 1)
3368 return width;
3370 /* Get the minimal time required for sequence computation. */
3371 cycles_best = get_required_cycles (ops_num, width);
3373 /* Check if we may use a smaller width and still compute the sequence in
3374 the same time. That allows us to reduce register usage.
3375 get_required_cycles is monotonically increasing with lower width
3376 so we can perform a binary search for the minimal width that still
3377 results in the optimal cycle count. */
3378 width_min = 1;
3379 while (width > width_min)
3381 int width_mid = (width + width_min) / 2;
3383 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3384 width = width_mid;
3385 else if (width_min < width_mid)
3386 width_min = width_mid;
3387 else
3388 break;
3391 return width;
3394 /* Recursively rewrite our linearized statements so that the operators
3395 match those in OPS[OPINDEX], putting the computation in rank
3396 order and trying to allow operations to be executed in
3397 parallel. */
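/* As an illustrative sketch, for the operand vector {a, b, c, d} and
   width 2 this emits roughly

     _1 = c + d;
     _2 = a + b;
     lhs = _1 + _2;

   so that the two partial sums are independent and can issue in the
   same cycle.  */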
3399 static void
3400 rewrite_expr_tree_parallel (gimple stmt, int width,
3401 vec<operand_entry_t> ops)
3403 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3404 int op_num = ops.length ();
3405 int stmt_num = op_num - 1;
3406 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3407 int op_index = op_num - 1;
3408 int stmt_index = 0;
3409 int ready_stmts_end = 0;
3410 int i = 0;
3411 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3413 /* We start expression rewriting from the top statements.
3414 So, in this loop we create a full list of statements
3415 we will work with. */
3416 stmts[stmt_num - 1] = stmt;
3417 for (i = stmt_num - 2; i >= 0; i--)
3418 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3420 for (i = 0; i < stmt_num; i++)
3422 tree op1, op2;
3424 /* Determine whether we should use results of
3425 already handled statements or not. */
3426 if (ready_stmts_end == 0
3427 && (i - stmt_index >= width || op_index < 1))
3428 ready_stmts_end = i;
3430 /* Now we choose operands for the next statement. A nonzero
3431 value of ready_stmts_end here means that we should use
3432 the result of already generated statements as new operand. */
3433 if (ready_stmts_end > 0)
3435 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3436 if (ready_stmts_end > stmt_index)
3437 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3438 else if (op_index >= 0)
3439 op2 = ops[op_index--]->op;
3440 else
3442 gcc_assert (stmt_index < i);
3443 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3446 if (stmt_index >= ready_stmts_end)
3447 ready_stmts_end = 0;
3449 else
3451 if (op_index > 1)
3452 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3453 op2 = ops[op_index--]->op;
3454 op1 = ops[op_index--]->op;
3457 /* If we emit the last statement then we should put
3458 operands into the last statement. It will also
3459 break the loop. */
3460 if (op_index < 0 && stmt_index == i)
3461 i = stmt_num - 1;
3463 if (dump_file && (dump_flags & TDF_DETAILS))
3465 fprintf (dump_file, "Transforming ");
3466 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3469 /* We keep original statement only for the last one. All
3470 others are recreated. */
3471 if (i == stmt_num - 1)
3473 gimple_assign_set_rhs1 (stmts[i], op1);
3474 gimple_assign_set_rhs2 (stmts[i], op2);
3475 update_stmt (stmts[i]);
3477 else
3478 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3480 if (dump_file && (dump_flags & TDF_DETAILS))
3482 fprintf (dump_file, " into ");
3483 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3487 remove_visited_stmt_chain (last_rhs1);
3490 /* Transform STMT, which is really (A +B) + (C + D) into the left
3491 linear form, ((A+B)+C)+D.
3492 Recurse on D if necessary. */
3494 static void
3495 linearize_expr (gimple stmt)
3497 gimple_stmt_iterator gsi;
3498 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3499 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3500 gimple oldbinrhs = binrhs;
3501 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3502 gimple newbinrhs = NULL;
3503 struct loop *loop = loop_containing_stmt (stmt);
3504 tree lhs = gimple_assign_lhs (stmt);
3506 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3507 && is_reassociable_op (binrhs, rhscode, loop));
3509 gsi = gsi_for_stmt (stmt);
3511 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3512 binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3513 make_ssa_name (TREE_TYPE (lhs), NULL),
3514 gimple_assign_lhs (binlhs),
3515 gimple_assign_rhs2 (binrhs));
3516 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3517 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3518 gimple_set_uid (binrhs, gimple_uid (stmt));
3520 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3521 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3523 if (dump_file && (dump_flags & TDF_DETAILS))
3525 fprintf (dump_file, "Linearized: ");
3526 print_gimple_stmt (dump_file, stmt, 0, 0);
3529 reassociate_stats.linearized++;
3530 update_stmt (stmt);
3532 gsi = gsi_for_stmt (oldbinrhs);
3533 reassoc_remove_stmt (&gsi);
3534 release_defs (oldbinrhs);
3536 gimple_set_visited (stmt, true);
3537 gimple_set_visited (binlhs, true);
3538 gimple_set_visited (binrhs, true);
3540 /* Tail recurse on the new rhs if it still needs reassociation. */
3541 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3542 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3543 want to change the algorithm while converting to tuples. */
3544 linearize_expr (stmt);
3547 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3548 it. Otherwise, return NULL. */
3550 static gimple
3551 get_single_immediate_use (tree lhs)
3553 use_operand_p immuse;
3554 gimple immusestmt;
3556 if (TREE_CODE (lhs) == SSA_NAME
3557 && single_imm_use (lhs, &immuse, &immusestmt)
3558 && is_gimple_assign (immusestmt))
3559 return immusestmt;
3561 return NULL;
3564 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3565 representing the negated value. Insertions of any necessary
3566 instructions go before GSI.
3567 This function is recursive in that, if you hand it "a_5" as the
3568 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3569 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3571 static tree
3572 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3574 gimple negatedefstmt = NULL;
3575 tree resultofnegate;
3576 gimple_stmt_iterator gsi;
3577 unsigned int uid;
3579 /* If we are trying to negate a name, defined by an add, negate the
3580 add operands instead. */
3581 if (TREE_CODE (tonegate) == SSA_NAME)
3582 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3583 if (TREE_CODE (tonegate) == SSA_NAME
3584 && is_gimple_assign (negatedefstmt)
3585 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3586 && has_single_use (gimple_assign_lhs (negatedefstmt))
3587 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3589 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3590 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3591 tree lhs = gimple_assign_lhs (negatedefstmt);
3592 gimple g;
3594 gsi = gsi_for_stmt (negatedefstmt);
3595 rhs1 = negate_value (rhs1, &gsi);
3597 gsi = gsi_for_stmt (negatedefstmt);
3598 rhs2 = negate_value (rhs2, &gsi);
3600 gsi = gsi_for_stmt (negatedefstmt);
3601 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3602 gimple_set_visited (negatedefstmt, true);
3603 g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3604 gimple_set_uid (g, gimple_uid (negatedefstmt));
3605 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3606 return lhs;
3609 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3610 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3611 NULL_TREE, true, GSI_SAME_STMT);
3612 gsi = *gsip;
3613 uid = gimple_uid (gsi_stmt (gsi));
3614 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3616 gimple stmt = gsi_stmt (gsi);
3617 if (gimple_uid (stmt) != 0)
3618 break;
3619 gimple_set_uid (stmt, uid);
3621 return resultofnegate;
3624 /* Return true if we should break up the subtract in STMT into an add
3625 with negate. This is true when the subtract operands are really
3626 adds, or the subtract itself is used in an add expression. In
3627 either case, breaking up the subtract into an add with negate
3628 exposes the adds to reassociation. */
3630 static bool
3631 should_break_up_subtract (gimple stmt)
3633 tree lhs = gimple_assign_lhs (stmt);
3634 tree binlhs = gimple_assign_rhs1 (stmt);
3635 tree binrhs = gimple_assign_rhs2 (stmt);
3636 gimple immusestmt;
3637 struct loop *loop = loop_containing_stmt (stmt);
3639 if (TREE_CODE (binlhs) == SSA_NAME
3640 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3641 return true;
3643 if (TREE_CODE (binrhs) == SSA_NAME
3644 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3645 return true;
3647 if (TREE_CODE (lhs) == SSA_NAME
3648 && (immusestmt = get_single_immediate_use (lhs))
3649 && is_gimple_assign (immusestmt)
3650 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3651 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3652 return true;
3653 return false;
3656 /* Transform STMT from A - B into A + -B. */
3658 static void
3659 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3661 tree rhs1 = gimple_assign_rhs1 (stmt);
3662 tree rhs2 = gimple_assign_rhs2 (stmt);
3664 if (dump_file && (dump_flags & TDF_DETAILS))
3666 fprintf (dump_file, "Breaking up subtract ");
3667 print_gimple_stmt (dump_file, stmt, 0, 0);
3670 rhs2 = negate_value (rhs2, gsip);
3671 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3672 update_stmt (stmt);
3675 /* Determine whether STMT is a builtin call that raises an SSA name
3676 to an integer power and has only one use. If so, and this is early
3677 reassociation and unsafe math optimizations are permitted, place
3678 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3679 If any of these conditions does not hold, return FALSE. */
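/* For example (a sketch, assuming -funsafe-math-optimizations and the
   early reassociation pass), a single-use call like

     t = __builtin_pow (x, 3.0);

   yields *BASE == x and *EXPONENT == 3, whereas pow (x, 0.5) or a
   negative exponent is rejected.  */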
3681 static bool
3682 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3684 tree fndecl, arg1;
3685 REAL_VALUE_TYPE c, cint;
3687 if (!first_pass_instance
3688 || !flag_unsafe_math_optimizations
3689 || !is_gimple_call (stmt)
3690 || !has_single_use (gimple_call_lhs (stmt)))
3691 return false;
3693 fndecl = gimple_call_fndecl (stmt);
3695 if (!fndecl
3696 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3697 return false;
3699 switch (DECL_FUNCTION_CODE (fndecl))
3701 CASE_FLT_FN (BUILT_IN_POW):
3702 *base = gimple_call_arg (stmt, 0);
3703 arg1 = gimple_call_arg (stmt, 1);
3705 if (TREE_CODE (arg1) != REAL_CST)
3706 return false;
3708 c = TREE_REAL_CST (arg1);
3710 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3711 return false;
3713 *exponent = real_to_integer (&c);
3714 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
3715 if (!real_identical (&c, &cint))
3716 return false;
3718 break;
3720 CASE_FLT_FN (BUILT_IN_POWI):
3721 *base = gimple_call_arg (stmt, 0);
3722 arg1 = gimple_call_arg (stmt, 1);
3724 if (!tree_fits_shwi_p (arg1))
3725 return false;
3727 *exponent = tree_to_shwi (arg1);
3728 break;
3730 default:
3731 return false;
3734 /* Expanding negative exponents is generally unproductive, so we don't
3735 complicate matters with those. Exponents of zero and one should
3736 have been handled by expression folding. */
3737 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3738 return false;
3740 return true;
3743 /* Recursively linearize a binary expression that is the RHS of STMT.
3744 Place the operands of the expression tree in the vector named OPS. */
3746 static void
3747 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3748 bool is_associative, bool set_visited)
3750 tree binlhs = gimple_assign_rhs1 (stmt);
3751 tree binrhs = gimple_assign_rhs2 (stmt);
3752 gimple binlhsdef = NULL, binrhsdef = NULL;
3753 bool binlhsisreassoc = false;
3754 bool binrhsisreassoc = false;
3755 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3756 struct loop *loop = loop_containing_stmt (stmt);
3757 tree base = NULL_TREE;
3758 HOST_WIDE_INT exponent = 0;
3760 if (set_visited)
3761 gimple_set_visited (stmt, true);
3763 if (TREE_CODE (binlhs) == SSA_NAME)
3765 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3766 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3767 && !stmt_could_throw_p (binlhsdef));
3770 if (TREE_CODE (binrhs) == SSA_NAME)
3772 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3773 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3774 && !stmt_could_throw_p (binrhsdef));
3777 /* If the LHS is not reassociable, but the RHS is, we need to swap
3778 them. If neither is reassociable, there is nothing we can do, so
3779 just put them in the ops vector. If the LHS is reassociable,
3780 linearize it. If both are reassociable, then linearize the RHS
3781 and the LHS. */
3783 if (!binlhsisreassoc)
3785 tree temp;
3787 /* If this is not an associative operation like division, give up. */
3788 if (!is_associative)
3790 add_to_ops_vec (ops, binrhs);
3791 return;
3794 if (!binrhsisreassoc)
3796 if (rhscode == MULT_EXPR
3797 && TREE_CODE (binrhs) == SSA_NAME
3798 && acceptable_pow_call (binrhsdef, &base, &exponent))
3800 add_repeat_to_ops_vec (ops, base, exponent);
3801 gimple_set_visited (binrhsdef, true);
3803 else
3804 add_to_ops_vec (ops, binrhs);
3806 if (rhscode == MULT_EXPR
3807 && TREE_CODE (binlhs) == SSA_NAME
3808 && acceptable_pow_call (binlhsdef, &base, &exponent))
3810 add_repeat_to_ops_vec (ops, base, exponent);
3811 gimple_set_visited (binlhsdef, true);
3813 else
3814 add_to_ops_vec (ops, binlhs);
3816 return;
3819 if (dump_file && (dump_flags & TDF_DETAILS))
3821 fprintf (dump_file, "swapping operands of ");
3822 print_gimple_stmt (dump_file, stmt, 0, 0);
3825 swap_ssa_operands (stmt,
3826 gimple_assign_rhs1_ptr (stmt),
3827 gimple_assign_rhs2_ptr (stmt));
3828 update_stmt (stmt);
3830 if (dump_file && (dump_flags & TDF_DETAILS))
3832 fprintf (dump_file, " is now ");
3833 print_gimple_stmt (dump_file, stmt, 0, 0);
3836 /* We want to make it so the lhs is always the reassociative op,
3837 so swap. */
3838 temp = binlhs;
3839 binlhs = binrhs;
3840 binrhs = temp;
3842 else if (binrhsisreassoc)
3844 linearize_expr (stmt);
3845 binlhs = gimple_assign_rhs1 (stmt);
3846 binrhs = gimple_assign_rhs2 (stmt);
3849 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3850 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3851 rhscode, loop));
3852 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3853 is_associative, set_visited);
3855 if (rhscode == MULT_EXPR
3856 && TREE_CODE (binrhs) == SSA_NAME
3857 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3859 add_repeat_to_ops_vec (ops, base, exponent);
3860 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3862 else
3863 add_to_ops_vec (ops, binrhs);
3866 /* Repropagate the negates back into subtracts, since no other pass
3867 currently does it. */
3869 static void
3870 repropagate_negates (void)
3872 unsigned int i = 0;
3873 tree negate;
3875 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3877 gimple user = get_single_immediate_use (negate);
3879 if (!user || !is_gimple_assign (user))
3880 continue;
3882 /* The negate operand can be either operand of a PLUS_EXPR
3883 (it can be the LHS if the RHS is a constant for example).
3885 Force the negate operand to the RHS of the PLUS_EXPR, then
3886 transform the PLUS_EXPR into a MINUS_EXPR. */
3887 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3889 /* If the negated operand appears on the LHS of the
3890 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3891 to force the negated operand to the RHS of the PLUS_EXPR. */
3892 if (gimple_assign_rhs1 (user) == negate)
3894 swap_ssa_operands (user,
3895 gimple_assign_rhs1_ptr (user),
3896 gimple_assign_rhs2_ptr (user));
3899 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3900 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3901 if (gimple_assign_rhs2 (user) == negate)
3903 tree rhs1 = gimple_assign_rhs1 (user);
3904 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3905 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3906 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3907 update_stmt (user);
3910 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3912 if (gimple_assign_rhs1 (user) == negate)
3914 /* We have
3915 x = -a
3916 y = x - b
3917 which we transform into
3918 x = a + b
3919 y = -x .
3920 This pushes down the negate which we possibly can merge
3921 into some other operation, hence insert it into the
3922 plus_negates vector. */
3923 gimple feed = SSA_NAME_DEF_STMT (negate);
3924 tree a = gimple_assign_rhs1 (feed);
3925 tree b = gimple_assign_rhs2 (user);
3926 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
3927 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
3928 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
3929 gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
3930 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3931 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
3932 user = gsi_stmt (gsi2);
3933 update_stmt (user);
3934 reassoc_remove_stmt (&gsi);
3935 release_defs (feed);
3936 plus_negates.safe_push (gimple_assign_lhs (user));
3938 else
3940 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3941 rid of one operation. */
3942 gimple feed = SSA_NAME_DEF_STMT (negate);
3943 tree a = gimple_assign_rhs1 (feed);
3944 tree rhs1 = gimple_assign_rhs1 (user);
3945 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3946 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3947 update_stmt (gsi_stmt (gsi));
3953 /* Returns true if OP is of a type for which we can do reassociation:
3954 integral types whose overflow wraps, non-saturating fixed-point types,
3955 and floating-point types when associative-math is enabled. */
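/* For example: an unsigned int operand qualifies (its overflow wraps),
a plain signed int does not unless -fwrapv is in effect, a double
qualifies only under -fassociative-math, and saturating fixed-point
operands never do. */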
3957 static bool
3958 can_reassociate_p (tree op)
3960 tree type = TREE_TYPE (op);
3961 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3962 || NON_SAT_FIXED_POINT_TYPE_P (type)
3963 || (flag_associative_math && FLOAT_TYPE_P (type)))
3964 return true;
3965 return false;
3968 /* Break up subtract operations in block BB.
3970 We do this top down because we don't know whether the subtract is
3971 part of a possible chain of reassociation except at the top.
3973 IE given
3974 d = f + g
3975 c = a + e
3976 b = c - d
3977 q = b - r
3978 k = t - q
3980 we want to break up k = t - q, but we won't until we've transformed q
3981 = b - r, which won't be broken up until we transform b = c - d.
3983 En passant, clear the GIMPLE visited flag on every statement
3984 and set UIDs within each basic block. */
3986 static void
3987 break_up_subtract_bb (basic_block bb)
3989 gimple_stmt_iterator gsi;
3990 basic_block son;
3991 unsigned int uid = 1;
3993 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3995 gimple stmt = gsi_stmt (gsi);
3996 gimple_set_visited (stmt, false);
3997 gimple_set_uid (stmt, uid++);
3999 if (!is_gimple_assign (stmt)
4000 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4001 continue;
4003 /* Look for simple gimple subtract operations. */
4004 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4006 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4007 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4008 continue;
4010 /* Check for a subtract used only in an addition. If this
4011 is the case, transform it into add of a negate for better
4012 reassociation. IE transform C = A-B into C = A + -B if C
4013 is only used in an addition. */
4014 if (should_break_up_subtract (stmt))
4015 break_up_subtract (stmt, &gsi);
4017 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4018 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4019 plus_negates.safe_push (gimple_assign_lhs (stmt));
4021 for (son = first_dom_son (CDI_DOMINATORS, bb);
4022 son;
4023 son = next_dom_son (CDI_DOMINATORS, son))
4024 break_up_subtract_bb (son);
4027 /* Used for repeated factor analysis. */
4028 struct repeat_factor_d
4030 /* An SSA name that occurs in a multiply chain. */
4031 tree factor;
4033 /* Cached rank of the factor. */
4034 unsigned rank;
4036 /* Number of occurrences of the factor in the chain. */
4037 HOST_WIDE_INT count;
4039 /* An SSA name representing the product of this factor and
4040 all factors appearing later in the repeated factor vector. */
4041 tree repr;
4044 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4045 typedef const struct repeat_factor_d *const_repeat_factor_t;
4048 static vec<repeat_factor> repeat_factor_vec;
4050 /* Used for sorting the repeat factor vector. Sort primarily by
4051 ascending occurrence count, secondarily by descending rank. */
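/* For instance (illustration only), factors {z: count 4, rank 7},
{y: count 3, rank 9}, {x: count 2, rank 8} sort to x (2), y (3), z (4);
two factors with equal counts would be ordered by descending rank. */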
4053 static int
4054 compare_repeat_factors (const void *x1, const void *x2)
4056 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4057 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4059 if (rf1->count != rf2->count)
4060 return rf1->count - rf2->count;
4062 return rf2->rank - rf1->rank;
4065 /* Look for repeated operands in OPS in the multiply tree rooted at
4066 STMT. Replace them with an optimal sequence of multiplies and powi
4067 builtin calls, and remove the used operands from OPS. Return an
4068 SSA name representing the value of the replacement sequence. */
4070 static tree
4071 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4073 unsigned i, j, vec_len;
4074 int ii;
4075 operand_entry_t oe;
4076 repeat_factor_t rf1, rf2;
4077 repeat_factor rfnew;
4078 tree result = NULL_TREE;
4079 tree target_ssa, iter_result;
4080 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4081 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4082 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4083 gimple mul_stmt, pow_stmt;
4085 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4086 target. */
4087 if (!powi_fndecl)
4088 return NULL_TREE;
4090 /* Allocate the repeated factor vector. */
4091 repeat_factor_vec.create (10);
4093 /* Scan the OPS vector for all SSA names in the product and build
4094 up a vector of occurrence counts for each factor. */
4095 FOR_EACH_VEC_ELT (*ops, i, oe)
4097 if (TREE_CODE (oe->op) == SSA_NAME)
4099 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4101 if (rf1->factor == oe->op)
4103 rf1->count += oe->count;
4104 break;
4108 if (j >= repeat_factor_vec.length ())
4110 rfnew.factor = oe->op;
4111 rfnew.rank = oe->rank;
4112 rfnew.count = oe->count;
4113 rfnew.repr = NULL_TREE;
4114 repeat_factor_vec.safe_push (rfnew);
4119 /* Sort the repeated factor vector by (a) increasing occurrence count,
4120 and (b) decreasing rank. */
4121 repeat_factor_vec.qsort (compare_repeat_factors);
4123 /* It is generally best to combine as many base factors as possible
4124 into a product before applying __builtin_powi to the result.
4125 However, the sort order chosen for the repeated factor vector
4126 allows us to cache partial results for the product of the base
4127 factors for subsequent use. When we already have a cached partial
4128 result from a previous iteration, it is best to make use of it
4129 before looking for another __builtin_pow opportunity.
4131 As an example, consider x * x * y * y * y * z * z * z * z.
4132 We want to first compose the product x * y * z, raise it to the
4133 second power, then multiply this by y * z, and finally multiply
4134 by z. This can be done in 5 multiplies provided we cache y * z
4135 for use in both expressions:
4137 t1 = y * z
4138 t2 = t1 * x
4139 t3 = t2 * t2
4140 t4 = t1 * t3
4141 result = t4 * z
4143 If we instead ignored the cached y * z and first multiplied by
4144 the __builtin_pow opportunity z * z, we would get the inferior:
4146 t1 = y * z
4147 t2 = t1 * x
4148 t3 = t2 * t2
4149 t4 = z * z
4150 t5 = t3 * t4
4151 result = t5 * y */
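/* As a source-level illustration (editor's sketch, not part of the pass):
when compiled with optimization and -funsafe-math-optimizations, a
function like

double
f (double x, double y, double z)
{
return x * x * y * y * y * z * z * z * z;
}

is rewritten along the lines of the five-multiply sequence above, using
__builtin_powi for the squared product x * y * z. */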
4153 vec_len = repeat_factor_vec.length ();
4155 /* Repeatedly look for opportunities to create a builtin_powi call. */
4156 while (true)
4158 HOST_WIDE_INT power;
4160 /* First look for the largest cached product of factors from
4161 preceding iterations. If found, create a builtin_powi for
4162 it if the minimum occurrence count for its factors is at
4163 least 2, or just use this cached product as our next
4164 multiplicand if the minimum occurrence count is 1. */
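/* In the x * x * y * y * y * z * z * z * z example above, the second
iteration of the enclosing loop finds the cached product y * z with one
occurrence left and multiplies it in directly; the third iteration finds
the lone remaining z. */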
4165 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4167 if (rf1->repr && rf1->count > 0)
4168 break;
4171 if (j < vec_len)
4173 power = rf1->count;
4175 if (power == 1)
4177 iter_result = rf1->repr;
4179 if (dump_file && (dump_flags & TDF_DETAILS))
4181 unsigned elt;
4182 repeat_factor_t rf;
4183 fputs ("Multiplying by cached product ", dump_file);
4184 for (elt = j; elt < vec_len; elt++)
4186 rf = &repeat_factor_vec[elt];
4187 print_generic_expr (dump_file, rf->factor, 0);
4188 if (elt < vec_len - 1)
4189 fputs (" * ", dump_file);
4191 fputs ("\n", dump_file);
4194 else
4196 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4197 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4198 build_int_cst (integer_type_node,
4199 power));
4200 gimple_call_set_lhs (pow_stmt, iter_result);
4201 gimple_set_location (pow_stmt, gimple_location (stmt));
4202 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4204 if (dump_file && (dump_flags & TDF_DETAILS))
4206 unsigned elt;
4207 repeat_factor_t rf;
4208 fputs ("Building __builtin_pow call for cached product (",
4209 dump_file);
4210 for (elt = j; elt < vec_len; elt++)
4212 rf = &repeat_factor_vec[elt];
4213 print_generic_expr (dump_file, rf->factor, 0);
4214 if (elt < vec_len - 1)
4215 fputs (" * ", dump_file);
4217 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4218 power);
4222 else
4224 /* Otherwise, find the first factor in the repeated factor
4225 vector whose occurrence count is at least 2. If no such
4226 factor exists, there are no builtin_powi opportunities
4227 remaining. */
4228 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4230 if (rf1->count >= 2)
4231 break;
4234 if (j >= vec_len)
4235 break;
4237 power = rf1->count;
4239 if (dump_file && (dump_flags & TDF_DETAILS))
4241 unsigned elt;
4242 repeat_factor_t rf;
4243 fputs ("Building __builtin_pow call for (", dump_file);
4244 for (elt = j; elt < vec_len; elt++)
4246 rf = &repeat_factor_vec[elt];
4247 print_generic_expr (dump_file, rf->factor, 0);
4248 if (elt < vec_len - 1)
4249 fputs (" * ", dump_file);
4251 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4254 reassociate_stats.pows_created++;
4256 /* Visit each element of the vector in reverse order (so that
4257 high-occurrence elements are visited first, and within the
4258 same occurrence count, lower-ranked elements are visited
4259 first). Form a linear product of all elements in this order
4260 whose occurrence count is at least that of element J.
4261 Record the SSA name representing the product of each element
4262 with all subsequent elements in the vector. */
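/* For the example above this builds, working from the end of the vector:
repr(z) = z, repr(y) = y * z, repr(x) = x * y * z, so the repr of
element J ends up holding the full product x * y * z that is passed to
__builtin_powi below. */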
4263 if (j == vec_len - 1)
4264 rf1->repr = rf1->factor;
4265 else
4267 for (ii = vec_len - 2; ii >= (int)j; ii--)
4269 tree op1, op2;
4271 rf1 = &repeat_factor_vec[ii];
4272 rf2 = &repeat_factor_vec[ii + 1];
4274 /* Init the last factor's representative to be itself. */
4275 if (!rf2->repr)
4276 rf2->repr = rf2->factor;
4278 op1 = rf1->factor;
4279 op2 = rf2->repr;
4281 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4282 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4283 target_ssa,
4284 op1, op2);
4285 gimple_set_location (mul_stmt, gimple_location (stmt));
4286 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4287 rf1->repr = target_ssa;
4289 /* Don't reprocess the multiply we just introduced. */
4290 gimple_set_visited (mul_stmt, true);
4294 /* Form a call to __builtin_powi for the maximum product
4295 just formed, raised to the power obtained earlier. */
4296 rf1 = &repeat_factor_vec[j];
4297 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4298 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4299 build_int_cst (integer_type_node,
4300 power));
4301 gimple_call_set_lhs (pow_stmt, iter_result);
4302 gimple_set_location (pow_stmt, gimple_location (stmt));
4303 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4306 /* If we previously formed at least one other builtin_powi call,
4307 form the product of this one and those others. */
4308 if (result)
4310 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4311 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4312 result, iter_result);
4313 gimple_set_location (mul_stmt, gimple_location (stmt));
4314 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4315 gimple_set_visited (mul_stmt, true);
4316 result = new_result;
4318 else
4319 result = iter_result;
4321 /* Decrement the occurrence count of each element in the product
4322 by the count found above, and remove this many copies of each
4323 factor from OPS. */
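/* Continuing the example: after the first __builtin_powi call is formed
with power 2, the counts drop to x:0, y:1, z:2 and two copies of each
factor are removed from OPS. */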
4324 for (i = j; i < vec_len; i++)
4326 unsigned k = power;
4327 unsigned n;
4329 rf1 = &repeat_factor_vec[i];
4330 rf1->count -= power;
4332 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4334 if (oe->op == rf1->factor)
4336 if (oe->count <= k)
4338 ops->ordered_remove (n);
4339 k -= oe->count;
4341 if (k == 0)
4342 break;
4344 else
4346 oe->count -= k;
4347 break;
4354 /* At this point all elements in the repeated factor vector have a
4355 remaining occurrence count of 0 or 1, and those with a count of 1
4356 don't have cached representatives. Re-sort the ops vector and
4357 clean up. */
4358 ops->qsort (sort_by_operand_rank);
4359 repeat_factor_vec.release ();
4361 /* Return the final product computed herein. Note that there may
4362 still be some elements with single occurrence count left in OPS;
4363 those will be handled by the normal reassociation logic. */
4364 return result;
4367 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4369 static void
4370 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4372 tree rhs1;
4374 if (dump_file && (dump_flags & TDF_DETAILS))
4376 fprintf (dump_file, "Transforming ");
4377 print_gimple_stmt (dump_file, stmt, 0, 0);
4380 rhs1 = gimple_assign_rhs1 (stmt);
4381 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4382 update_stmt (stmt);
4383 remove_visited_stmt_chain (rhs1);
4385 if (dump_file && (dump_flags & TDF_DETAILS))
4387 fprintf (dump_file, " into ");
4388 print_gimple_stmt (dump_file, stmt, 0, 0);
4392 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4394 static void
4395 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4396 tree rhs1, tree rhs2)
4398 if (dump_file && (dump_flags & TDF_DETAILS))
4400 fprintf (dump_file, "Transforming ");
4401 print_gimple_stmt (dump_file, stmt, 0, 0);
4404 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4405 update_stmt (gsi_stmt (*gsi));
4406 remove_visited_stmt_chain (rhs1);
4408 if (dump_file && (dump_flags & TDF_DETAILS))
4410 fprintf (dump_file, " into ");
4411 print_gimple_stmt (dump_file, stmt, 0, 0);
4415 /* Reassociate expressions in basic block BB and in the blocks that are
4416 its children in the post-dominator tree. */
4418 static void
4419 reassociate_bb (basic_block bb)
4421 gimple_stmt_iterator gsi;
4422 basic_block son;
4423 gimple stmt = last_stmt (bb);
4425 if (stmt && !gimple_visited_p (stmt))
4426 maybe_optimize_range_tests (stmt);
4428 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4430 stmt = gsi_stmt (gsi);
4432 if (is_gimple_assign (stmt)
4433 && !stmt_could_throw_p (stmt))
4435 tree lhs, rhs1, rhs2;
4436 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4438 /* If this is not a gimple binary expression, there is
4439 nothing for us to do with it. */
4440 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4441 continue;
4443 /* If this was part of an already processed statement,
4444 we don't need to touch it again. */
4445 if (gimple_visited_p (stmt))
4447 /* This statement might have become dead because of previous
4448 reassociations. */
4449 if (has_zero_uses (gimple_get_lhs (stmt)))
4451 reassoc_remove_stmt (&gsi);
4452 release_defs (stmt);
4453 /* We might have removed the last statement above, which
4454 leaves the iterator pointing at the end of the sequence.
4455 Reset it to the last statement in that case; if the
4456 sequence is now empty because we removed its last
4457 statement, there is nothing left to process in this
4458 block, so bail out. */
4459 if (gsi_end_p (gsi))
4461 gsi = gsi_last_bb (bb);
4462 if (gsi_end_p (gsi))
4463 break;
4466 continue;
4469 lhs = gimple_assign_lhs (stmt);
4470 rhs1 = gimple_assign_rhs1 (stmt);
4471 rhs2 = gimple_assign_rhs2 (stmt);
4473 /* For non-bit or min/max operations we can't associate
4474 all types. Verify that here. */
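/* E.g. BIT_AND_EXPR on a plain signed int is always safe to reassociate,
whereas PLUS_EXPR on the same type is not unless overflow wraps
(see can_reassociate_p above). */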
4475 if (rhs_code != BIT_IOR_EXPR
4476 && rhs_code != BIT_AND_EXPR
4477 && rhs_code != BIT_XOR_EXPR
4478 && rhs_code != MIN_EXPR
4479 && rhs_code != MAX_EXPR
4480 && (!can_reassociate_p (lhs)
4481 || !can_reassociate_p (rhs1)
4482 || !can_reassociate_p (rhs2)))
4483 continue;
4485 if (associative_tree_code (rhs_code))
4487 auto_vec<operand_entry_t> ops;
4488 tree powi_result = NULL_TREE;
4490 /* There may be no immediate uses left by the time we
4491 get here because we may have eliminated them all. */
4492 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4493 continue;
4495 gimple_set_visited (stmt, true);
4496 linearize_expr_tree (&ops, stmt, true, true);
4497 ops.qsort (sort_by_operand_rank);
4498 optimize_ops_list (rhs_code, &ops);
4499 if (undistribute_ops_list (rhs_code, &ops,
4500 loop_containing_stmt (stmt)))
4502 ops.qsort (sort_by_operand_rank);
4503 optimize_ops_list (rhs_code, &ops);
4506 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4507 optimize_range_tests (rhs_code, &ops);
4509 if (first_pass_instance
4510 && rhs_code == MULT_EXPR
4511 && flag_unsafe_math_optimizations)
4512 powi_result = attempt_builtin_powi (stmt, &ops);
4514 /* If the operand vector is now empty, all operands were
4515 consumed by the __builtin_powi optimization. */
4516 if (ops.length () == 0)
4517 transform_stmt_to_copy (&gsi, stmt, powi_result);
4518 else if (ops.length () == 1)
4520 tree last_op = ops.last ()->op;
4522 if (powi_result)
4523 transform_stmt_to_multiply (&gsi, stmt, last_op,
4524 powi_result);
4525 else
4526 transform_stmt_to_copy (&gsi, stmt, last_op);
4528 else
4530 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4531 int ops_num = ops.length ();
4532 int width = get_reassociation_width (ops_num, rhs_code, mode);
4533 tree new_lhs = lhs;
4535 if (dump_file && (dump_flags & TDF_DETAILS))
4536 fprintf (dump_file,
4537 "Width = %d was chosen for reassociation\n", width);
4539 if (width > 1
4540 && ops.length () > 3)
4541 rewrite_expr_tree_parallel (stmt, width, ops);
4542 else
4544 /* When there are three operands left, we want
4545 to make sure the ones that get the double
4546 binary op are chosen wisely. */
4547 int len = ops.length ();
4548 if (len >= 3)
4549 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4551 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4552 powi_result != NULL);
4555 /* If we combined some repeated factors into a
4556 __builtin_powi call, multiply that result by the
4557 reassociated operands. */
4558 if (powi_result)
4560 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4561 tree type = TREE_TYPE (lhs);
4562 tree target_ssa = make_temp_ssa_name (type, NULL,
4563 "reassocpow");
4564 gimple_set_lhs (lhs_stmt, target_ssa);
4565 update_stmt (lhs_stmt);
4566 if (lhs != new_lhs)
4567 target_ssa = new_lhs;
4568 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4569 powi_result,
4570 target_ssa);
4571 gimple_set_location (mul_stmt, gimple_location (stmt));
4572 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4578 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4579 son;
4580 son = next_dom_son (CDI_POST_DOMINATORS, son))
4581 reassociate_bb (son);
4584 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4585 void debug_ops_vector (vec<operand_entry_t> ops);
4587 /* Dump the operand entry vector OPS to FILE. */
4589 void
4590 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4592 operand_entry_t oe;
4593 unsigned int i;
4595 FOR_EACH_VEC_ELT (ops, i, oe)
4597 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4598 print_generic_expr (file, oe->op, 0);
4602 /* Dump the operand entry vector OPS to STDERR. */
4604 DEBUG_FUNCTION void
4605 debug_ops_vector (vec<operand_entry_t> ops)
4607 dump_ops_vector (stderr, ops);
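/* Drive the pass over the whole function: break up subtracts by walking
the dominator tree from the entry block, then reassociate expressions by
walking the post-dominator tree from the exit block. */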
4610 static void
4611 do_reassoc (void)
4613 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4614 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4617 /* Initialize the reassociation pass. */
4619 static void
4620 init_reassoc (void)
4622 int i;
4623 long rank = 2;
4624 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4626 /* Find the loops, so that we can prevent moving calculations in
4627 them. */
4628 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4630 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4632 operand_entry_pool = create_alloc_pool ("operand entry pool",
4633 sizeof (struct operand_entry), 30);
4634 next_operand_entry_id = 0;
4636 /* Reverse RPO (Reverse Post Order) will give us something where
4637 deeper loops come later. */
4638 pre_and_rev_post_order_compute (NULL, bbs, false);
4639 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4640 operand_rank = new hash_map<tree, long>;
4642 /* Give each default definition a distinct rank. This includes
4643 parameters and the static chain. Walk backwards over all
4644 SSA names so that we get proper rank ordering according
4645 to tree_swap_operands_p. */
4646 for (i = num_ssa_names - 1; i > 0; --i)
4648 tree name = ssa_name (i);
4649 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4650 insert_operand_rank (name, ++rank);
4653 /* Set up rank for each BB */
4654 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
4655 bb_rank[bbs[i]] = ++rank << 16;
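/* Illustrative example (names made up): if the only default definitions
are the parameters a_1 and b_2, the backwards walk above assigns rank 3
to b_2 and rank 4 to a_1; the first block in the reverse post-order then
receives rank 5 << 16, keeping block ranks well above the
default-definition ranks. */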
4657 free (bbs);
4658 calculate_dominance_info (CDI_POST_DOMINATORS);
4659 plus_negates = vNULL;
4662 /* Cleanup after the reassociation pass, and print stats if
4663 requested. */
4665 static void
4666 fini_reassoc (void)
4668 statistics_counter_event (cfun, "Linearized",
4669 reassociate_stats.linearized);
4670 statistics_counter_event (cfun, "Constants eliminated",
4671 reassociate_stats.constants_eliminated);
4672 statistics_counter_event (cfun, "Ops eliminated",
4673 reassociate_stats.ops_eliminated);
4674 statistics_counter_event (cfun, "Statements rewritten",
4675 reassociate_stats.rewritten);
4676 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4677 reassociate_stats.pows_encountered);
4678 statistics_counter_event (cfun, "Built-in powi calls created",
4679 reassociate_stats.pows_created);
4681 delete operand_rank;
4682 free_alloc_pool (operand_entry_pool);
4683 free (bb_rank);
4684 plus_negates.release ();
4685 free_dominance_info (CDI_POST_DOMINATORS);
4686 loop_optimizer_finalize ();
4689 /* Gate and execute functions for Reassociation. */
4691 static unsigned int
4692 execute_reassoc (void)
4694 init_reassoc ();
4696 do_reassoc ();
4697 repropagate_negates ();
4699 fini_reassoc ();
4700 return 0;
4703 namespace {
4705 const pass_data pass_data_reassoc =
4707 GIMPLE_PASS, /* type */
4708 "reassoc", /* name */
4709 OPTGROUP_NONE, /* optinfo_flags */
4710 TV_TREE_REASSOC, /* tv_id */
4711 ( PROP_cfg | PROP_ssa ), /* properties_required */
4712 0, /* properties_provided */
4713 0, /* properties_destroyed */
4714 0, /* todo_flags_start */
4715 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
4718 class pass_reassoc : public gimple_opt_pass
4720 public:
4721 pass_reassoc (gcc::context *ctxt)
4722 : gimple_opt_pass (pass_data_reassoc, ctxt)
4725 /* opt_pass methods: */
4726 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4727 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
4728 virtual unsigned int execute (function *) { return execute_reassoc (); }
4730 }; // class pass_reassoc
4732 } // anon namespace
4734 gimple_opt_pass *
4735 make_pass_reassoc (gcc::context *ctxt)
4737 return new pass_reassoc (ctxt);