gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2014 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-inline.h"
33 #include "pointer-set.h"
34 #include "tree-ssa-alias.h"
35 #include "internal-fn.h"
36 #include "gimple-fold.h"
37 #include "tree-eh.h"
38 #include "gimple-expr.h"
39 #include "is-a.h"
40 #include "gimple.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "gimple-ssa.h"
44 #include "tree-cfg.h"
45 #include "tree-phinodes.h"
46 #include "ssa-iterators.h"
47 #include "stringpool.h"
48 #include "tree-ssanames.h"
49 #include "tree-ssa-loop-niter.h"
50 #include "tree-ssa-loop.h"
51 #include "expr.h"
52 #include "tree-dfa.h"
53 #include "tree-ssa.h"
54 #include "tree-iterator.h"
55 #include "tree-pass.h"
56 #include "alloc-pool.h"
57 #include "langhooks.h"
58 #include "cfgloop.h"
59 #include "flags.h"
60 #include "target.h"
61 #include "params.h"
62 #include "diagnostic-core.h"
63 #include "builtins.h"
65 /* This is a simple global reassociation pass. It is, in part, based
66 on the LLVM pass of the same name (They do some things more/less
67 than we do, in different orders, etc).
69 It consists of five steps:
71 1. Breaking up subtract operations into addition + negate, where
72 it would promote the reassociation of adds.
74 2. Left linearization of the expression trees, so that (A+B)+(C+D)
75 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
76 During linearization, we place the operands of the binary
77 expressions into a vector of operand_entry_t
79 3. Optimization of the operand lists, eliminating things like a +
80 -a, a & a, etc.
82 3a. Combine repeated factors with the same occurrence counts
83 into a __builtin_powi call that will later be optimized into
84 an optimal number of multiplies.
86 4. Rewrite the expression trees we linearized and optimized so
87 they are in proper rank order.
89 5. Repropagate negates, as nothing else will clean it up ATM.
91 A bit of theory on #4, since nobody seems to write anything down
92 about why it makes sense to do it the way they do it:
94 We could do this much nicer theoretically, but don't (for reasons
95 explained after how to do it theoretically nice :P).
97 In order to promote the most redundancy elimination, you want
98 binary expressions whose operands are the same rank (or
99 preferably, the same value) exposed to the redundancy eliminator,
100 for possible elimination.
102 So the way to do this if we really cared, is to build the new op
103 tree from the leaves to the roots, merging as you go, and putting the
104 new op on the end of the worklist, until you are left with one
105 thing on the worklist.
107 IE if you have to rewrite the following set of operands (listed with
108 rank in parentheses), with opcode PLUS_EXPR:
110 a (1), b (1), c (1), d (2), e (2)
113 We start with our merge worklist empty, and the ops list with all of
114 those on it.
116 You want to first merge all leaves of the same rank, as much as
117 possible.
119 So first build a binary op of
121 mergetmp = a + b, and put "mergetmp" on the merge worklist.
123 Because there is no three operand form of PLUS_EXPR, c is not going to
124 be exposed to redundancy elimination as a rank 1 operand.
126 So you might as well throw it on the merge worklist (you could also
127 consider it to now be a rank two operand, and merge it with d and e,
128 but in this case, you then have evicted e from a binary op. So at
129 least in this situation, you can't win.)
131 Then build a binary op of d + e
132 mergetmp2 = d + e
134 and put mergetmp2 on the merge worklist.
136 so merge worklist = {mergetmp, c, mergetmp2}
138 Continue building binary ops of these operations until you have only
139 one operation left on the worklist.
141 So we have
143 build binary op
144 mergetmp3 = mergetmp + c
146 worklist = {mergetmp2, mergetmp3}
148 mergetmp4 = mergetmp2 + mergetmp3
150 worklist = {mergetmp4}
152 because we have one operation left, we can now just set the original
153 statement equal to the result of that operation.
155 This will at least expose a + b and d + e to redundancy elimination
156 as binary operations.
158 For extra points, you can reuse the old statements to build the
159 mergetmps, since you shouldn't run out.
161 So why don't we do this?
163 Because it's expensive, and rarely will help. Most trees we are
164 reassociating have 3 or fewer ops. If they have 2 ops, they already
165 will be written into a nice single binary op. If you have 3 ops, a
166 single simple check suffices to tell you whether the first two are of the
167 same rank. If so, you know to order it
169 mergetmp = op1 + op2
170 newstmt = mergetmp + op3
172 instead of
173 mergetmp = op2 + op3
174 newstmt = mergetmp + op1
176 If all three are of the same rank, you can't expose them all in a
177 single binary operator anyway, so the above is *still* the best you
178 can do.
180 Thus, this is what we do. When we have three ops left, we check to see
181 what order to put them in, and call it a day. As a nod to vector sum
182 reduction, we check if any of the ops are really a phi node that is a
183 destructive update for the associating op, and keep the destructive
184 update together for vector sum reduction recognition. */
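/* For illustration only, a rough source-level sketch of the steps above on
   a tiny expression (the pass really works on GIMPLE statements; the names
   here are invented for the sketch):

     t = (a - b) + (b + c);

   Step 1 breaks up the subtraction:        t = (a + -b) + (b + c)
   Step 2 linearizes into an operand list:  { a, -b, b, c }  with opcode PLUS_EXPR
   Step 3 spots the a + -a style pair -b, b and cancels it to a zero
   constant, which the constant handling then folds away:  { a, c }
   Step 4 rewrites the remaining operands back into left-linear binary
   statements in rank order, and step 5 repropagates any surviving negates.  */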
187 /* Statistics */
188 static struct
190 int linearized;
191 int constants_eliminated;
192 int ops_eliminated;
193 int rewritten;
194 int pows_encountered;
195 int pows_created;
196 } reassociate_stats;
198 /* Operator, rank pair. */
199 typedef struct operand_entry
201 unsigned int rank;
202 int id;
203 tree op;
204 unsigned int count;
205 } *operand_entry_t;
207 static alloc_pool operand_entry_pool;
209 /* This is used to assign a unique ID to each struct operand_entry
210 so that qsort results are identical on different hosts. */
211 static int next_operand_entry_id;
213 /* Starting rank number for a given basic block, so that we can rank
214 operations using unmovable instructions in that BB based on the bb
215 depth. */
216 static long *bb_rank;
218 /* Operand->rank hashtable. */
219 static struct pointer_map_t *operand_rank;
221 /* Forward decls. */
222 static long get_rank (tree);
223 static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
225 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
226 possibly added by gsi_remove. */
228 bool
229 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
231 gimple stmt = gsi_stmt (*gsi);
233 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
234 return gsi_remove (gsi, true);
236 gimple_stmt_iterator prev = *gsi;
237 gsi_prev (&prev);
238 unsigned uid = gimple_uid (stmt);
239 basic_block bb = gimple_bb (stmt);
240 bool ret = gsi_remove (gsi, true);
241 if (!gsi_end_p (prev))
242 gsi_next (&prev);
243 else
244 prev = gsi_start_bb (bb);
245 gimple end_stmt = gsi_stmt (*gsi);
246 while ((stmt = gsi_stmt (prev)) != end_stmt)
248 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
249 gimple_set_uid (stmt, uid);
250 gsi_next (&prev);
252 return ret;
255 /* Bias amount for loop-carried phis. We want this to be larger than
256 the depth of any reassociation tree we can see, but not larger than
257 the rank difference between two blocks. */
258 #define PHI_LOOP_BIAS (1 << 15)
260 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
261 an innermost loop, and the phi has only a single use which is inside
262 the loop, then the rank is the block rank of the loop latch plus an
263 extra bias for the loop-carried dependence. This causes expressions
264 calculated into an accumulator variable to be independent for each
265 iteration of the loop. If STMT is some other phi, the rank is the
266 block rank of its containing block. */
267 static long
268 phi_rank (gimple stmt)
270 basic_block bb = gimple_bb (stmt);
271 struct loop *father = bb->loop_father;
272 tree res;
273 unsigned i;
274 use_operand_p use;
275 gimple use_stmt;
277 /* We only care about real loops (those with a latch). */
278 if (!father->latch)
279 return bb_rank[bb->index];
281 /* Interesting phis must be in headers of innermost loops. */
282 if (bb != father->header
283 || father->inner)
284 return bb_rank[bb->index];
286 /* Ignore virtual SSA_NAMEs. */
287 res = gimple_phi_result (stmt);
288 if (virtual_operand_p (res))
289 return bb_rank[bb->index];
291 /* The phi definition must have a single use, and that use must be
292 within the loop. Otherwise this isn't an accumulator pattern. */
293 if (!single_imm_use (res, &use, &use_stmt)
294 || gimple_bb (use_stmt)->loop_father != father)
295 return bb_rank[bb->index];
297 /* Look for phi arguments from within the loop. If found, bias this phi. */
298 for (i = 0; i < gimple_phi_num_args (stmt); i++)
300 tree arg = gimple_phi_arg_def (stmt, i);
301 if (TREE_CODE (arg) == SSA_NAME
302 && !SSA_NAME_IS_DEFAULT_DEF (arg))
304 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
305 if (gimple_bb (def_stmt)->loop_father == father)
306 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
310 /* Must be an uninteresting phi. */
311 return bb_rank[bb->index];
314 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
315 loop-carried dependence of an innermost loop, return TRUE; else
316 return FALSE. */
317 static bool
318 loop_carried_phi (tree exp)
320 gimple phi_stmt;
321 long block_rank;
323 if (TREE_CODE (exp) != SSA_NAME
324 || SSA_NAME_IS_DEFAULT_DEF (exp))
325 return false;
327 phi_stmt = SSA_NAME_DEF_STMT (exp);
329 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
330 return false;
332 /* Non-loop-carried phis have block rank. Loop-carried phis have
333 an additional bias added in. If this phi doesn't have block rank,
334 it's biased and should not be propagated. */
335 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
337 if (phi_rank (phi_stmt) != block_rank)
338 return true;
340 return false;
343 /* Return the maximum of RANK and the rank that should be propagated
344 from expression OP. For most operands, this is just the rank of OP.
345 For loop-carried phis, the value is zero to avoid undoing the bias
346 in favor of the phi. */
347 static long
348 propagate_rank (long rank, tree op)
350 long op_rank;
352 if (loop_carried_phi (op))
353 return rank;
355 op_rank = get_rank (op);
357 return MAX (rank, op_rank);
360 /* Look up the operand rank structure for expression E. */
362 static inline long
363 find_operand_rank (tree e)
365 void **slot = pointer_map_contains (operand_rank, e);
366 return slot ? (long) (intptr_t) *slot : -1;
369 /* Insert {E,RANK} into the operand rank hashtable. */
371 static inline void
372 insert_operand_rank (tree e, long rank)
374 void **slot;
375 gcc_assert (rank > 0);
376 slot = pointer_map_insert (operand_rank, e);
377 gcc_assert (!*slot);
378 *slot = (void *) (intptr_t) rank;
381 /* Given an expression E, return the rank of the expression. */
383 static long
384 get_rank (tree e)
386 /* Constants have rank 0. */
387 if (is_gimple_min_invariant (e))
388 return 0;
390 /* SSA_NAME's have the rank of the expression they are the result of.
392 For globals and uninitialized values, the rank is 0.
393 For function arguments, use the pre-setup rank.
394 For PHI nodes, stores, asm statements, etc, we use the rank of
395 the BB.
396 For simple operations, the rank is the maximum rank of any of
397 its operands, or the bb_rank, whichever is less.
398 I make no claims that this is optimal, however, it gives good
399 results. */
401 /* We make an exception to the normal ranking system to break
402 dependences of accumulator variables in loops. Suppose we
403 have a simple one-block loop containing:
405 x_1 = phi(x_0, x_2)
406 b = a + x_1
407 c = b + d
408 x_2 = c + e
410 As shown, each iteration of the calculation into x is fully
411 dependent upon the iteration before it. We would prefer to
412 see this in the form:
414 x_1 = phi(x_0, x_2)
415 b = a + d
416 c = b + e
417 x_2 = c + x_1
419 If the loop is unrolled, the calculations of b and c from
420 different iterations can be interleaved.
422 To obtain this result during reassociation, we bias the rank
423 of the phi definition x_1 upward, when it is recognized as an
424 accumulator pattern. The artificial rank causes it to be
425 added last, providing the desired independence. */
427 if (TREE_CODE (e) == SSA_NAME)
429 gimple stmt;
430 long rank;
431 int i, n;
432 tree op;
434 if (SSA_NAME_IS_DEFAULT_DEF (e))
435 return find_operand_rank (e);
437 stmt = SSA_NAME_DEF_STMT (e);
438 if (gimple_code (stmt) == GIMPLE_PHI)
439 return phi_rank (stmt);
441 if (!is_gimple_assign (stmt)
442 || gimple_vdef (stmt))
443 return bb_rank[gimple_bb (stmt)->index];
445 /* If we already have a rank for this expression, use that. */
446 rank = find_operand_rank (e);
447 if (rank != -1)
448 return rank;
450 /* Otherwise, find the maximum rank for the operands. As an
451 exception, remove the bias from loop-carried phis when propagating
452 the rank so that dependent operations are not also biased. */
453 rank = 0;
454 if (gimple_assign_single_p (stmt))
456 tree rhs = gimple_assign_rhs1 (stmt);
457 n = TREE_OPERAND_LENGTH (rhs);
458 if (n == 0)
459 rank = propagate_rank (rank, rhs);
460 else
462 for (i = 0; i < n; i++)
464 op = TREE_OPERAND (rhs, i);
466 if (op != NULL_TREE)
467 rank = propagate_rank (rank, op);
471 else
473 n = gimple_num_ops (stmt);
474 for (i = 1; i < n; i++)
476 op = gimple_op (stmt, i);
477 gcc_assert (op);
478 rank = propagate_rank (rank, op);
482 if (dump_file && (dump_flags & TDF_DETAILS))
484 fprintf (dump_file, "Rank for ");
485 print_generic_expr (dump_file, e, 0);
486 fprintf (dump_file, " is %ld\n", (rank + 1));
489 /* Note the rank in the hashtable so we don't recompute it. */
490 insert_operand_rank (e, (rank + 1));
491 return (rank + 1);
494 /* Globals, etc, are rank 0 */
495 return 0;
499 /* We want integer constants to end up last no matter what, since they are
500 the ones we can do the most with. */
501 #define INTEGER_CONST_TYPE 1 << 3
502 #define FLOAT_CONST_TYPE 1 << 2
503 #define OTHER_CONST_TYPE 1 << 1
505 /* Classify an invariant tree into integer, float, or other, so that
506 we can sort them to be near other constants of the same type. */
507 static inline int
508 constant_type (tree t)
510 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
511 return INTEGER_CONST_TYPE;
512 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
513 return FLOAT_CONST_TYPE;
514 else
515 return OTHER_CONST_TYPE;
518 /* qsort comparison function to sort operand entries PA and PB by rank
519 so that the sorted array is ordered by rank in decreasing order. */
520 static int
521 sort_by_operand_rank (const void *pa, const void *pb)
523 const operand_entry_t oea = *(const operand_entry_t *)pa;
524 const operand_entry_t oeb = *(const operand_entry_t *)pb;
526 /* It's nicer for optimize_expression if constants that are likely
527 to fold when added/multiplied/whatever are put next to each
528 other. Since all constants have rank 0, order them by type. */
529 if (oeb->rank == 0 && oea->rank == 0)
531 if (constant_type (oeb->op) != constant_type (oea->op))
532 return constant_type (oeb->op) - constant_type (oea->op);
533 else
534 /* To make sorting result stable, we use unique IDs to determine
535 order. */
536 return oeb->id - oea->id;
539 /* Lastly, make sure the versions that are the same go next to each
540 other. */
541 if ((oeb->rank - oea->rank == 0)
542 && TREE_CODE (oea->op) == SSA_NAME
543 && TREE_CODE (oeb->op) == SSA_NAME)
545 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
546 versions of removed SSA_NAMEs, so if possible prefer to sort
547 based on the basic block and gimple_uid of the SSA_NAME_DEF_STMT.
548 See PR60418. */
549 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
550 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
551 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
553 gimple stmta = SSA_NAME_DEF_STMT (oea->op);
554 gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
555 basic_block bba = gimple_bb (stmta);
556 basic_block bbb = gimple_bb (stmtb);
557 if (bbb != bba)
559 if (bb_rank[bbb->index] != bb_rank[bba->index])
560 return bb_rank[bbb->index] - bb_rank[bba->index];
562 else
564 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
565 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
566 if (da != db)
567 return da ? 1 : -1;
571 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
572 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
573 else
574 return oeb->id - oea->id;
577 if (oeb->rank != oea->rank)
578 return oeb->rank - oea->rank;
579 else
580 return oeb->id - oea->id;
583 /* Add an operand entry to *OPS for the tree operand OP. */
585 static void
586 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
588 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
590 oe->op = op;
591 oe->rank = get_rank (op);
592 oe->id = next_operand_entry_id++;
593 oe->count = 1;
594 ops->safe_push (oe);
597 /* Add an operand entry to *OPS for the tree operand OP with repeat
598 count REPEAT. */
600 static void
601 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
602 HOST_WIDE_INT repeat)
604 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
606 oe->op = op;
607 oe->rank = get_rank (op);
608 oe->id = next_operand_entry_id++;
609 oe->count = repeat;
610 ops->safe_push (oe);
612 reassociate_stats.pows_encountered++;
615 /* Return true if STMT is a reassociable operation containing a binary
616 operation with tree code CODE, and is inside LOOP. */
618 static bool
619 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
621 basic_block bb = gimple_bb (stmt);
623 if (gimple_bb (stmt) == NULL)
624 return false;
626 if (!flow_bb_inside_loop_p (loop, bb))
627 return false;
629 if (is_gimple_assign (stmt)
630 && gimple_assign_rhs_code (stmt) == code
631 && has_single_use (gimple_assign_lhs (stmt)))
632 return true;
634 return false;
638 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
639 operand of that operation. Otherwise, return NULL. */
641 static tree
642 get_unary_op (tree name, enum tree_code opcode)
644 gimple stmt = SSA_NAME_DEF_STMT (name);
646 if (!is_gimple_assign (stmt))
647 return NULL_TREE;
649 if (gimple_assign_rhs_code (stmt) == opcode)
650 return gimple_assign_rhs1 (stmt);
651 return NULL_TREE;
654 /* If CURR and LAST are a pair of ops that OPCODE allows us to
655 eliminate through equivalences, do so, remove them from OPS, and
656 return true. Otherwise, return false. */
658 static bool
659 eliminate_duplicate_pair (enum tree_code opcode,
660 vec<operand_entry_t> *ops,
661 bool *all_done,
662 unsigned int i,
663 operand_entry_t curr,
664 operand_entry_t last)
667 /* If we have two of the same op, and the opcode is & |, min, or max,
668 we can eliminate one of them.
669 If we have two of the same op, and the opcode is ^, we can
670 eliminate both of them. */
672 if (last && last->op == curr->op)
674 switch (opcode)
676 case MAX_EXPR:
677 case MIN_EXPR:
678 case BIT_IOR_EXPR:
679 case BIT_AND_EXPR:
680 if (dump_file && (dump_flags & TDF_DETAILS))
682 fprintf (dump_file, "Equivalence: ");
683 print_generic_expr (dump_file, curr->op, 0);
684 fprintf (dump_file, " [&|minmax] ");
685 print_generic_expr (dump_file, last->op, 0);
686 fprintf (dump_file, " -> ");
687 print_generic_stmt (dump_file, last->op, 0);
690 ops->ordered_remove (i);
691 reassociate_stats.ops_eliminated ++;
693 return true;
695 case BIT_XOR_EXPR:
696 if (dump_file && (dump_flags & TDF_DETAILS))
698 fprintf (dump_file, "Equivalence: ");
699 print_generic_expr (dump_file, curr->op, 0);
700 fprintf (dump_file, " ^ ");
701 print_generic_expr (dump_file, last->op, 0);
702 fprintf (dump_file, " -> nothing\n");
705 reassociate_stats.ops_eliminated += 2;
707 if (ops->length () == 2)
709 ops->create (0);
710 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
711 *all_done = true;
713 else
715 ops->ordered_remove (i-1);
716 ops->ordered_remove (i-1);
719 return true;
721 default:
722 break;
725 return false;
728 static vec<tree> plus_negates;
730 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
731 expression, look in OPS for a corresponding positive operation to cancel
732 it out. If we find one, remove the other from OPS, replace
733 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
734 return false. */
736 static bool
737 eliminate_plus_minus_pair (enum tree_code opcode,
738 vec<operand_entry_t> *ops,
739 unsigned int currindex,
740 operand_entry_t curr)
742 tree negateop;
743 tree notop;
744 unsigned int i;
745 operand_entry_t oe;
747 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
748 return false;
750 negateop = get_unary_op (curr->op, NEGATE_EXPR);
751 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
752 if (negateop == NULL_TREE && notop == NULL_TREE)
753 return false;
755 /* Any non-negated version will have a rank that is one less than
756 the current rank. So once we hit those ranks, if we don't find
757 one, we can stop. */
759 for (i = currindex + 1;
760 ops->iterate (i, &oe)
761 && oe->rank >= curr->rank - 1 ;
762 i++)
764 if (oe->op == negateop)
767 if (dump_file && (dump_flags & TDF_DETAILS))
769 fprintf (dump_file, "Equivalence: ");
770 print_generic_expr (dump_file, negateop, 0);
771 fprintf (dump_file, " + -");
772 print_generic_expr (dump_file, oe->op, 0);
773 fprintf (dump_file, " -> 0\n");
776 ops->ordered_remove (i);
777 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
778 ops->ordered_remove (currindex);
779 reassociate_stats.ops_eliminated ++;
781 return true;
783 else if (oe->op == notop)
785 tree op_type = TREE_TYPE (oe->op);
787 if (dump_file && (dump_flags & TDF_DETAILS))
789 fprintf (dump_file, "Equivalence: ");
790 print_generic_expr (dump_file, notop, 0);
791 fprintf (dump_file, " + ~");
792 print_generic_expr (dump_file, oe->op, 0);
793 fprintf (dump_file, " -> -1\n");
796 ops->ordered_remove (i);
797 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
798 ops->ordered_remove (currindex);
799 reassociate_stats.ops_eliminated ++;
801 return true;
805 /* CURR->OP is a negate expr in a plus expr: save it for later
806 inspection in repropagate_negates(). */
807 if (negateop != NULL_TREE)
808 plus_negates.safe_push (curr->op);
810 return false;
813 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR->OP is really a
814 bitwise not expression, look in OPS for a corresponding operand to
815 cancel it out. If we find one, remove the other from OPS, replace
816 OPS[CURRINDEX] with 0, and return true. Otherwise, return
817 false. */
819 static bool
820 eliminate_not_pairs (enum tree_code opcode,
821 vec<operand_entry_t> *ops,
822 unsigned int currindex,
823 operand_entry_t curr)
825 tree notop;
826 unsigned int i;
827 operand_entry_t oe;
829 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
830 || TREE_CODE (curr->op) != SSA_NAME)
831 return false;
833 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
834 if (notop == NULL_TREE)
835 return false;
837 /* Any non-not version will have a rank that is one less than
838 the current rank. So once we hit those ranks, if we don't find
839 one, we can stop. */
841 for (i = currindex + 1;
842 ops->iterate (i, &oe)
843 && oe->rank >= curr->rank - 1;
844 i++)
846 if (oe->op == notop)
848 if (dump_file && (dump_flags & TDF_DETAILS))
850 fprintf (dump_file, "Equivalence: ");
851 print_generic_expr (dump_file, notop, 0);
852 if (opcode == BIT_AND_EXPR)
853 fprintf (dump_file, " & ~");
854 else if (opcode == BIT_IOR_EXPR)
855 fprintf (dump_file, " | ~");
856 print_generic_expr (dump_file, oe->op, 0);
857 if (opcode == BIT_AND_EXPR)
858 fprintf (dump_file, " -> 0\n");
859 else if (opcode == BIT_IOR_EXPR)
860 fprintf (dump_file, " -> -1\n");
863 if (opcode == BIT_AND_EXPR)
864 oe->op = build_zero_cst (TREE_TYPE (oe->op));
865 else if (opcode == BIT_IOR_EXPR)
866 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
868 reassociate_stats.ops_eliminated += ops->length () - 1;
869 ops->truncate (0);
870 ops->quick_push (oe);
871 return true;
875 return false;
878 /* Use constant value that may be present in OPS to try to eliminate
879 operands. Note that this function is only really used when we've
880 eliminated ops for other reasons, or merged constants. Across
881 single statements, fold already does all of this, plus more. There
882 is little point in duplicating logic, so I've only included the
883 identities that I could ever construct testcases to trigger. */
885 static void
886 eliminate_using_constants (enum tree_code opcode,
887 vec<operand_entry_t> *ops)
889 operand_entry_t oelast = ops->last ();
890 tree type = TREE_TYPE (oelast->op);
892 if (oelast->rank == 0
893 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
895 switch (opcode)
897 case BIT_AND_EXPR:
898 if (integer_zerop (oelast->op))
900 if (ops->length () != 1)
902 if (dump_file && (dump_flags & TDF_DETAILS))
903 fprintf (dump_file, "Found & 0, removing all other ops\n");
905 reassociate_stats.ops_eliminated += ops->length () - 1;
907 ops->truncate (0);
908 ops->quick_push (oelast);
909 return;
912 else if (integer_all_onesp (oelast->op))
914 if (ops->length () != 1)
916 if (dump_file && (dump_flags & TDF_DETAILS))
917 fprintf (dump_file, "Found & -1, removing\n");
918 ops->pop ();
919 reassociate_stats.ops_eliminated++;
922 break;
923 case BIT_IOR_EXPR:
924 if (integer_all_onesp (oelast->op))
926 if (ops->length () != 1)
928 if (dump_file && (dump_flags & TDF_DETAILS))
929 fprintf (dump_file, "Found | -1, removing all other ops\n");
931 reassociate_stats.ops_eliminated += ops->length () - 1;
933 ops->truncate (0);
934 ops->quick_push (oelast);
935 return;
938 else if (integer_zerop (oelast->op))
940 if (ops->length () != 1)
942 if (dump_file && (dump_flags & TDF_DETAILS))
943 fprintf (dump_file, "Found | 0, removing\n");
944 ops->pop ();
945 reassociate_stats.ops_eliminated++;
948 break;
949 case MULT_EXPR:
950 if (integer_zerop (oelast->op)
951 || (FLOAT_TYPE_P (type)
952 && !HONOR_NANS (TYPE_MODE (type))
953 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
954 && real_zerop (oelast->op)))
956 if (ops->length () != 1)
958 if (dump_file && (dump_flags & TDF_DETAILS))
959 fprintf (dump_file, "Found * 0, removing all other ops\n");
961 reassociate_stats.ops_eliminated += ops->length () - 1;
962 ops->truncate (1);
963 ops->quick_push (oelast);
964 return;
967 else if (integer_onep (oelast->op)
968 || (FLOAT_TYPE_P (type)
969 && !HONOR_SNANS (TYPE_MODE (type))
970 && real_onep (oelast->op)))
972 if (ops->length () != 1)
974 if (dump_file && (dump_flags & TDF_DETAILS))
975 fprintf (dump_file, "Found * 1, removing\n");
976 ops->pop ();
977 reassociate_stats.ops_eliminated++;
978 return;
981 break;
982 case BIT_XOR_EXPR:
983 case PLUS_EXPR:
984 case MINUS_EXPR:
985 if (integer_zerop (oelast->op)
986 || (FLOAT_TYPE_P (type)
987 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
988 && fold_real_zero_addition_p (type, oelast->op,
989 opcode == MINUS_EXPR)))
991 if (ops->length () != 1)
993 if (dump_file && (dump_flags & TDF_DETAILS))
994 fprintf (dump_file, "Found [|^+] 0, removing\n");
995 ops->pop ();
996 reassociate_stats.ops_eliminated++;
997 return;
1000 break;
1001 default:
1002 break;
1008 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
1009 bool, bool);
1011 /* Structure for tracking and counting operands. */
1012 typedef struct oecount_s {
1013 int cnt;
1014 int id;
1015 enum tree_code oecode;
1016 tree op;
1017 } oecount;
1020 /* The heap for the oecount hashtable and the sorted list of operands. */
1021 static vec<oecount> cvec;
1024 /* Oecount hashtable helpers. */
1026 struct oecount_hasher
1028 typedef int value_type;
1029 typedef int compare_type;
1030 typedef int store_values_directly;
1031 static inline hashval_t hash (const value_type &);
1032 static inline bool equal (const value_type &, const compare_type &);
1033 static bool is_deleted (int &v) { return v == 1; }
1034 static void mark_deleted (int &e) { e = 1; }
1035 static bool is_empty (int &v) { return v == 0; }
1036 static void mark_empty (int &e) { e = 0; }
1037 static void remove (int &) {}
1040 /* Hash function for oecount. */
1042 inline hashval_t
1043 oecount_hasher::hash (const value_type &p)
1045 const oecount *c = &cvec[p - 42];
1046 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1049 /* Comparison function for oecount. */
1051 inline bool
1052 oecount_hasher::equal (const value_type &p1, const compare_type &p2)
1054 const oecount *c1 = &cvec[p1 - 42];
1055 const oecount *c2 = &cvec[p2 - 42];
1056 return (c1->oecode == c2->oecode
1057 && c1->op == c2->op);
1060 /* Comparison function for qsort sorting oecount elements by count. */
1062 static int
1063 oecount_cmp (const void *p1, const void *p2)
1065 const oecount *c1 = (const oecount *)p1;
1066 const oecount *c2 = (const oecount *)p2;
1067 if (c1->cnt != c2->cnt)
1068 return c1->cnt - c2->cnt;
1069 else
1070 /* If counts are identical, use unique IDs to stabilize qsort. */
1071 return c1->id - c2->id;
1074 /* Return TRUE iff STMT represents a builtin call that raises OP
1075 to some exponent. */
1077 static bool
1078 stmt_is_power_of_op (gimple stmt, tree op)
1080 tree fndecl;
1082 if (!is_gimple_call (stmt))
1083 return false;
1085 fndecl = gimple_call_fndecl (stmt);
1087 if (!fndecl
1088 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1089 return false;
1091 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1093 CASE_FLT_FN (BUILT_IN_POW):
1094 CASE_FLT_FN (BUILT_IN_POWI):
1095 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1097 default:
1098 return false;
1102 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1103 in place and return the result. Assumes that stmt_is_power_of_op
1104 was previously called for STMT and returned TRUE. */
1106 static HOST_WIDE_INT
1107 decrement_power (gimple stmt)
1109 REAL_VALUE_TYPE c, cint;
1110 HOST_WIDE_INT power;
1111 tree arg1;
1113 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1115 CASE_FLT_FN (BUILT_IN_POW):
1116 arg1 = gimple_call_arg (stmt, 1);
1117 c = TREE_REAL_CST (arg1);
1118 power = real_to_integer (&c) - 1;
1119 real_from_integer (&cint, VOIDmode, power, SIGNED);
1120 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1121 return power;
1123 CASE_FLT_FN (BUILT_IN_POWI):
1124 arg1 = gimple_call_arg (stmt, 1);
1125 power = TREE_INT_CST_LOW (arg1) - 1;
1126 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1127 return power;
1129 default:
1130 gcc_unreachable ();
1134 /* Find the single immediate use of STMT's LHS, and replace it
1135 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1136 replace *DEF with OP as well. */
1138 static void
1139 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1141 tree lhs;
1142 gimple use_stmt;
1143 use_operand_p use;
1144 gimple_stmt_iterator gsi;
1146 if (is_gimple_call (stmt))
1147 lhs = gimple_call_lhs (stmt);
1148 else
1149 lhs = gimple_assign_lhs (stmt);
1151 gcc_assert (has_single_use (lhs));
1152 single_imm_use (lhs, &use, &use_stmt);
1153 if (lhs == *def)
1154 *def = op;
1155 SET_USE (use, op);
1156 if (TREE_CODE (op) != SSA_NAME)
1157 update_stmt (use_stmt);
1158 gsi = gsi_for_stmt (stmt);
1159 unlink_stmt_vdef (stmt);
1160 reassoc_remove_stmt (&gsi);
1161 release_defs (stmt);
1164 /* Walks the linear chain with result *DEF searching for an operation
1165 with operand OP and code OPCODE removing that from the chain. *DEF
1166 is updated if there is only one operand but no operation left. */
1168 static void
1169 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1171 gimple stmt = SSA_NAME_DEF_STMT (*def);
1175 tree name;
1177 if (opcode == MULT_EXPR
1178 && stmt_is_power_of_op (stmt, op))
1180 if (decrement_power (stmt) == 1)
1181 propagate_op_to_single_use (op, stmt, def);
1182 return;
1185 name = gimple_assign_rhs1 (stmt);
1187 /* If this is the operation we look for and one of the operands
1188 is ours simply propagate the other operand into the stmts
1189 single use. */
1190 if (gimple_assign_rhs_code (stmt) == opcode
1191 && (name == op
1192 || gimple_assign_rhs2 (stmt) == op))
1194 if (name == op)
1195 name = gimple_assign_rhs2 (stmt);
1196 propagate_op_to_single_use (name, stmt, def);
1197 return;
1200 /* We might have a multiply of two __builtin_pow* calls, and
1201 the operand might be hiding in the rightmost one. */
1202 if (opcode == MULT_EXPR
1203 && gimple_assign_rhs_code (stmt) == opcode
1204 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1205 && has_single_use (gimple_assign_rhs2 (stmt)))
1207 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1208 if (stmt_is_power_of_op (stmt2, op))
1210 if (decrement_power (stmt2) == 1)
1211 propagate_op_to_single_use (op, stmt2, def);
1212 return;
1216 /* Continue walking the chain. */
1217 gcc_assert (name != op
1218 && TREE_CODE (name) == SSA_NAME);
1219 stmt = SSA_NAME_DEF_STMT (name);
1221 while (1);
1224 /* Returns true if statement S1 dominates statement S2. Like
1225 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1227 static bool
1228 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1230 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1232 /* If bb1 is NULL, it should be the GIMPLE_NOP def stmt of a default
1233 definition (D) SSA_NAME. Assume it lives at the beginning of the
1234 function and thus dominates everything. */
1235 if (!bb1 || s1 == s2)
1236 return true;
1238 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1239 if (!bb2)
1240 return false;
1242 if (bb1 == bb2)
1244 /* PHIs in the same basic block are assumed to be
1245 executed all in parallel, if only one stmt is a PHI,
1246 it dominates the other stmt in the same basic block. */
1247 if (gimple_code (s1) == GIMPLE_PHI)
1248 return true;
1250 if (gimple_code (s2) == GIMPLE_PHI)
1251 return false;
1253 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1255 if (gimple_uid (s1) < gimple_uid (s2))
1256 return true;
1258 if (gimple_uid (s1) > gimple_uid (s2))
1259 return false;
1261 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1262 unsigned int uid = gimple_uid (s1);
1263 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1265 gimple s = gsi_stmt (gsi);
1266 if (gimple_uid (s) != uid)
1267 break;
1268 if (s == s2)
1269 return true;
1272 return false;
1275 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1278 /* Insert STMT after INSERT_POINT. */
1280 static void
1281 insert_stmt_after (gimple stmt, gimple insert_point)
1283 gimple_stmt_iterator gsi;
1284 basic_block bb;
1286 if (gimple_code (insert_point) == GIMPLE_PHI)
1287 bb = gimple_bb (insert_point);
1288 else if (!stmt_ends_bb_p (insert_point))
1290 gsi = gsi_for_stmt (insert_point);
1291 gimple_set_uid (stmt, gimple_uid (insert_point));
1292 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1293 return;
1295 else
1296 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1297 thus if it must end a basic block, it should be a call that can
1298 throw, or some assignment that can throw. If it throws, the LHS
1299 of it will not be initialized though, so only valid places using
1300 the SSA_NAME should be dominated by the fallthru edge. */
1301 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1302 gsi = gsi_after_labels (bb);
1303 if (gsi_end_p (gsi))
1305 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1306 gimple_set_uid (stmt,
1307 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1309 else
1310 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1311 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1314 /* Builds one statement performing OP1 OPCODE OP2, using a new SSA name
1315 of TYPE for the result. Places the statement after the definition of
1316 either OP1 or OP2. Returns the new statement. */
1318 static gimple
1319 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1321 gimple op1def = NULL, op2def = NULL;
1322 gimple_stmt_iterator gsi;
1323 tree op;
1324 gimple sum;
1326 /* Create the addition statement. */
1327 op = make_ssa_name (type, NULL);
1328 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1330 /* Find an insertion place and insert. */
1331 if (TREE_CODE (op1) == SSA_NAME)
1332 op1def = SSA_NAME_DEF_STMT (op1);
1333 if (TREE_CODE (op2) == SSA_NAME)
1334 op2def = SSA_NAME_DEF_STMT (op2);
1335 if ((!op1def || gimple_nop_p (op1def))
1336 && (!op2def || gimple_nop_p (op2def)))
1338 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1339 if (gsi_end_p (gsi))
1341 gimple_stmt_iterator gsi2
1342 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1343 gimple_set_uid (sum,
1344 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1346 else
1347 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1348 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1350 else
1352 gimple insert_point;
1353 if ((!op1def || gimple_nop_p (op1def))
1354 || (op2def && !gimple_nop_p (op2def)
1355 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1356 insert_point = op2def;
1357 else
1358 insert_point = op1def;
1359 insert_stmt_after (sum, insert_point);
1361 update_stmt (sum);
1363 return sum;
1366 /* Perform un-distribution of divisions and multiplications.
1367 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1368 to (A + B) / X for real X.
1370 The algorithm is organized as follows.
1372 - First we walk the addition chain *OPS looking for summands that
1373 are defined by a multiplication or a real division. This results
1374 in the candidates bitmap with relevant indices into *OPS.
1376 - Second we build the chains of multiplications or divisions for
1377 these candidates, counting the number of occurrences of (operand, code)
1378 pairs in all of the candidates chains.
1380 - Third we sort the (operand, code) pairs by number of occurrence and
1381 process them starting with the pair with the most uses.
1383 * For each such pair we walk the candidates again to build a
1384 second candidate bitmap noting all multiplication/division chains
1385 that have at least one occurrence of (operand, code).
1387 * We build an alternate addition chain only covering these
1388 candidates with one (operand, code) operation removed from their
1389 multiplication/division chain.
1391 * The first candidate gets replaced by the alternate addition chain
1392 multiplied/divided by the operand.
1394 * All candidate chains get disabled for further processing and
1395 processing of (operand, code) pairs continues.
1397 The alternate addition chains built are re-processed by the main
1398 reassociation algorithm which allows optimizing a * x * y + b * y * x
1399 to (a + b ) * x * y in one invocation of the reassociation pass. */
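/* A small worked sketch of the bookkeeping above (the names are invented
   for the example).  Given the addition chain

     { a*x, b*x, c*y }        with opcode PLUS_EXPR

   the first walk marks all three summands as candidates, and the counting
   table ends up with (x, MULT_EXPR) -> 2 and a count of 1 for each of
   (y, MULT_EXPR), (a, MULT_EXPR), (b, MULT_EXPR) and (c, MULT_EXPR).
   Processing the most frequent pair (x, MULT_EXPR) selects the chains for
   a*x and b*x, strips one x from each, builds the alternate chain a + b,
   and replaces the first candidate with (a + b) * x, leaving

     { (a + b) * x, 0, c*y }

   for the main reassociation algorithm to clean up and rewrite.  */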
1401 static bool
1402 undistribute_ops_list (enum tree_code opcode,
1403 vec<operand_entry_t> *ops, struct loop *loop)
1405 unsigned int length = ops->length ();
1406 operand_entry_t oe1;
1407 unsigned i, j;
1408 sbitmap candidates, candidates2;
1409 unsigned nr_candidates, nr_candidates2;
1410 sbitmap_iterator sbi0;
1411 vec<operand_entry_t> *subops;
1412 bool changed = false;
1413 int next_oecount_id = 0;
1415 if (length <= 1
1416 || opcode != PLUS_EXPR)
1417 return false;
1419 /* Build a list of candidates to process. */
1420 candidates = sbitmap_alloc (length);
1421 bitmap_clear (candidates);
1422 nr_candidates = 0;
1423 FOR_EACH_VEC_ELT (*ops, i, oe1)
1425 enum tree_code dcode;
1426 gimple oe1def;
1428 if (TREE_CODE (oe1->op) != SSA_NAME)
1429 continue;
1430 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1431 if (!is_gimple_assign (oe1def))
1432 continue;
1433 dcode = gimple_assign_rhs_code (oe1def);
1434 if ((dcode != MULT_EXPR
1435 && dcode != RDIV_EXPR)
1436 || !is_reassociable_op (oe1def, dcode, loop))
1437 continue;
1439 bitmap_set_bit (candidates, i);
1440 nr_candidates++;
1443 if (nr_candidates < 2)
1445 sbitmap_free (candidates);
1446 return false;
1449 if (dump_file && (dump_flags & TDF_DETAILS))
1451 fprintf (dump_file, "searching for un-distribute opportunities ");
1452 print_generic_expr (dump_file,
1453 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1454 fprintf (dump_file, " %d\n", nr_candidates);
1457 /* Build linearized sub-operand lists and the counting table. */
1458 cvec.create (0);
1460 hash_table<oecount_hasher> ctable (15);
1462 /* ??? Macro arguments cannot have multi-argument template types in
1463 them. This typedef is needed to work around that limitation. */
1464 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1465 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1466 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1468 gimple oedef;
1469 enum tree_code oecode;
1470 unsigned j;
1472 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1473 oecode = gimple_assign_rhs_code (oedef);
1474 linearize_expr_tree (&subops[i], oedef,
1475 associative_tree_code (oecode), false);
1477 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1479 oecount c;
1480 int *slot;
1481 int idx;
1482 c.oecode = oecode;
1483 c.cnt = 1;
1484 c.id = next_oecount_id++;
1485 c.op = oe1->op;
1486 cvec.safe_push (c);
1487 idx = cvec.length () + 41;
1488 slot = ctable.find_slot (idx, INSERT);
1489 if (!*slot)
1491 *slot = idx;
1493 else
1495 cvec.pop ();
1496 cvec[*slot - 42].cnt++;
1501 /* Sort the counting table. */
1502 cvec.qsort (oecount_cmp);
1504 if (dump_file && (dump_flags & TDF_DETAILS))
1506 oecount *c;
1507 fprintf (dump_file, "Candidates:\n");
1508 FOR_EACH_VEC_ELT (cvec, j, c)
1510 fprintf (dump_file, " %u %s: ", c->cnt,
1511 c->oecode == MULT_EXPR
1512 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1513 print_generic_expr (dump_file, c->op, 0);
1514 fprintf (dump_file, "\n");
1518 /* Process the (operand, code) pairs in order of most occurrence. */
1519 candidates2 = sbitmap_alloc (length);
1520 while (!cvec.is_empty ())
1522 oecount *c = &cvec.last ();
1523 if (c->cnt < 2)
1524 break;
1526 /* Now collect the operands in the outer chain that contain
1527 the common operand in their inner chain. */
1528 bitmap_clear (candidates2);
1529 nr_candidates2 = 0;
1530 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1532 gimple oedef;
1533 enum tree_code oecode;
1534 unsigned j;
1535 tree op = (*ops)[i]->op;
1537 /* If we undistributed in this chain already this may be
1538 a constant. */
1539 if (TREE_CODE (op) != SSA_NAME)
1540 continue;
1542 oedef = SSA_NAME_DEF_STMT (op);
1543 oecode = gimple_assign_rhs_code (oedef);
1544 if (oecode != c->oecode)
1545 continue;
1547 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1549 if (oe1->op == c->op)
1551 bitmap_set_bit (candidates2, i);
1552 ++nr_candidates2;
1553 break;
1558 if (nr_candidates2 >= 2)
1560 operand_entry_t oe1, oe2;
1561 gimple prod;
1562 int first = bitmap_first_set_bit (candidates2);
1564 /* Build the new addition chain. */
1565 oe1 = (*ops)[first];
1566 if (dump_file && (dump_flags & TDF_DETAILS))
1568 fprintf (dump_file, "Building (");
1569 print_generic_expr (dump_file, oe1->op, 0);
1571 zero_one_operation (&oe1->op, c->oecode, c->op);
1572 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1574 gimple sum;
1575 oe2 = (*ops)[i];
1576 if (dump_file && (dump_flags & TDF_DETAILS))
1578 fprintf (dump_file, " + ");
1579 print_generic_expr (dump_file, oe2->op, 0);
1581 zero_one_operation (&oe2->op, c->oecode, c->op);
1582 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1583 oe1->op, oe2->op, opcode);
1584 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1585 oe2->rank = 0;
1586 oe1->op = gimple_get_lhs (sum);
1589 /* Apply the multiplication/division. */
1590 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1591 oe1->op, c->op, c->oecode);
1592 if (dump_file && (dump_flags & TDF_DETAILS))
1594 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1595 print_generic_expr (dump_file, c->op, 0);
1596 fprintf (dump_file, "\n");
1599 /* Record it in the addition chain and disable further
1600 undistribution with this op. */
1601 oe1->op = gimple_assign_lhs (prod);
1602 oe1->rank = get_rank (oe1->op);
1603 subops[first].release ();
1605 changed = true;
1608 cvec.pop ();
1611 for (i = 0; i < ops->length (); ++i)
1612 subops[i].release ();
1613 free (subops);
1614 cvec.release ();
1615 sbitmap_free (candidates);
1616 sbitmap_free (candidates2);
1618 return changed;
1621 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1622 expression, examine the other OPS to see if any of them are comparisons
1623 of the same values, which we may be able to combine or eliminate.
1624 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1626 static bool
1627 eliminate_redundant_comparison (enum tree_code opcode,
1628 vec<operand_entry_t> *ops,
1629 unsigned int currindex,
1630 operand_entry_t curr)
1632 tree op1, op2;
1633 enum tree_code lcode, rcode;
1634 gimple def1, def2;
1635 int i;
1636 operand_entry_t oe;
1638 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1639 return false;
1641 /* Check that CURR is a comparison. */
1642 if (TREE_CODE (curr->op) != SSA_NAME)
1643 return false;
1644 def1 = SSA_NAME_DEF_STMT (curr->op);
1645 if (!is_gimple_assign (def1))
1646 return false;
1647 lcode = gimple_assign_rhs_code (def1);
1648 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1649 return false;
1650 op1 = gimple_assign_rhs1 (def1);
1651 op2 = gimple_assign_rhs2 (def1);
1653 /* Now look for a similar comparison in the remaining OPS. */
1654 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1656 tree t;
1658 if (TREE_CODE (oe->op) != SSA_NAME)
1659 continue;
1660 def2 = SSA_NAME_DEF_STMT (oe->op);
1661 if (!is_gimple_assign (def2))
1662 continue;
1663 rcode = gimple_assign_rhs_code (def2);
1664 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1665 continue;
1667 /* If we got here, we have a match. See if we can combine the
1668 two comparisons. */
1669 if (opcode == BIT_IOR_EXPR)
1670 t = maybe_fold_or_comparisons (lcode, op1, op2,
1671 rcode, gimple_assign_rhs1 (def2),
1672 gimple_assign_rhs2 (def2));
1673 else
1674 t = maybe_fold_and_comparisons (lcode, op1, op2,
1675 rcode, gimple_assign_rhs1 (def2),
1676 gimple_assign_rhs2 (def2));
1677 if (!t)
1678 continue;
1680 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1681 always give us a boolean_type_node value back. If the original
1682 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1683 we need to convert. */
1684 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1685 t = fold_convert (TREE_TYPE (curr->op), t);
1687 if (TREE_CODE (t) != INTEGER_CST
1688 && !operand_equal_p (t, curr->op, 0))
1690 enum tree_code subcode;
1691 tree newop1, newop2;
1692 if (!COMPARISON_CLASS_P (t))
1693 continue;
1694 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1695 STRIP_USELESS_TYPE_CONVERSION (newop1);
1696 STRIP_USELESS_TYPE_CONVERSION (newop2);
1697 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1698 continue;
1701 if (dump_file && (dump_flags & TDF_DETAILS))
1703 fprintf (dump_file, "Equivalence: ");
1704 print_generic_expr (dump_file, curr->op, 0);
1705 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1706 print_generic_expr (dump_file, oe->op, 0);
1707 fprintf (dump_file, " -> ");
1708 print_generic_expr (dump_file, t, 0);
1709 fprintf (dump_file, "\n");
1712 /* Now we can delete oe, as it has been subsumed by the new combined
1713 expression t. */
1714 ops->ordered_remove (i);
1715 reassociate_stats.ops_eliminated ++;
1717 /* If t is the same as curr->op, we're done. Otherwise we must
1718 replace curr->op with t. Special case is if we got a constant
1719 back, in which case we add it to the end instead of in place of
1720 the current entry. */
1721 if (TREE_CODE (t) == INTEGER_CST)
1723 ops->ordered_remove (currindex);
1724 add_to_ops_vec (ops, t);
1726 else if (!operand_equal_p (t, curr->op, 0))
1728 gimple sum;
1729 enum tree_code subcode;
1730 tree newop1;
1731 tree newop2;
1732 gcc_assert (COMPARISON_CLASS_P (t));
1733 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1734 STRIP_USELESS_TYPE_CONVERSION (newop1);
1735 STRIP_USELESS_TYPE_CONVERSION (newop2);
1736 gcc_checking_assert (is_gimple_val (newop1)
1737 && is_gimple_val (newop2));
1738 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1739 curr->op = gimple_get_lhs (sum);
1741 return true;
1744 return false;
1747 /* Perform various identities and other optimizations on the list of
1748 operand entries, stored in OPS. The tree code for the binary
1749 operation between all the operands is OPCODE. */
1751 static void
1752 optimize_ops_list (enum tree_code opcode,
1753 vec<operand_entry_t> *ops)
1755 unsigned int length = ops->length ();
1756 unsigned int i;
1757 operand_entry_t oe;
1758 operand_entry_t oelast = NULL;
1759 bool iterate = false;
1761 if (length == 1)
1762 return;
1764 oelast = ops->last ();
1766 /* If the last two are constants, pop the constants off, merge them
1767 and try the next two. */
1768 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1770 operand_entry_t oelm1 = (*ops)[length - 2];
1772 if (oelm1->rank == 0
1773 && is_gimple_min_invariant (oelm1->op)
1774 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1775 TREE_TYPE (oelast->op)))
1777 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1778 oelm1->op, oelast->op);
1780 if (folded && is_gimple_min_invariant (folded))
1782 if (dump_file && (dump_flags & TDF_DETAILS))
1783 fprintf (dump_file, "Merging constants\n");
1785 ops->pop ();
1786 ops->pop ();
1788 add_to_ops_vec (ops, folded);
1789 reassociate_stats.constants_eliminated++;
1791 optimize_ops_list (opcode, ops);
1792 return;
1797 eliminate_using_constants (opcode, ops);
1798 oelast = NULL;
1800 for (i = 0; ops->iterate (i, &oe);)
1802 bool done = false;
1804 if (eliminate_not_pairs (opcode, ops, i, oe))
1805 return;
1806 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1807 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1808 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1810 if (done)
1811 return;
1812 iterate = true;
1813 oelast = NULL;
1814 continue;
1816 oelast = oe;
1817 i++;
1820 length = ops->length ();
1821 oelast = ops->last ();
1823 if (iterate)
1824 optimize_ops_list (opcode, ops);
1827 /* The following functions are subroutines to optimize_range_tests and allow
1828 it to try to change a logical combination of comparisons into a range
1829 test.
1831 For example, both
1832 X == 2 || X == 5 || X == 3 || X == 4
1833 and
1834 X >= 2 && X <= 5
1835 are converted to
1836 (unsigned) (X - 2) <= 3
1838 For more information see comments above fold_range_test in fold-const.c,
1839 this implementation is for GIMPLE. */
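/* The arithmetic behind that conversion: X == 2, 3, 4 or 5 is the same as
   2 <= X && X <= 5, and subtracting the low bound while viewing the result
   as unsigned folds the two-sided test into one comparison, because any X
   below 2 wraps around to a value far above 3:

     (unsigned) (X - 2) <= 3   <=>   2 <= X && X <= 5

   so the whole chain of comparisons collapses into a single subtraction
   and compare.  */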
1841 struct range_entry
1843 tree exp;
1844 tree low;
1845 tree high;
1846 bool in_p;
1847 bool strict_overflow_p;
1848 unsigned int idx, next;
1851 /* This is similar to make_range in fold-const.c, but on top of
1852 GIMPLE instead of trees. If EXP is non-NULL, it should be
1853 an SSA_NAME and STMT argument is ignored, otherwise STMT
1854 argument should be a GIMPLE_COND. */
1856 static void
1857 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1859 int in_p;
1860 tree low, high;
1861 bool is_bool, strict_overflow_p;
1863 r->exp = NULL_TREE;
1864 r->in_p = false;
1865 r->strict_overflow_p = false;
1866 r->low = NULL_TREE;
1867 r->high = NULL_TREE;
1868 if (exp != NULL_TREE
1869 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1870 return;
1872 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1873 and see if we can refine the range. Some of the cases below may not
1874 happen, but it doesn't seem worth worrying about this. We "continue"
1875 the outer loop when we've changed something; otherwise we "break"
1876 the switch, which will "break" the while. */
1877 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1878 high = low;
1879 in_p = 0;
1880 strict_overflow_p = false;
1881 is_bool = false;
1882 if (exp == NULL_TREE)
1883 is_bool = true;
1884 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1886 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1887 is_bool = true;
1888 else
1889 return;
1891 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1892 is_bool = true;
1894 while (1)
1896 enum tree_code code;
1897 tree arg0, arg1, exp_type;
1898 tree nexp;
1899 location_t loc;
1901 if (exp != NULL_TREE)
1903 if (TREE_CODE (exp) != SSA_NAME
1904 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1905 break;
1907 stmt = SSA_NAME_DEF_STMT (exp);
1908 if (!is_gimple_assign (stmt))
1909 break;
1911 code = gimple_assign_rhs_code (stmt);
1912 arg0 = gimple_assign_rhs1 (stmt);
1913 arg1 = gimple_assign_rhs2 (stmt);
1914 exp_type = TREE_TYPE (exp);
1916 else
1918 code = gimple_cond_code (stmt);
1919 arg0 = gimple_cond_lhs (stmt);
1920 arg1 = gimple_cond_rhs (stmt);
1921 exp_type = boolean_type_node;
1924 if (TREE_CODE (arg0) != SSA_NAME)
1925 break;
1926 loc = gimple_location (stmt);
1927 switch (code)
1929 case BIT_NOT_EXPR:
1930 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1931 /* Ensure the range is either +[-,0], +[0,0],
1932 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1933 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1934 or similar expression of unconditional true or
1935 false, it should not be negated. */
1936 && ((high && integer_zerop (high))
1937 || (low && integer_onep (low))))
1939 in_p = !in_p;
1940 exp = arg0;
1941 continue;
1943 break;
1944 case SSA_NAME:
1945 exp = arg0;
1946 continue;
1947 CASE_CONVERT:
1948 if (is_bool)
1949 goto do_default;
1950 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1952 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1953 is_bool = true;
1954 else
1955 return;
1957 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1958 is_bool = true;
1959 goto do_default;
1960 case EQ_EXPR:
1961 case NE_EXPR:
1962 case LT_EXPR:
1963 case LE_EXPR:
1964 case GE_EXPR:
1965 case GT_EXPR:
1966 is_bool = true;
1967 /* FALLTHRU */
1968 default:
1969 if (!is_bool)
1970 return;
1971 do_default:
1972 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1973 &low, &high, &in_p,
1974 &strict_overflow_p);
1975 if (nexp != NULL_TREE)
1977 exp = nexp;
1978 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1979 continue;
1981 break;
1983 break;
1985 if (is_bool)
1987 r->exp = exp;
1988 r->in_p = in_p;
1989 r->low = low;
1990 r->high = high;
1991 r->strict_overflow_p = strict_overflow_p;
1995 /* Comparison function for qsort. Sort entries
1996 without SSA_NAME exp first, then with SSA_NAMEs sorted
1997 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1998 by increasing ->low and if ->low is the same, by increasing
1999 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2000 maximum. */
2002 static int
2003 range_entry_cmp (const void *a, const void *b)
2005 const struct range_entry *p = (const struct range_entry *) a;
2006 const struct range_entry *q = (const struct range_entry *) b;
2008 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2010 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2012 /* Group range_entries for the same SSA_NAME together. */
2013 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2014 return -1;
2015 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2016 return 1;
2017 /* If ->low is different, NULL low goes first, then by
2018 ascending low. */
2019 if (p->low != NULL_TREE)
2021 if (q->low != NULL_TREE)
2023 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2024 p->low, q->low);
2025 if (tem && integer_onep (tem))
2026 return -1;
2027 tem = fold_binary (GT_EXPR, boolean_type_node,
2028 p->low, q->low);
2029 if (tem && integer_onep (tem))
2030 return 1;
2032 else
2033 return 1;
2035 else if (q->low != NULL_TREE)
2036 return -1;
2037 /* If ->high is different, NULL high goes last, before that by
2038 ascending high. */
2039 if (p->high != NULL_TREE)
2041 if (q->high != NULL_TREE)
2043 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2044 p->high, q->high);
2045 if (tem && integer_onep (tem))
2046 return -1;
2047 tem = fold_binary (GT_EXPR, boolean_type_node,
2048 p->high, q->high);
2049 if (tem && integer_onep (tem))
2050 return 1;
2052 else
2053 return -1;
2055 else if (p->high != NULL_TREE)
2056 return 1;
2057 /* If both ranges are the same, sort below by ascending idx. */
2059 else
2060 return 1;
2062 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2063 return -1;
2065 if (p->idx < q->idx)
2066 return -1;
2067 else
2069 gcc_checking_assert (p->idx > q->idx);
2070 return 1;
2074 /* Helper routine of optimize_range_test.
2075 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2076 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2077 OPCODE and OPS are arguments of optimize_range_tests. Return
2078 true if the range merge has been successful.
2079 If OPCODE is ERROR_MARK, this is called from within
2080 maybe_optimize_range_tests and is performing inter-bb range optimization.
2081 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2082 oe->rank. */
2084 static bool
2085 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2086 unsigned int count, enum tree_code opcode,
2087 vec<operand_entry_t> *ops, tree exp, bool in_p,
2088 tree low, tree high, bool strict_overflow_p)
2090 operand_entry_t oe = (*ops)[range->idx];
2091 tree op = oe->op;
2092 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2093 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2094 location_t loc = gimple_location (stmt);
2095 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2096 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
2097 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2098 gimple_stmt_iterator gsi;
2100 if (tem == NULL_TREE)
2101 return false;
2103 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2104 warning_at (loc, OPT_Wstrict_overflow,
2105 "assuming signed overflow does not occur "
2106 "when simplifying range test");
2108 if (dump_file && (dump_flags & TDF_DETAILS))
2110 struct range_entry *r;
2111 fprintf (dump_file, "Optimizing range tests ");
2112 print_generic_expr (dump_file, range->exp, 0);
2113 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2114 print_generic_expr (dump_file, range->low, 0);
2115 fprintf (dump_file, ", ");
2116 print_generic_expr (dump_file, range->high, 0);
2117 fprintf (dump_file, "]");
2118 for (r = otherrange; r < otherrange + count; r++)
2120 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2121 print_generic_expr (dump_file, r->low, 0);
2122 fprintf (dump_file, ", ");
2123 print_generic_expr (dump_file, r->high, 0);
2124 fprintf (dump_file, "]");
2126 fprintf (dump_file, "\n into ");
2127 print_generic_expr (dump_file, tem, 0);
2128 fprintf (dump_file, "\n");
2131 if (opcode == BIT_IOR_EXPR
2132 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2133 tem = invert_truthvalue_loc (loc, tem);
2135 tem = fold_convert_loc (loc, optype, tem);
2136 gsi = gsi_for_stmt (stmt);
2137 /* In rare cases range->exp can be equal to lhs of stmt.
2138 In that case we have to insert after the stmt rather than before
2139 it. */
2140 if (op == range->exp)
2141 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2142 GSI_CONTINUE_LINKING);
2143 else
2145 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2146 GSI_SAME_STMT);
2147 gsi_prev (&gsi);
2149 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2150 if (gimple_uid (gsi_stmt (gsi)))
2151 break;
2152 else
2153 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2155 oe->op = tem;
2156 range->exp = exp;
2157 range->low = low;
2158 range->high = high;
2159 range->in_p = in_p;
2160 range->strict_overflow_p = false;
2162 for (range = otherrange; range < otherrange + count; range++)
2164 oe = (*ops)[range->idx];
2165 /* Now change all the other range test immediate uses, so that
2166 those tests will be optimized away. */
2167 if (opcode == ERROR_MARK)
2169 if (oe->op)
2170 oe->op = build_int_cst (TREE_TYPE (oe->op),
2171 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2172 else
2173 oe->op = (oe->rank == BIT_IOR_EXPR
2174 ? boolean_false_node : boolean_true_node);
2176 else
2177 oe->op = error_mark_node;
2178 range->exp = NULL_TREE;
2180 return true;
2183 /* Optimize X == CST1 || X == CST2
2184 if popcount (CST1 ^ CST2) == 1 into
2185 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2186 Similarly for ranges. E.g.
2187 X != 2 && X != 3 && X != 10 && X != 11
2188 will be transformed by the previous optimization into
2189 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2190 and this loop can transform that into
2191 !(((X & ~8) - 2U) <= 1U). */
2193 static bool
2194 optimize_range_tests_xor (enum tree_code opcode, tree type,
2195 tree lowi, tree lowj, tree highi, tree highj,
2196 vec<operand_entry_t> *ops,
2197 struct range_entry *rangei,
2198 struct range_entry *rangej)
2200 tree lowxor, highxor, tem, exp;
2201 /* Check lowi ^ lowj == highi ^ highj and
2202 popcount (lowi ^ lowj) == 1. */
2203 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2204 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2205 return false;
2206 if (tree_log2 (lowxor) < 0)
2207 return false;
2208 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2209 if (!tree_int_cst_equal (lowxor, highxor))
2210 return false;
2212 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2213 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2214 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2215 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2216 if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2217 rangei->in_p, lowj, highj,
2218 rangei->strict_overflow_p
2219 || rangej->strict_overflow_p))
2220 return true;
2221 return false;
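/* A worked instance of the transformation above (the values are purely
   illustrative): merging X == 4 with X == 6 gives lowxor = highxor
   = 4 ^ 6 = 2, which has a single bit set, so the two tests collapse
   into (X & ~2) == 4, i.e. exp = X & ~2 with low = high = 4.  */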
2224 /* Optimize X == CST1 || X == CST2
2225 if popcount (CST2 - CST1) == 1 into
2226 ((X - CST1) & ~(CST2 - CST1)) == 0.
2227 Similarly for ranges. E.g.
2228 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2229 || X == 75 || X == 45
2230 will be transformed by the previous optimization into
2231 (X - 43U) <= 3U || (X - 75U) <= 3U
2232 and this loop can transform that into
2233 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2234 static bool
2235 optimize_range_tests_diff (enum tree_code opcode, tree type,
2236 tree lowi, tree lowj, tree highi, tree highj,
2237 vec<operand_entry_t> *ops,
2238 struct range_entry *rangei,
2239 struct range_entry *rangej)
2241 tree tem1, tem2, mask;
2242 /* Check highi - lowi == highj - lowj. */
2243 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2244 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2245 return false;
2246 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2247 if (!tree_int_cst_equal (tem1, tem2))
2248 return false;
2249 /* Check popcount (lowj - lowi) == 1. */
2250 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2251 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2252 return false;
2253 if (tree_log2 (tem1) < 0)
2254 return false;
2256 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2257 tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
2258 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2259 lowj = build_int_cst (type, 0);
2260 if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2261 rangei->in_p, lowj, tem2,
2262 rangei->strict_overflow_p
2263 || rangej->strict_overflow_p))
2264 return true;
2265 return false;
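/* A worked instance of the transformation above (illustrative values,
   tracing the example from the comment before the function): for
   (X - 43U) <= 3U || (X - 75U) <= 3U we have lowi = 43, highi = 46,
   lowj = 75, highj = 78, so highi - lowi == highj - lowj == 3 and
   lowj - lowi == 32 has a single bit set; the merged test becomes
   ((X - 43U) & ~32U) <= 3U, i.e. exp = (X - 43) & ~32, low = 0, high = 3.  */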
2268 /* Helper of optimize_range_tests. It performs the checks common to
2269 optimize_range_tests_xor and optimize_range_tests_diff.
2270 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor,
2271 otherwise it calls optimize_range_tests_diff. */
2273 static bool
2274 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2275 bool optimize_xor, vec<operand_entry_t> *ops,
2276 struct range_entry *ranges)
2278 int i, j;
2279 bool any_changes = false;
2280 for (i = first; i < length; i++)
2282 tree lowi, highi, lowj, highj, type, tem;
2284 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2285 continue;
2286 type = TREE_TYPE (ranges[i].exp);
2287 if (!INTEGRAL_TYPE_P (type))
2288 continue;
2289 lowi = ranges[i].low;
2290 if (lowi == NULL_TREE)
2291 lowi = TYPE_MIN_VALUE (type);
2292 highi = ranges[i].high;
2293 if (highi == NULL_TREE)
2294 continue;
2295 for (j = i + 1; j < length && j < i + 64; j++)
2297 bool changes;
2298 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2299 continue;
2300 lowj = ranges[j].low;
2301 if (lowj == NULL_TREE)
2302 continue;
2303 highj = ranges[j].high;
2304 if (highj == NULL_TREE)
2305 highj = TYPE_MAX_VALUE (type);
2306 /* Check lowj > highi. */
2307 tem = fold_binary (GT_EXPR, boolean_type_node,
2308 lowj, highi);
2309 if (tem == NULL_TREE || !integer_onep (tem))
2310 continue;
2311 if (optimize_xor)
2312 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2313 highi, highj, ops,
2314 ranges + i, ranges + j);
2315 else
2316 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2317 highi, highj, ops,
2318 ranges + i, ranges + j);
2319 if (changes)
2321 any_changes = true;
2322 break;
2326 return any_changes;
2329 /* Optimize range tests, similarly to how fold_range_test optimizes
2330 them on trees. The tree code for the binary
2331 operation between all the operands is OPCODE.
2332 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2333 maybe_optimize_range_tests for inter-bb range optimization.
2334 In that case, if oe->op is NULL, oe->id is the index of the bb whose
2335 GIMPLE_COND is &&ed or ||ed into the test, and oe->rank says
2336 the actual opcode. */
2338 static bool
2339 optimize_range_tests (enum tree_code opcode,
2340 vec<operand_entry_t> *ops)
2342 unsigned int length = ops->length (), i, j, first;
2343 operand_entry_t oe;
2344 struct range_entry *ranges;
2345 bool any_changes = false;
2347 if (length == 1)
2348 return false;
2350 ranges = XNEWVEC (struct range_entry, length);
2351 for (i = 0; i < length; i++)
2353 oe = (*ops)[i];
2354 ranges[i].idx = i;
2355 init_range_entry (ranges + i, oe->op,
2356 oe->op ? NULL :
2357 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2358 /* For | invert it now, we will invert it again before emitting
2359 the optimized expression. */
2360 if (opcode == BIT_IOR_EXPR
2361 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2362 ranges[i].in_p = !ranges[i].in_p;
2365 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2366 for (i = 0; i < length; i++)
2367 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2368 break;
2370 /* Try to merge ranges. */
2371 for (first = i; i < length; i++)
2373 tree low = ranges[i].low;
2374 tree high = ranges[i].high;
2375 int in_p = ranges[i].in_p;
2376 bool strict_overflow_p = ranges[i].strict_overflow_p;
2377 int update_fail_count = 0;
2379 for (j = i + 1; j < length; j++)
2381 if (ranges[i].exp != ranges[j].exp)
2382 break;
2383 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2384 ranges[j].in_p, ranges[j].low, ranges[j].high))
2385 break;
2386 strict_overflow_p |= ranges[j].strict_overflow_p;
2389 if (j == i + 1)
2390 continue;
2392 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2393 ops, ranges[i].exp, in_p, low, high,
2394 strict_overflow_p))
2396 i = j - 1;
2397 any_changes = true;
2399 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2400 while update_range_test would fail. */
2401 else if (update_fail_count == 64)
2402 i = j - 1;
2403 else
2404 ++update_fail_count;
2407 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2408 ops, ranges);
2410 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2411 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2412 ops, ranges);
2414 if (any_changes && opcode != ERROR_MARK)
2416 j = 0;
2417 FOR_EACH_VEC_ELT (*ops, i, oe)
2419 if (oe->op == error_mark_node)
2420 continue;
2421 else if (i != j)
2422 (*ops)[j] = oe;
2423 j++;
2425 ops->truncate (j);
2428 XDELETEVEC (ranges);
2429 return any_changes;
2432 /* Return true if STMT is a cast like:
2433 <bb N>:
2435 _123 = (int) _234;
2437 <bb M>:
2438 # _345 = PHI <_123(N), 1(...), 1(...)>
2439 where _234 has bool type, _123 has a single use and
2440 bb N has a single successor M. This is commonly used in
2441 the last block of a range test. */
2443 static bool
2444 final_range_test_p (gimple stmt)
2446 basic_block bb, rhs_bb;
2447 edge e;
2448 tree lhs, rhs;
2449 use_operand_p use_p;
2450 gimple use_stmt;
2452 if (!gimple_assign_cast_p (stmt))
2453 return false;
2454 bb = gimple_bb (stmt);
2455 if (!single_succ_p (bb))
2456 return false;
2457 e = single_succ_edge (bb);
2458 if (e->flags & EDGE_COMPLEX)
2459 return false;
2461 lhs = gimple_assign_lhs (stmt);
2462 rhs = gimple_assign_rhs1 (stmt);
2463 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2464 || TREE_CODE (rhs) != SSA_NAME
2465 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2466 return false;
2468 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2469 if (!single_imm_use (lhs, &use_p, &use_stmt))
2470 return false;
2472 if (gimple_code (use_stmt) != GIMPLE_PHI
2473 || gimple_bb (use_stmt) != e->dest)
2474 return false;
2476 /* And that the rhs is defined in the same loop. */
2477 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2478 if (rhs_bb == NULL
2479 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2480 return false;
2482 return true;
2485 /* Return true if BB is suitable basic block for inter-bb range test
2486 optimization. If BACKWARD is true, BB should be the only predecessor
2487 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2488 or compared against to find the common basic block to which all the
2489 conditions branch if true resp. false. If BACKWARD is false, TEST_BB should
2490 be the only predecessor of BB. */
2492 static bool
2493 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2494 bool backward)
2496 edge_iterator ei, ei2;
2497 edge e, e2;
2498 gimple stmt;
2499 gimple_stmt_iterator gsi;
2500 bool other_edge_seen = false;
2501 bool is_cond;
2503 if (test_bb == bb)
2504 return false;
2505 /* Check last stmt first. */
2506 stmt = last_stmt (bb);
2507 if (stmt == NULL
2508 || (gimple_code (stmt) != GIMPLE_COND
2509 && (backward || !final_range_test_p (stmt)))
2510 || gimple_visited_p (stmt)
2511 || stmt_could_throw_p (stmt)
2512 || *other_bb == bb)
2513 return false;
2514 is_cond = gimple_code (stmt) == GIMPLE_COND;
2515 if (is_cond)
2517 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2518 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2519 to *OTHER_BB (if not set yet, try to find it out). */
2520 if (EDGE_COUNT (bb->succs) != 2)
2521 return false;
2522 FOR_EACH_EDGE (e, ei, bb->succs)
2524 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2525 return false;
2526 if (e->dest == test_bb)
2528 if (backward)
2529 continue;
2530 else
2531 return false;
2533 if (e->dest == bb)
2534 return false;
2535 if (*other_bb == NULL)
2537 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2538 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2539 return false;
2540 else if (e->dest == e2->dest)
2541 *other_bb = e->dest;
2542 if (*other_bb == NULL)
2543 return false;
2545 if (e->dest == *other_bb)
2546 other_edge_seen = true;
2547 else if (backward)
2548 return false;
2550 if (*other_bb == NULL || !other_edge_seen)
2551 return false;
2553 else if (single_succ (bb) != *other_bb)
2554 return false;
2556 /* Now check all PHIs of *OTHER_BB. */
2557 e = find_edge (bb, *other_bb);
2558 e2 = find_edge (test_bb, *other_bb);
2559 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2561 gimple phi = gsi_stmt (gsi);
2562 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2563 corresponding to BB and TEST_BB predecessor must be the same. */
2564 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2565 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2567 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2568 one of the PHIs should have the lhs of the last stmt in
2569 that block as PHI arg and that PHI should have 0 or 1
2570 corresponding to it in all other range test basic blocks
2571 considered. */
2572 if (!is_cond)
2574 if (gimple_phi_arg_def (phi, e->dest_idx)
2575 == gimple_assign_lhs (stmt)
2576 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2577 || integer_onep (gimple_phi_arg_def (phi,
2578 e2->dest_idx))))
2579 continue;
2581 else
2583 gimple test_last = last_stmt (test_bb);
2584 if (gimple_code (test_last) != GIMPLE_COND
2585 && gimple_phi_arg_def (phi, e2->dest_idx)
2586 == gimple_assign_lhs (test_last)
2587 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2588 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2589 continue;
2592 return false;
2595 return true;
2598 /* Return true if BB doesn't have side-effects that would disallow
2599 range test optimization, all SSA_NAMEs set in the bb are consumed
2600 in the bb and there are no PHIs. */
2602 static bool
2603 no_side_effect_bb (basic_block bb)
2605 gimple_stmt_iterator gsi;
2606 gimple last;
2608 if (!gimple_seq_empty_p (phi_nodes (bb)))
2609 return false;
2610 last = last_stmt (bb);
2611 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2613 gimple stmt = gsi_stmt (gsi);
2614 tree lhs;
2615 imm_use_iterator imm_iter;
2616 use_operand_p use_p;
2618 if (is_gimple_debug (stmt))
2619 continue;
2620 if (gimple_has_side_effects (stmt))
2621 return false;
2622 if (stmt == last)
2623 return true;
2624 if (!is_gimple_assign (stmt))
2625 return false;
2626 lhs = gimple_assign_lhs (stmt);
2627 if (TREE_CODE (lhs) != SSA_NAME)
2628 return false;
2629 if (gimple_assign_rhs_could_trap_p (stmt))
2630 return false;
2631 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2633 gimple use_stmt = USE_STMT (use_p);
2634 if (is_gimple_debug (use_stmt))
2635 continue;
2636 if (gimple_bb (use_stmt) != bb)
2637 return false;
2640 return false;
2643 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2644 return true and fill in *OPS recursively. */
2646 static bool
2647 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2648 struct loop *loop)
2650 gimple stmt = SSA_NAME_DEF_STMT (var);
2651 tree rhs[2];
2652 int i;
2654 if (!is_reassociable_op (stmt, code, loop))
2655 return false;
2657 rhs[0] = gimple_assign_rhs1 (stmt);
2658 rhs[1] = gimple_assign_rhs2 (stmt);
2659 gimple_set_visited (stmt, true);
2660 for (i = 0; i < 2; i++)
2661 if (TREE_CODE (rhs[i]) == SSA_NAME
2662 && !get_ops (rhs[i], code, ops, loop)
2663 && has_single_use (rhs[i]))
2665 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2667 oe->op = rhs[i];
2668 oe->rank = code;
2669 oe->id = 0;
2670 oe->count = 1;
2671 ops->safe_push (oe);
2673 return true;
2676 /* Find the ops that were added by get_ops starting from VAR, see if
2677 they were changed during update_range_test and if yes, create new
2678 stmts. */
2680 static tree
2681 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2682 unsigned int *pidx, struct loop *loop)
2684 gimple stmt = SSA_NAME_DEF_STMT (var);
2685 tree rhs[4];
2686 int i;
2688 if (!is_reassociable_op (stmt, code, loop))
2689 return NULL;
2691 rhs[0] = gimple_assign_rhs1 (stmt);
2692 rhs[1] = gimple_assign_rhs2 (stmt);
2693 rhs[2] = rhs[0];
2694 rhs[3] = rhs[1];
2695 for (i = 0; i < 2; i++)
2696 if (TREE_CODE (rhs[i]) == SSA_NAME)
2698 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2699 if (rhs[2 + i] == NULL_TREE)
2701 if (has_single_use (rhs[i]))
2702 rhs[2 + i] = ops[(*pidx)++]->op;
2703 else
2704 rhs[2 + i] = rhs[i];
2707 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2708 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2710 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2711 var = make_ssa_name (TREE_TYPE (var), NULL);
2712 gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2713 var, rhs[2], rhs[3]);
2714 gimple_set_uid (g, gimple_uid (stmt));
2715 gimple_set_visited (g, true);
2716 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2718 return var;
2721 /* Structure to track the initial value passed to get_ops and
2722 the range in the ops vector for each basic block. */
2724 struct inter_bb_range_test_entry
2726 tree op;
2727 unsigned int first_idx, last_idx;
2730 /* Inter-bb range test optimization. */
2732 static void
2733 maybe_optimize_range_tests (gimple stmt)
2735 basic_block first_bb = gimple_bb (stmt);
2736 basic_block last_bb = first_bb;
2737 basic_block other_bb = NULL;
2738 basic_block bb;
2739 edge_iterator ei;
2740 edge e;
2741 auto_vec<operand_entry_t> ops;
2742 auto_vec<inter_bb_range_test_entry> bbinfo;
2743 bool any_changes = false;
2745 /* Consider only basic blocks that end with GIMPLE_COND or
2746 a cast statement satisfying final_range_test_p. All
2747 but the last bb in the first_bb .. last_bb range
2748 should end with GIMPLE_COND. */
2749 if (gimple_code (stmt) == GIMPLE_COND)
2751 if (EDGE_COUNT (first_bb->succs) != 2)
2752 return;
2754 else if (final_range_test_p (stmt))
2755 other_bb = single_succ (first_bb);
2756 else
2757 return;
2759 if (stmt_could_throw_p (stmt))
2760 return;
2762 /* As relative ordering of post-dominator sons isn't fixed,
2763 maybe_optimize_range_tests can be called first on any
2764 bb in the range we want to optimize. So, start searching
2765 backwards, if first_bb can be set to a predecessor. */
2766 while (single_pred_p (first_bb))
2768 basic_block pred_bb = single_pred (first_bb);
2769 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2770 break;
2771 if (!no_side_effect_bb (first_bb))
2772 break;
2773 first_bb = pred_bb;
2775 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2776 Before starting forward search in last_bb successors, find
2777 out the other_bb. */
2778 if (first_bb == last_bb)
2780 other_bb = NULL;
2781 /* As non-GIMPLE_COND last stmt always terminates the range,
2782 if forward search didn't discover anything, just give up. */
2783 if (gimple_code (stmt) != GIMPLE_COND)
2784 return;
2785 /* Look at both successors. Either it ends with a GIMPLE_COND
2786 and satisfies suitable_cond_bb, or ends with a cast and
2787 other_bb is that cast's successor. */
2788 FOR_EACH_EDGE (e, ei, first_bb->succs)
2789 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2790 || e->dest == first_bb)
2791 return;
2792 else if (single_pred_p (e->dest))
2794 stmt = last_stmt (e->dest);
2795 if (stmt
2796 && gimple_code (stmt) == GIMPLE_COND
2797 && EDGE_COUNT (e->dest->succs) == 2)
2799 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2800 break;
2801 else
2802 other_bb = NULL;
2804 else if (stmt
2805 && final_range_test_p (stmt)
2806 && find_edge (first_bb, single_succ (e->dest)))
2808 other_bb = single_succ (e->dest);
2809 if (other_bb == first_bb)
2810 other_bb = NULL;
2813 if (other_bb == NULL)
2814 return;
2816 /* Now do the forward search, moving last_bb to successor bbs
2817 that aren't other_bb. */
2818 while (EDGE_COUNT (last_bb->succs) == 2)
2820 FOR_EACH_EDGE (e, ei, last_bb->succs)
2821 if (e->dest != other_bb)
2822 break;
2823 if (e == NULL)
2824 break;
2825 if (!single_pred_p (e->dest))
2826 break;
2827 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2828 break;
2829 if (!no_side_effect_bb (e->dest))
2830 break;
2831 last_bb = e->dest;
2833 if (first_bb == last_bb)
2834 return;
2835 /* Here basic blocks first_bb through last_bb's predecessor
2836 end with GIMPLE_COND, all of them have one of the edges to
2837 other_bb and another to another block in the range,
2838 all blocks except first_bb have no side-effects, and
2839 last_bb ends with either a GIMPLE_COND or a cast satisfying
2840 final_range_test_p. */
2841 for (bb = last_bb; ; bb = single_pred (bb))
2843 enum tree_code code;
2844 tree lhs, rhs;
2845 inter_bb_range_test_entry bb_ent;
2847 bb_ent.op = NULL_TREE;
2848 bb_ent.first_idx = ops.length ();
2849 bb_ent.last_idx = bb_ent.first_idx;
2850 e = find_edge (bb, other_bb);
2851 stmt = last_stmt (bb);
2852 gimple_set_visited (stmt, true);
2853 if (gimple_code (stmt) != GIMPLE_COND)
2855 use_operand_p use_p;
2856 gimple phi;
2857 edge e2;
2858 unsigned int d;
2860 lhs = gimple_assign_lhs (stmt);
2861 rhs = gimple_assign_rhs1 (stmt);
2862 gcc_assert (bb == last_bb);
2864 /* stmt is
2865 _123 = (int) _234;
2867 followed by:
2868 <bb M>:
2869 # _345 = PHI <_123(N), 1(...), 1(...)>
2871 or 0 instead of 1. If it is 0, the _234
2872 range test is anded together with all the
2873 other range tests, if it is 1, it is ored with
2874 them. */
2875 single_imm_use (lhs, &use_p, &phi);
2876 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2877 e2 = find_edge (first_bb, other_bb);
2878 d = e2->dest_idx;
2879 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2880 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2881 code = BIT_AND_EXPR;
2882 else
2884 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2885 code = BIT_IOR_EXPR;
2888 /* If _234 SSA_NAME_DEF_STMT is
2889 _234 = _567 | _789;
2890 (or &, corresponding to 1/0 in the phi arguments),
2891 push into ops the individual range test arguments
2892 of the bitwise or resp. and, recursively. */
2893 if (!get_ops (rhs, code, &ops,
2894 loop_containing_stmt (stmt))
2895 && has_single_use (rhs))
2897 /* Otherwise, push the _234 range test itself. */
2898 operand_entry_t oe
2899 = (operand_entry_t) pool_alloc (operand_entry_pool);
2901 oe->op = rhs;
2902 oe->rank = code;
2903 oe->id = 0;
2904 oe->count = 1;
2905 ops.safe_push (oe);
2906 bb_ent.last_idx++;
2908 else
2909 bb_ent.last_idx = ops.length ();
2910 bb_ent.op = rhs;
2911 bbinfo.safe_push (bb_ent);
2912 continue;
2914 /* Otherwise stmt is GIMPLE_COND. */
2915 code = gimple_cond_code (stmt);
2916 lhs = gimple_cond_lhs (stmt);
2917 rhs = gimple_cond_rhs (stmt);
2918 if (TREE_CODE (lhs) == SSA_NAME
2919 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2920 && ((code != EQ_EXPR && code != NE_EXPR)
2921 || rhs != boolean_false_node
2922 /* Either push into ops the individual bitwise
2923 or resp. and operands, depending on which
2924 edge is other_bb. */
2925 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2926 ^ (code == EQ_EXPR))
2927 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2928 loop_containing_stmt (stmt))))
2930 /* Or push the GIMPLE_COND stmt itself. */
2931 operand_entry_t oe
2932 = (operand_entry_t) pool_alloc (operand_entry_pool);
2934 oe->op = NULL;
2935 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2936 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2937 /* oe->op = NULL signals that there is no SSA_NAME
2938 for the range test, and oe->id instead is the
2939 number of the basic block at whose end the GIMPLE_COND
2940 is. */
2941 oe->id = bb->index;
2942 oe->count = 1;
2943 ops.safe_push (oe);
2944 bb_ent.op = NULL;
2945 bb_ent.last_idx++;
2947 else if (ops.length () > bb_ent.first_idx)
2949 bb_ent.op = lhs;
2950 bb_ent.last_idx = ops.length ();
2952 bbinfo.safe_push (bb_ent);
2953 if (bb == first_bb)
2954 break;
2956 if (ops.length () > 1)
2957 any_changes = optimize_range_tests (ERROR_MARK, &ops);
2958 if (any_changes)
2960 unsigned int idx;
2961 /* update_ops relies on has_single_use predicates returning the
2962 same values as they did during get_ops earlier. Additionally it
2963 never removes statements, only adds new ones and it should walk
2964 from the single imm use and check the predicate already before
2965 making those changes.
2966 On the other hand, the handling of GIMPLE_COND directly can turn
2967 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
2968 it needs to be done in a separate loop afterwards. */
2969 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2971 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2972 && bbinfo[idx].op != NULL_TREE)
2974 tree new_op;
2976 stmt = last_stmt (bb);
2977 new_op = update_ops (bbinfo[idx].op,
2978 (enum tree_code)
2979 ops[bbinfo[idx].first_idx]->rank,
2980 ops, &bbinfo[idx].first_idx,
2981 loop_containing_stmt (stmt));
2982 if (new_op == NULL_TREE)
2984 gcc_assert (bb == last_bb);
2985 new_op = ops[bbinfo[idx].first_idx++]->op;
2987 if (bbinfo[idx].op != new_op)
2989 imm_use_iterator iter;
2990 use_operand_p use_p;
2991 gimple use_stmt, cast_stmt = NULL;
2993 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
2994 if (is_gimple_debug (use_stmt))
2995 continue;
2996 else if (gimple_code (use_stmt) == GIMPLE_COND
2997 || gimple_code (use_stmt) == GIMPLE_PHI)
2998 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2999 SET_USE (use_p, new_op);
3000 else if (gimple_assign_cast_p (use_stmt))
3001 cast_stmt = use_stmt;
3002 else
3003 gcc_unreachable ();
3004 if (cast_stmt)
3006 gcc_assert (bb == last_bb);
3007 tree lhs = gimple_assign_lhs (cast_stmt);
3008 tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3009 enum tree_code rhs_code
3010 = gimple_assign_rhs_code (cast_stmt);
3011 gimple g;
3012 if (is_gimple_min_invariant (new_op))
3014 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3015 g = gimple_build_assign (new_lhs, new_op);
3017 else
3018 g = gimple_build_assign_with_ops (rhs_code, new_lhs,
3019 new_op, NULL_TREE);
3020 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3021 gimple_set_uid (g, gimple_uid (cast_stmt));
3022 gimple_set_visited (g, true);
3023 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3024 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3025 if (is_gimple_debug (use_stmt))
3026 continue;
3027 else if (gimple_code (use_stmt) == GIMPLE_COND
3028 || gimple_code (use_stmt) == GIMPLE_PHI)
3029 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3030 SET_USE (use_p, new_lhs);
3031 else
3032 gcc_unreachable ();
3036 if (bb == first_bb)
3037 break;
3039 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3041 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3042 && bbinfo[idx].op == NULL_TREE
3043 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3045 stmt = last_stmt (bb);
3046 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3047 gimple_cond_make_false (stmt);
3048 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3049 gimple_cond_make_true (stmt);
3050 else
3052 gimple_cond_set_code (stmt, NE_EXPR);
3053 gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
3054 gimple_cond_set_rhs (stmt, boolean_false_node);
3056 update_stmt (stmt);
3058 if (bb == first_bb)
3059 break;
3064 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3065 of STMT in its operands. This is also known as a "destructive
3066 update" operation. */
3068 static bool
3069 is_phi_for_stmt (gimple stmt, tree operand)
3071 gimple def_stmt;
3072 tree lhs;
3073 use_operand_p arg_p;
3074 ssa_op_iter i;
3076 if (TREE_CODE (operand) != SSA_NAME)
3077 return false;
3079 lhs = gimple_assign_lhs (stmt);
3081 def_stmt = SSA_NAME_DEF_STMT (operand);
3082 if (gimple_code (def_stmt) != GIMPLE_PHI)
3083 return false;
3085 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3086 if (lhs == USE_FROM_PTR (arg_p))
3087 return true;
3088 return false;
3091 /* Remove def stmt of VAR if VAR has zero uses and recurse
3092 on rhs1 operand if so. */
3094 static void
3095 remove_visited_stmt_chain (tree var)
3097 gimple stmt;
3098 gimple_stmt_iterator gsi;
3100 while (1)
3102 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3103 return;
3104 stmt = SSA_NAME_DEF_STMT (var);
3105 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3107 var = gimple_assign_rhs1 (stmt);
3108 gsi = gsi_for_stmt (stmt);
3109 reassoc_remove_stmt (&gsi);
3110 release_defs (stmt);
3112 else
3113 return;
3117 /* This function checks three consecutive operands in
3118 the passed operands vector OPS starting from OPINDEX and
3119 swaps two operands if it is profitable for the binary operation
3120 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3122 We pair ops with the same rank if possible.
3124 The alternative we try is to see if STMT is a destructive
3125 update style statement, which is like:
3126 b = phi (a, ...)
3127 a = c + b;
3128 In that case, we want to use the destructive update form to
3129 expose the possible vectorizer sum reduction opportunity.
3130 If so, the third operand will be the phi node. This
3131 check is not performed if STMT is null.
3133 We could, of course, try to be better as noted above, and do a
3134 lot of work to try to find these opportunities in >3 operand
3135 cases, but it is unlikely to be worth it. */
3137 static void
3138 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3139 unsigned int opindex, gimple stmt)
3141 operand_entry_t oe1, oe2, oe3;
3143 oe1 = ops[opindex];
3144 oe2 = ops[opindex + 1];
3145 oe3 = ops[opindex + 2];
3147 if ((oe1->rank == oe2->rank
3148 && oe2->rank != oe3->rank)
3149 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3150 && !is_phi_for_stmt (stmt, oe1->op)
3151 && !is_phi_for_stmt (stmt, oe2->op)))
3153 struct operand_entry temp = *oe3;
3154 oe3->op = oe1->op;
3155 oe3->rank = oe1->rank;
3156 oe1->op = temp.op;
3157 oe1->rank= temp.rank;
3159 else if ((oe1->rank == oe3->rank
3160 && oe2->rank != oe3->rank)
3161 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3162 && !is_phi_for_stmt (stmt, oe1->op)
3163 && !is_phi_for_stmt (stmt, oe3->op)))
3165 struct operand_entry temp = *oe2;
3166 oe2->op = oe1->op;
3167 oe2->rank = oe1->rank;
3168 oe1->op = temp.op;
3169 oe1->rank = temp.rank;
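/* For illustration (hypothetical ranks): with operand ranks {3, 3, 5} at
   OPINDEX the first and third entries are swapped, with {3, 5, 3} the
   first and second, so that in both cases the two rank-3 operands end up
   at OPINDEX + 1 and OPINDEX + 2 and are paired by the consuming
   statement.  */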
3173 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3174 two definitions, otherwise return STMT. */
3176 static inline gimple
3177 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3179 if (TREE_CODE (rhs1) == SSA_NAME
3180 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3181 stmt = SSA_NAME_DEF_STMT (rhs1);
3182 if (TREE_CODE (rhs2) == SSA_NAME
3183 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3184 stmt = SSA_NAME_DEF_STMT (rhs2);
3185 return stmt;
3188 /* Recursively rewrite our linearized statements so that the operators
3189 match those in OPS[OPINDEX], putting the computation in rank
3190 order. Return new lhs. */
3192 static tree
3193 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3194 vec<operand_entry_t> ops, bool changed)
3196 tree rhs1 = gimple_assign_rhs1 (stmt);
3197 tree rhs2 = gimple_assign_rhs2 (stmt);
3198 tree lhs = gimple_assign_lhs (stmt);
3199 operand_entry_t oe;
3201 /* The final recursion case for this function is that you have
3202 exactly two operations left.
3203 If we had exactly one op in the entire list to start with, we
3204 would have never called this function, and the tail recursion
3205 rewrites them one at a time. */
3206 if (opindex + 2 == ops.length ())
3208 operand_entry_t oe1, oe2;
3210 oe1 = ops[opindex];
3211 oe2 = ops[opindex + 1];
3213 if (rhs1 != oe1->op || rhs2 != oe2->op)
3215 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3216 unsigned int uid = gimple_uid (stmt);
3218 if (dump_file && (dump_flags & TDF_DETAILS))
3220 fprintf (dump_file, "Transforming ");
3221 print_gimple_stmt (dump_file, stmt, 0, 0);
3224 if (changed)
3226 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3227 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3228 stmt
3229 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3230 lhs, oe1->op, oe2->op);
3231 gimple_set_uid (stmt, uid);
3232 gimple_set_visited (stmt, true);
3233 if (insert_point == gsi_stmt (gsi))
3234 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3235 else
3236 insert_stmt_after (stmt, insert_point);
3238 else
3240 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3241 == stmt);
3242 gimple_assign_set_rhs1 (stmt, oe1->op);
3243 gimple_assign_set_rhs2 (stmt, oe2->op);
3244 update_stmt (stmt);
3247 if (rhs1 != oe1->op && rhs1 != oe2->op)
3248 remove_visited_stmt_chain (rhs1);
3250 if (dump_file && (dump_flags & TDF_DETAILS))
3252 fprintf (dump_file, " into ");
3253 print_gimple_stmt (dump_file, stmt, 0, 0);
3256 return lhs;
3259 /* If we hit here, we should have 3 or more ops left. */
3260 gcc_assert (opindex + 2 < ops.length ());
3262 /* Rewrite the next operator. */
3263 oe = ops[opindex];
3265 /* Recurse on the LHS of the binary operator, which is guaranteed to
3266 be the non-leaf side. */
3267 tree new_rhs1
3268 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3269 changed || oe->op != rhs2);
3271 if (oe->op != rhs2 || new_rhs1 != rhs1)
3273 if (dump_file && (dump_flags & TDF_DETAILS))
3275 fprintf (dump_file, "Transforming ");
3276 print_gimple_stmt (dump_file, stmt, 0, 0);
3279 /* If changed is false, this is either opindex == 0
3280 or all outer rhs2's were equal to corresponding oe->op,
3281 and powi_result is NULL.
3282 That means lhs is equivalent before and after reassociation.
3283 Otherwise ensure the old lhs SSA_NAME is not reused and
3284 create a new stmt as well, so that any debug stmts will be
3285 properly adjusted. */
3286 if (changed)
3288 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3289 unsigned int uid = gimple_uid (stmt);
3290 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3292 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3293 stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3294 lhs, new_rhs1, oe->op);
3295 gimple_set_uid (stmt, uid);
3296 gimple_set_visited (stmt, true);
3297 if (insert_point == gsi_stmt (gsi))
3298 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3299 else
3300 insert_stmt_after (stmt, insert_point);
3302 else
3304 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3305 == stmt);
3306 gimple_assign_set_rhs1 (stmt, new_rhs1);
3307 gimple_assign_set_rhs2 (stmt, oe->op);
3308 update_stmt (stmt);
3311 if (dump_file && (dump_flags & TDF_DETAILS))
3313 fprintf (dump_file, " into ");
3314 print_gimple_stmt (dump_file, stmt, 0, 0);
3317 return lhs;
3320 /* Find out how many cycles we need to compute a statement chain.
3321 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3322 maximum number of independent statements we may execute per cycle. */
3324 static int
3325 get_required_cycles (int ops_num, int cpu_width)
3327 int res;
3328 int elog;
3329 unsigned int rest;
3331 /* While we have more than 2 * cpu_width operands
3332 we may reduce the number of operands by cpu_width
3333 per cycle. */
3334 res = ops_num / (2 * cpu_width);
3336 /* The remaining operand count may be halved each cycle
3337 until we have only one operand. */
3338 rest = (unsigned)(ops_num - res * cpu_width);
3339 elog = exact_log2 (rest);
3340 if (elog >= 0)
3341 res += elog;
3342 else
3343 res += floor_log2 (rest) + 1;
3345 return res;
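/* A worked example (arbitrary numbers): with ops_num == 10 and
   cpu_width == 2, res = 10 / 4 = 2 and rest = 10 - 2 * 2 = 6; 6 is not a
   power of two, so res += floor_log2 (6) + 1 = 3, giving 5 cycles.  With
   ops_num == 8 and cpu_width == 2, res = 2, rest = 4, res += exact_log2 (4)
   = 2, giving 4 cycles.  */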
3348 /* Returns an optimal number of registers to use for computation of
3349 given statements. */
3351 static int
3352 get_reassociation_width (int ops_num, enum tree_code opc,
3353 enum machine_mode mode)
3355 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3356 int width;
3357 int width_min;
3358 int cycles_best;
3360 if (param_width > 0)
3361 width = param_width;
3362 else
3363 width = targetm.sched.reassociation_width (opc, mode);
3365 if (width == 1)
3366 return width;
3368 /* Get the minimal time required for sequence computation. */
3369 cycles_best = get_required_cycles (ops_num, width);
3371 /* Check if we may use less width and still compute the sequence in
3372 the same time. This allows us to reduce register usage.
3373 get_required_cycles is monotonically increasing with lower width
3374 so we can perform a binary search for the minimal width that still
3375 results in the optimal cycle count. */
3376 width_min = 1;
3377 while (width > width_min)
3379 int width_mid = (width + width_min) / 2;
3381 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3382 width = width_mid;
3383 else if (width_min < width_mid)
3384 width_min = width_mid;
3385 else
3386 break;
3389 return width;
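/* A worked example (the target width below is hypothetical): if the target
   reports a reassociation width of 4 and ops_num == 10, then cycles_best
   == get_required_cycles (10, 4) == 4.  The binary search finds that width
   3 still needs only 4 cycles while width 2 would need 5, so 3 is returned
   and register pressure is reduced at no cost in cycles.  */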
3392 /* Recursively rewrite our linearized statements so that the operators
3393 match those in OPS[OPINDEX], putting the computation in rank
3394 order and trying to allow operations to be executed in
3395 parallel. */
3397 static void
3398 rewrite_expr_tree_parallel (gimple stmt, int width,
3399 vec<operand_entry_t> ops)
3401 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3402 int op_num = ops.length ();
3403 int stmt_num = op_num - 1;
3404 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3405 int op_index = op_num - 1;
3406 int stmt_index = 0;
3407 int ready_stmts_end = 0;
3408 int i = 0;
3409 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3411 /* We start expression rewriting from the top statements.
3412 So, in this loop we create a full list of statements
3413 we will work with. */
3414 stmts[stmt_num - 1] = stmt;
3415 for (i = stmt_num - 2; i >= 0; i--)
3416 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3418 for (i = 0; i < stmt_num; i++)
3420 tree op1, op2;
3422 /* Determine whether we should use results of
3423 already handled statements or not. */
3424 if (ready_stmts_end == 0
3425 && (i - stmt_index >= width || op_index < 1))
3426 ready_stmts_end = i;
3428 /* Now we choose operands for the next statement. A nonzero
3429 value in ready_stmts_end here means that we should use
3430 the result of already generated statements as a new operand. */
3431 if (ready_stmts_end > 0)
3433 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3434 if (ready_stmts_end > stmt_index)
3435 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3436 else if (op_index >= 0)
3437 op2 = ops[op_index--]->op;
3438 else
3440 gcc_assert (stmt_index < i);
3441 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3444 if (stmt_index >= ready_stmts_end)
3445 ready_stmts_end = 0;
3447 else
3449 if (op_index > 1)
3450 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3451 op2 = ops[op_index--]->op;
3452 op1 = ops[op_index--]->op;
3455 /* If we emit the last statement then we should put
3456 operands into the last statement. It will also
3457 break the loop. */
3458 if (op_index < 0 && stmt_index == i)
3459 i = stmt_num - 1;
3461 if (dump_file && (dump_flags & TDF_DETAILS))
3463 fprintf (dump_file, "Transforming ");
3464 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3467 /* We keep original statement only for the last one. All
3468 others are recreated. */
3469 if (i == stmt_num - 1)
3471 gimple_assign_set_rhs1 (stmts[i], op1);
3472 gimple_assign_set_rhs2 (stmts[i], op2);
3473 update_stmt (stmts[i]);
3475 else
3476 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3478 if (dump_file && (dump_flags & TDF_DETAILS))
3480 fprintf (dump_file, " into ");
3481 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3485 remove_visited_stmt_chain (last_rhs1);
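/* For illustration of the parallel rewrite above (hypothetical operands and
   SSA names): with ops == {a, b, c, d} and width == 2 the chain is emitted
   roughly as _1 = c + d; _2 = a + b; lhs = _1 + _2; i.e. two independent
   partial sums combined at the end, instead of the serial
   ((a + b) + c) + d.  */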
3488 /* Transform STMT, which is really (A + B) + (C + D), into the left
3489 linear form, ((A+B)+C)+D.
3490 Recurse on D if necessary. */
3492 static void
3493 linearize_expr (gimple stmt)
3495 gimple_stmt_iterator gsi;
3496 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3497 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3498 gimple oldbinrhs = binrhs;
3499 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3500 gimple newbinrhs = NULL;
3501 struct loop *loop = loop_containing_stmt (stmt);
3502 tree lhs = gimple_assign_lhs (stmt);
3504 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3505 && is_reassociable_op (binrhs, rhscode, loop));
3507 gsi = gsi_for_stmt (stmt);
3509 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3510 binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3511 make_ssa_name (TREE_TYPE (lhs), NULL),
3512 gimple_assign_lhs (binlhs),
3513 gimple_assign_rhs2 (binrhs));
3514 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3515 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3516 gimple_set_uid (binrhs, gimple_uid (stmt));
3518 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3519 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3521 if (dump_file && (dump_flags & TDF_DETAILS))
3523 fprintf (dump_file, "Linearized: ");
3524 print_gimple_stmt (dump_file, stmt, 0, 0);
3527 reassociate_stats.linearized++;
3528 update_stmt (stmt);
3530 gsi = gsi_for_stmt (oldbinrhs);
3531 reassoc_remove_stmt (&gsi);
3532 release_defs (oldbinrhs);
3534 gimple_set_visited (stmt, true);
3535 gimple_set_visited (binlhs, true);
3536 gimple_set_visited (binrhs, true);
3538 /* Tail recurse on the new rhs if it still needs reassociation. */
3539 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3540 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3541 want to change the algorithm while converting to tuples. */
3542 linearize_expr (stmt);
3545 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3546 it. Otherwise, return NULL. */
3548 static gimple
3549 get_single_immediate_use (tree lhs)
3551 use_operand_p immuse;
3552 gimple immusestmt;
3554 if (TREE_CODE (lhs) == SSA_NAME
3555 && single_imm_use (lhs, &immuse, &immusestmt)
3556 && is_gimple_assign (immusestmt))
3557 return immusestmt;
3559 return NULL;
3562 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3563 representing the negated value. Insertions of any necessary
3564 instructions go before GSI.
3565 This function is recursive in that, if you hand it "a_5" as the
3566 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3567 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3569 static tree
3570 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3572 gimple negatedefstmt = NULL;
3573 tree resultofnegate;
3574 gimple_stmt_iterator gsi;
3575 unsigned int uid;
3577 /* If we are trying to negate a name, defined by an add, negate the
3578 add operands instead. */
3579 if (TREE_CODE (tonegate) == SSA_NAME)
3580 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3581 if (TREE_CODE (tonegate) == SSA_NAME
3582 && is_gimple_assign (negatedefstmt)
3583 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3584 && has_single_use (gimple_assign_lhs (negatedefstmt))
3585 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3587 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3588 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3589 tree lhs = gimple_assign_lhs (negatedefstmt);
3590 gimple g;
3592 gsi = gsi_for_stmt (negatedefstmt);
3593 rhs1 = negate_value (rhs1, &gsi);
3595 gsi = gsi_for_stmt (negatedefstmt);
3596 rhs2 = negate_value (rhs2, &gsi);
3598 gsi = gsi_for_stmt (negatedefstmt);
3599 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3600 gimple_set_visited (negatedefstmt, true);
3601 g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3602 gimple_set_uid (g, gimple_uid (negatedefstmt));
3603 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3604 return lhs;
3607 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3608 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3609 NULL_TREE, true, GSI_SAME_STMT);
3610 gsi = *gsip;
3611 uid = gimple_uid (gsi_stmt (gsi));
3612 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3614 gimple stmt = gsi_stmt (gsi);
3615 if (gimple_uid (stmt) != 0)
3616 break;
3617 gimple_set_uid (stmt, uid);
3619 return resultofnegate;
3622 /* Return true if we should break up the subtract in STMT into an add
3623 with negate. This is true when the subtract operands are really
3624 adds, or the subtract itself is used in an add expression. In
3625 either case, breaking up the subtract into an add with negate
3626 exposes the adds to reassociation. */
3628 static bool
3629 should_break_up_subtract (gimple stmt)
3631 tree lhs = gimple_assign_lhs (stmt);
3632 tree binlhs = gimple_assign_rhs1 (stmt);
3633 tree binrhs = gimple_assign_rhs2 (stmt);
3634 gimple immusestmt;
3635 struct loop *loop = loop_containing_stmt (stmt);
3637 if (TREE_CODE (binlhs) == SSA_NAME
3638 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3639 return true;
3641 if (TREE_CODE (binrhs) == SSA_NAME
3642 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3643 return true;
3645 if (TREE_CODE (lhs) == SSA_NAME
3646 && (immusestmt = get_single_immediate_use (lhs))
3647 && is_gimple_assign (immusestmt)
3648 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3649 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3650 return true;
3651 return false;
3654 /* Transform STMT from A - B into A + -B. */
3656 static void
3657 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3659 tree rhs1 = gimple_assign_rhs1 (stmt);
3660 tree rhs2 = gimple_assign_rhs2 (stmt);
3662 if (dump_file && (dump_flags & TDF_DETAILS))
3664 fprintf (dump_file, "Breaking up subtract ");
3665 print_gimple_stmt (dump_file, stmt, 0, 0);
3668 rhs2 = negate_value (rhs2, gsip);
3669 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3670 update_stmt (stmt);
3673 /* Determine whether STMT is a builtin call that raises an SSA name
3674 to an integer power and has only one use. If so, and this is early
3675 reassociation and unsafe math optimizations are permitted, place
3676 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3677 If any of these conditions does not hold, return FALSE. */
3679 static bool
3680 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3682 tree fndecl, arg1;
3683 REAL_VALUE_TYPE c, cint;
3685 if (!first_pass_instance
3686 || !flag_unsafe_math_optimizations
3687 || !is_gimple_call (stmt)
3688 || !has_single_use (gimple_call_lhs (stmt)))
3689 return false;
3691 fndecl = gimple_call_fndecl (stmt);
3693 if (!fndecl
3694 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3695 return false;
3697 switch (DECL_FUNCTION_CODE (fndecl))
3699 CASE_FLT_FN (BUILT_IN_POW):
3700 *base = gimple_call_arg (stmt, 0);
3701 arg1 = gimple_call_arg (stmt, 1);
3703 if (TREE_CODE (arg1) != REAL_CST)
3704 return false;
3706 c = TREE_REAL_CST (arg1);
3708 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3709 return false;
3711 *exponent = real_to_integer (&c);
3712 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
3713 if (!real_identical (&c, &cint))
3714 return false;
3716 break;
3718 CASE_FLT_FN (BUILT_IN_POWI):
3719 *base = gimple_call_arg (stmt, 0);
3720 arg1 = gimple_call_arg (stmt, 1);
3722 if (!tree_fits_shwi_p (arg1))
3723 return false;
3725 *exponent = tree_to_shwi (arg1);
3726 break;
3728 default:
3729 return false;
3732 /* Expanding negative exponents is generally unproductive, so we don't
3733 complicate matters with those. Exponents of zero and one should
3734 have been handled by expression folding. */
3735 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3736 return false;
3738 return true;
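/* For example (illustrative calls only): __builtin_pow (x_3, 4.0) with a
   single-use lhs yields *base = x_3 and *exponent = 4;
   __builtin_pow (x_3, 2.5) is rejected because the exponent is not an
   exact integer, and __builtin_powi (x_3, 1) is rejected because exponents
   below 2 are not worth expanding.  */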
3741 /* Recursively linearize a binary expression that is the RHS of STMT.
3742 Place the operands of the expression tree in the vector named OPS. */
3744 static void
3745 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3746 bool is_associative, bool set_visited)
3748 tree binlhs = gimple_assign_rhs1 (stmt);
3749 tree binrhs = gimple_assign_rhs2 (stmt);
3750 gimple binlhsdef = NULL, binrhsdef = NULL;
3751 bool binlhsisreassoc = false;
3752 bool binrhsisreassoc = false;
3753 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3754 struct loop *loop = loop_containing_stmt (stmt);
3755 tree base = NULL_TREE;
3756 HOST_WIDE_INT exponent = 0;
3758 if (set_visited)
3759 gimple_set_visited (stmt, true);
3761 if (TREE_CODE (binlhs) == SSA_NAME)
3763 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3764 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3765 && !stmt_could_throw_p (binlhsdef));
3768 if (TREE_CODE (binrhs) == SSA_NAME)
3770 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3771 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3772 && !stmt_could_throw_p (binrhsdef));
3775 /* If the LHS is not reassociable, but the RHS is, we need to swap
3776 them. If neither is reassociable, there is nothing we can do, so
3777 just put them in the ops vector. If the LHS is reassociable,
3778 linearize it. If both are reassociable, then linearize the RHS
3779 and the LHS. */
3781 if (!binlhsisreassoc)
3783 tree temp;
3785 /* If this is not an associative operation, like division, give up. */
3786 if (!is_associative)
3788 add_to_ops_vec (ops, binrhs);
3789 return;
3792 if (!binrhsisreassoc)
3794 if (rhscode == MULT_EXPR
3795 && TREE_CODE (binrhs) == SSA_NAME
3796 && acceptable_pow_call (binrhsdef, &base, &exponent))
3798 add_repeat_to_ops_vec (ops, base, exponent);
3799 gimple_set_visited (binrhsdef, true);
3801 else
3802 add_to_ops_vec (ops, binrhs);
3804 if (rhscode == MULT_EXPR
3805 && TREE_CODE (binlhs) == SSA_NAME
3806 && acceptable_pow_call (binlhsdef, &base, &exponent))
3808 add_repeat_to_ops_vec (ops, base, exponent);
3809 gimple_set_visited (binlhsdef, true);
3811 else
3812 add_to_ops_vec (ops, binlhs);
3814 return;
3817 if (dump_file && (dump_flags & TDF_DETAILS))
3819 fprintf (dump_file, "swapping operands of ");
3820 print_gimple_stmt (dump_file, stmt, 0, 0);
3823 swap_ssa_operands (stmt,
3824 gimple_assign_rhs1_ptr (stmt),
3825 gimple_assign_rhs2_ptr (stmt));
3826 update_stmt (stmt);
3828 if (dump_file && (dump_flags & TDF_DETAILS))
3830 fprintf (dump_file, " is now ");
3831 print_gimple_stmt (dump_file, stmt, 0, 0);
3834 /* We want to make it so the lhs is always the reassociative op,
3835 so swap. */
3836 temp = binlhs;
3837 binlhs = binrhs;
3838 binrhs = temp;
3840 else if (binrhsisreassoc)
3842 linearize_expr (stmt);
3843 binlhs = gimple_assign_rhs1 (stmt);
3844 binrhs = gimple_assign_rhs2 (stmt);
3847 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3848 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3849 rhscode, loop));
3850 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3851 is_associative, set_visited);
3853 if (rhscode == MULT_EXPR
3854 && TREE_CODE (binrhs) == SSA_NAME
3855 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3857 add_repeat_to_ops_vec (ops, base, exponent);
3858 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3860 else
3861 add_to_ops_vec (ops, binrhs);
3864 /* Repropagate the negates back into subtracts, since no other pass
3865 currently does it. */
3867 static void
3868 repropagate_negates (void)
3870 unsigned int i = 0;
3871 tree negate;
3873 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3875 gimple user = get_single_immediate_use (negate);
3877 if (!user || !is_gimple_assign (user))
3878 continue;
3880 /* The negate operand can be either operand of a PLUS_EXPR
3881 (it can be the LHS if the RHS is a constant for example).
3883 Force the negate operand to the RHS of the PLUS_EXPR, then
3884 transform the PLUS_EXPR into a MINUS_EXPR. */
3885 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3887 /* If the negated operand appears on the LHS of the
3888 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3889 to force the negated operand to the RHS of the PLUS_EXPR. */
3890 if (gimple_assign_rhs1 (user) == negate)
3892 swap_ssa_operands (user,
3893 gimple_assign_rhs1_ptr (user),
3894 gimple_assign_rhs2_ptr (user));
3897 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3898 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3899 if (gimple_assign_rhs2 (user) == negate)
3901 tree rhs1 = gimple_assign_rhs1 (user);
3902 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3903 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3904 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3905 update_stmt (user);
3908 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3910 if (gimple_assign_rhs1 (user) == negate)
3912 /* We have
3913 x = -a
3914 y = x - b
3915 which we transform into
3916 x = a + b
3917 y = -x .
3918 This pushes down the negate which we possibly can merge
3919 into some other operation, hence insert it into the
3920 plus_negates vector. */
3921 gimple feed = SSA_NAME_DEF_STMT (negate);
3922 tree a = gimple_assign_rhs1 (feed);
3923 tree b = gimple_assign_rhs2 (user);
3924 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
3925 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
3926 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
3927 gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
3928 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3929 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
3930 user = gsi_stmt (gsi2);
3931 update_stmt (user);
3932 reassoc_remove_stmt (&gsi);
3933 release_defs (feed);
3934 plus_negates.safe_push (gimple_assign_lhs (user));
3936 else
3938 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3939 rid of one operation. */
3940 gimple feed = SSA_NAME_DEF_STMT (negate);
3941 tree a = gimple_assign_rhs1 (feed);
3942 tree rhs1 = gimple_assign_rhs1 (user);
3943 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3944 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3945 update_stmt (gsi_stmt (gsi));
3951 /* Returns true if OP is of a type for which we can do reassociation:
3952 integral types with wrapping overflow, non-saturating fixed-point
3953 types, and floating-point types when -fassociative-math is enabled. */
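/* For example (illustrative, not exhaustive): unsigned int and
   non-saturating fixed-point operands qualify; a signed int without
   -fwrapv fails TYPE_OVERFLOW_WRAPS and is rejected; double is
   accepted only when -fassociative-math is in effect.  */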
3955 static bool
3956 can_reassociate_p (tree op)
3958 tree type = TREE_TYPE (op);
3959 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3960 || NON_SAT_FIXED_POINT_TYPE_P (type)
3961 || (flag_associative_math && FLOAT_TYPE_P (type)))
3962 return true;
3963 return false;
3966 /* Break up subtract operations in block BB.
3968 We do this top down because we don't know whether the subtract is
3969 part of a possible chain of reassociation except at the top.
3971 IE given
3972 d = f + g
3973 c = a + e
3974 b = c - d
3975 q = b - r
3976 k = t - q
3978 we want to break up k = t - q, but we won't until we've transformed q
3979 = b - r, which won't be broken up until we transform b = c - d.
3981 En passant, clear the GIMPLE visited flag on every statement
3982 and set UIDs within each basic block. */
3984 static void
3985 break_up_subtract_bb (basic_block bb)
3987 gimple_stmt_iterator gsi;
3988 basic_block son;
3989 unsigned int uid = 1;
3991 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3993 gimple stmt = gsi_stmt (gsi);
3994 gimple_set_visited (stmt, false);
3995 gimple_set_uid (stmt, uid++);
3997 if (!is_gimple_assign (stmt)
3998 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3999 continue;
4001 /* Look for simple gimple subtract operations. */
4002 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4004 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4005 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4006 continue;
4008 /* Check for a subtract used only in an addition. If this
4009 is the case, transform it into add of a negate for better
4010 reassociation. IE transform C = A-B into C = A + -B if C
4011 is only used in an addition. */
4012 if (should_break_up_subtract (stmt))
4013 break_up_subtract (stmt, &gsi);
4015 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4016 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4017 plus_negates.safe_push (gimple_assign_lhs (stmt));
4019 for (son = first_dom_son (CDI_DOMINATORS, bb);
4020 son;
4021 son = next_dom_son (CDI_DOMINATORS, son))
4022 break_up_subtract_bb (son);
4025 /* Used for repeated factor analysis. */
4026 struct repeat_factor_d
4028 /* An SSA name that occurs in a multiply chain. */
4029 tree factor;
4031 /* Cached rank of the factor. */
4032 unsigned rank;
4034 /* Number of occurrences of the factor in the chain. */
4035 HOST_WIDE_INT count;
4037 /* An SSA name representing the product of this factor and
4038 all factors appearing later in the repeated factor vector. */
4039 tree repr;
4042 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4043 typedef const struct repeat_factor_d *const_repeat_factor_t;
4046 static vec<repeat_factor> repeat_factor_vec;
4048 /* Used for sorting the repeat factor vector. Sort primarily by
4049 ascending occurrence count, secondarily by descending rank. */
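/* E.g. (made-up data): factors {x: count 3, rank 5},
   {y: count 1, rank 9} and {z: count 3, rank 2} sort as y, x, z --
   y first on the lower count, then x before z on the higher rank.  */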
4051 static int
4052 compare_repeat_factors (const void *x1, const void *x2)
4054 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4055 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4057 if (rf1->count != rf2->count)
4058 return rf1->count < rf2->count ? -1 : 1;  /* Avoid truncating the difference to int.  */
4060 return rf2->rank < rf1->rank ? -1 : (rf2->rank > rf1->rank ? 1 : 0);
4063 /* Look for repeated operands in OPS in the multiply tree rooted at
4064 STMT. Replace them with an optimal sequence of multiplies and powi
4065 builtin calls, and remove the used operands from OPS. Return an
4066 SSA name representing the value of the replacement sequence. */
4068 static tree
4069 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4071 unsigned i, j, vec_len;
4072 int ii;
4073 operand_entry_t oe;
4074 repeat_factor_t rf1, rf2;
4075 repeat_factor rfnew;
4076 tree result = NULL_TREE;
4077 tree target_ssa, iter_result;
4078 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4079 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4080 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4081 gimple mul_stmt, pow_stmt;
4083 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4084 target. */
4085 if (!powi_fndecl)
4086 return NULL_TREE;
4088 /* Allocate the repeated factor vector. */
4089 repeat_factor_vec.create (10);
4091 /* Scan the OPS vector for all SSA names in the product and build
4092 up a vector of occurrence counts for each factor. */
4093 FOR_EACH_VEC_ELT (*ops, i, oe)
4095 if (TREE_CODE (oe->op) == SSA_NAME)
4097 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4099 if (rf1->factor == oe->op)
4101 rf1->count += oe->count;
4102 break;
4106 if (j >= repeat_factor_vec.length ())
4108 rfnew.factor = oe->op;
4109 rfnew.rank = oe->rank;
4110 rfnew.count = oe->count;
4111 rfnew.repr = NULL_TREE;
4112 repeat_factor_vec.safe_push (rfnew);
4117 /* Sort the repeated factor vector by (a) increasing occurrence count,
4118 and (b) decreasing rank. */
4119 repeat_factor_vec.qsort (compare_repeat_factors);
4121 /* It is generally best to combine as many base factors as possible
4122 into a product before applying __builtin_powi to the result.
4123 However, the sort order chosen for the repeated factor vector
4124 allows us to cache partial results for the product of the base
4125 factors for subsequent use. When we already have a cached partial
4126 result from a previous iteration, it is best to make use of it
4127 before looking for another __builtin_powi opportunity.
4129 As an example, consider x * x * y * y * y * z * z * z * z.
4130 We want to first compose the product x * y * z, raise it to the
4131 second power, then multiply this by y * z, and finally multiply
4132 by z. This can be done in 5 multiplies provided we cache y * z
4133 for use in both expressions:
4135 t1 = y * z
4136 t2 = t1 * x
4137 t3 = t2 * t2
4138 t4 = t1 * t3
4139 result = t4 * z
4141 If we instead ignored the cached y * z and first multiplied by
4142 the __builtin_powi opportunity z * z, we would get this inferior sequence:
4144 t1 = y * z
4145 t2 = t1 * x
4146 t3 = t2 * t2
4147 t4 = z * z
4148 t5 = t3 * t4
4149 result = t5 * y */
4151 vec_len = repeat_factor_vec.length ();
4153 /* Repeatedly look for opportunities to create a builtin_powi call. */
4154 while (true)
4156 HOST_WIDE_INT power;
4158 /* First look for the largest cached product of factors from
4159 preceding iterations. If found, create a builtin_powi for
4160 it if the minimum occurrence count for its factors is at
4161 least 2, or just use this cached product as our next
4162 multiplicand if the minimum occurrence count is 1. */
4163 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4165 if (rf1->repr && rf1->count > 0)
4166 break;
4169 if (j < vec_len)
4171 power = rf1->count;
4173 if (power == 1)
4175 iter_result = rf1->repr;
4177 if (dump_file && (dump_flags & TDF_DETAILS))
4179 unsigned elt;
4180 repeat_factor_t rf;
4181 fputs ("Multiplying by cached product ", dump_file);
4182 for (elt = j; elt < vec_len; elt++)
4184 rf = &repeat_factor_vec[elt];
4185 print_generic_expr (dump_file, rf->factor, 0);
4186 if (elt < vec_len - 1)
4187 fputs (" * ", dump_file);
4189 fputs ("\n", dump_file);
4192 else
4194 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4195 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4196 build_int_cst (integer_type_node,
4197 power));
4198 gimple_call_set_lhs (pow_stmt, iter_result);
4199 gimple_set_location (pow_stmt, gimple_location (stmt));
4200 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4202 if (dump_file && (dump_flags & TDF_DETAILS))
4204 unsigned elt;
4205 repeat_factor_t rf;
4206 fputs ("Building __builtin_pow call for cached product (",
4207 dump_file);
4208 for (elt = j; elt < vec_len; elt++)
4210 rf = &repeat_factor_vec[elt];
4211 print_generic_expr (dump_file, rf->factor, 0);
4212 if (elt < vec_len - 1)
4213 fputs (" * ", dump_file);
4215 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4216 power);
4220 else
4222 /* Otherwise, find the first factor in the repeated factor
4223 vector whose occurrence count is at least 2. If no such
4224 factor exists, there are no builtin_powi opportunities
4225 remaining. */
4226 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4228 if (rf1->count >= 2)
4229 break;
4232 if (j >= vec_len)
4233 break;
4235 power = rf1->count;
4237 if (dump_file && (dump_flags & TDF_DETAILS))
4239 unsigned elt;
4240 repeat_factor_t rf;
4241 fputs ("Building __builtin_pow call for (", dump_file);
4242 for (elt = j; elt < vec_len; elt++)
4244 rf = &repeat_factor_vec[elt];
4245 print_generic_expr (dump_file, rf->factor, 0);
4246 if (elt < vec_len - 1)
4247 fputs (" * ", dump_file);
4249 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4252 reassociate_stats.pows_created++;
4254 /* Visit each element of the vector in reverse order (so that
4255 high-occurrence elements are visited first, and within the
4256 same occurrence count, lower-ranked elements are visited
4257 first). Form a linear product of all elements in this order
4258 whose occurrence count is at least that of element J.
4259 Record the SSA name representing the product of each element
4260 with all subsequent elements in the vector. */
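/* Sketch of the repr chaining the loop below produces, for
   made-up sorted factors x, y, z with x at index J:
     repr(z) = z
     repr(y) = y * z
     repr(x) = x * y * z
   i.e. each repr is the product of that factor with everything
   following it in the vector.  */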
4261 if (j == vec_len - 1)
4262 rf1->repr = rf1->factor;
4263 else
4265 for (ii = vec_len - 2; ii >= (int)j; ii--)
4267 tree op1, op2;
4269 rf1 = &repeat_factor_vec[ii];
4270 rf2 = &repeat_factor_vec[ii + 1];
4272 /* Init the last factor's representative to be itself. */
4273 if (!rf2->repr)
4274 rf2->repr = rf2->factor;
4276 op1 = rf1->factor;
4277 op2 = rf2->repr;
4279 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4280 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4281 target_ssa,
4282 op1, op2);
4283 gimple_set_location (mul_stmt, gimple_location (stmt));
4284 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4285 rf1->repr = target_ssa;
4287 /* Don't reprocess the multiply we just introduced. */
4288 gimple_set_visited (mul_stmt, true);
4292 /* Form a call to __builtin_powi for the maximum product
4293 just formed, raised to the power obtained earlier. */
4294 rf1 = &repeat_factor_vec[j];
4295 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4296 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4297 build_int_cst (integer_type_node,
4298 power));
4299 gimple_call_set_lhs (pow_stmt, iter_result);
4300 gimple_set_location (pow_stmt, gimple_location (stmt));
4301 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4304 /* If we previously formed at least one other builtin_powi call,
4305 form the product of this one and those others. */
4306 if (result)
4308 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4309 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4310 result, iter_result);
4311 gimple_set_location (mul_stmt, gimple_location (stmt));
4312 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4313 gimple_set_visited (mul_stmt, true);
4314 result = new_result;
4316 else
4317 result = iter_result;
4319 /* Decrement the occurrence count of each element in the product
4320 by the count found above, and remove this many copies of each
4321 factor from OPS. */
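/* E.g. (illustrative): if POWER is 2 and OPS holds an entry for
   factor x with count 3, two copies are consumed and the entry is
   left in OPS with count 1.  */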
4322 for (i = j; i < vec_len; i++)
4324 unsigned k = power;
4325 unsigned n;
4327 rf1 = &repeat_factor_vec[i];
4328 rf1->count -= power;
4330 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4332 if (oe->op == rf1->factor)
4334 if (oe->count <= k)
4336 ops->ordered_remove (n);
4337 k -= oe->count;
4339 if (k == 0)
4340 break;
4342 else
4344 oe->count -= k;
4345 break;
4352 /* At this point all elements in the repeated factor vector have a
4353 remaining occurrence count of 0 or 1, and those with a count of 1
4354 don't have cached representatives. Re-sort the ops vector and
4355 clean up. */
4356 ops->qsort (sort_by_operand_rank);
4357 repeat_factor_vec.release ();
4359 /* Return the final product computed herein. Note that there may
4360 still be some elements with single occurrence count left in OPS;
4361 those will be handled by the normal reassociation logic. */
4362 return result;
4365 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4367 static void
4368 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4370 tree rhs1;
4372 if (dump_file && (dump_flags & TDF_DETAILS))
4374 fprintf (dump_file, "Transforming ");
4375 print_gimple_stmt (dump_file, stmt, 0, 0);
4378 rhs1 = gimple_assign_rhs1 (stmt);
4379 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4380 update_stmt (stmt);
4381 remove_visited_stmt_chain (rhs1);
4383 if (dump_file && (dump_flags & TDF_DETAILS))
4385 fprintf (dump_file, " into ");
4386 print_gimple_stmt (dump_file, stmt, 0, 0);
4390 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4392 static void
4393 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4394 tree rhs1, tree rhs2)
4396 if (dump_file && (dump_flags & TDF_DETAILS))
4398 fprintf (dump_file, "Transforming ");
4399 print_gimple_stmt (dump_file, stmt, 0, 0);
4402 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4403 update_stmt (gsi_stmt (*gsi));
4404 remove_visited_stmt_chain (rhs1);
4406 if (dump_file && (dump_flags & TDF_DETAILS))
4408 fprintf (dump_file, " into ");
4409 print_gimple_stmt (dump_file, stmt, 0, 0);
4413 /* Reassociate expressions in basic block BB and its post-dominator
4414 tree children. */
4416 static void
4417 reassociate_bb (basic_block bb)
4419 gimple_stmt_iterator gsi;
4420 basic_block son;
4421 gimple stmt = last_stmt (bb);
4423 if (stmt && !gimple_visited_p (stmt))
4424 maybe_optimize_range_tests (stmt);
4426 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4428 stmt = gsi_stmt (gsi);
4430 if (is_gimple_assign (stmt)
4431 && !stmt_could_throw_p (stmt))
4433 tree lhs, rhs1, rhs2;
4434 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4436 /* If this is not a gimple binary expression, there is
4437 nothing for us to do with it. */
4438 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4439 continue;
4441 /* If this was part of an already processed statement,
4442 we don't need to touch it again. */
4443 if (gimple_visited_p (stmt))
4445 /* This statement might have become dead because of previous
4446 reassociations. */
4447 if (has_zero_uses (gimple_get_lhs (stmt)))
4449 reassoc_remove_stmt (&gsi);
4450 release_defs (stmt);
4451 /* We might end up removing the last stmt above, which
4452 leaves the iterator at the end of the sequence.
4453 Reset it to the last stmt of the block in that case;
4454 if the block has become empty as well, there is
4455 nothing left to process and we bail out. */
4457 if (gsi_end_p (gsi))
4459 gsi = gsi_last_bb (bb);
4460 if (gsi_end_p (gsi))
4461 break;
4464 continue;
4467 lhs = gimple_assign_lhs (stmt);
4468 rhs1 = gimple_assign_rhs1 (stmt);
4469 rhs2 = gimple_assign_rhs2 (stmt);
4471 /* For operations other than bitwise and min/max we cannot
4472 associate all types. Verify that here. */
4473 if (rhs_code != BIT_IOR_EXPR
4474 && rhs_code != BIT_AND_EXPR
4475 && rhs_code != BIT_XOR_EXPR
4476 && rhs_code != MIN_EXPR
4477 && rhs_code != MAX_EXPR
4478 && (!can_reassociate_p (lhs)
4479 || !can_reassociate_p (rhs1)
4480 || !can_reassociate_p (rhs2)))
4481 continue;
4483 if (associative_tree_code (rhs_code))
4485 auto_vec<operand_entry_t> ops;
4486 tree powi_result = NULL_TREE;
4488 /* There may be no immediate uses left by the time we
4489 get here because we may have eliminated them all. */
4490 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4491 continue;
4493 gimple_set_visited (stmt, true);
4494 linearize_expr_tree (&ops, stmt, true, true);
4495 ops.qsort (sort_by_operand_rank);
4496 optimize_ops_list (rhs_code, &ops);
4497 if (undistribute_ops_list (rhs_code, &ops,
4498 loop_containing_stmt (stmt)))
4500 ops.qsort (sort_by_operand_rank);
4501 optimize_ops_list (rhs_code, &ops);
4504 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4505 optimize_range_tests (rhs_code, &ops);
4507 if (first_pass_instance
4508 && rhs_code == MULT_EXPR
4509 && flag_unsafe_math_optimizations)
4510 powi_result = attempt_builtin_powi (stmt, &ops);
4512 /* If the operand vector is now empty, all operands were
4513 consumed by the __builtin_powi optimization. */
4514 if (ops.length () == 0)
4515 transform_stmt_to_copy (&gsi, stmt, powi_result);
4516 else if (ops.length () == 1)
4518 tree last_op = ops.last ()->op;
4520 if (powi_result)
4521 transform_stmt_to_multiply (&gsi, stmt, last_op,
4522 powi_result);
4523 else
4524 transform_stmt_to_copy (&gsi, stmt, last_op);
4526 else
4528 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4529 int ops_num = ops.length ();
4530 int width = get_reassociation_width (ops_num, rhs_code, mode);
4531 tree new_lhs = lhs;
4533 if (dump_file && (dump_flags & TDF_DETAILS))
4534 fprintf (dump_file,
4535 "Width = %d was chosen for reassociation\n", width);
4537 if (width > 1
4538 && ops.length () > 3)
4539 rewrite_expr_tree_parallel (stmt, width, ops);
4540 else
4542 /* When there are three operands left, we want
4543 to make sure the ones that get the double
4544 binary op are chosen wisely. */
4545 int len = ops.length ();
4546 if (len >= 3)
4547 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4549 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4550 powi_result != NULL);
4553 /* If we combined some repeated factors into a
4554 __builtin_powi call, multiply that result by the
4555 reassociated operands. */
4556 if (powi_result)
4558 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4559 tree type = TREE_TYPE (lhs);
4560 tree target_ssa = make_temp_ssa_name (type, NULL,
4561 "reassocpow");
4562 gimple_set_lhs (lhs_stmt, target_ssa);
4563 update_stmt (lhs_stmt);
4564 if (lhs != new_lhs)
4565 target_ssa = new_lhs;
4566 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4567 powi_result,
4568 target_ssa);
4569 gimple_set_location (mul_stmt, gimple_location (stmt));
4570 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4576 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4577 son;
4578 son = next_dom_son (CDI_POST_DOMINATORS, son))
4579 reassociate_bb (son);
4582 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4583 void debug_ops_vector (vec<operand_entry_t> ops);
4585 /* Dump the operand entry vector OPS to FILE. */
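/* Output looks roughly like this (illustrative ranks and names):
     Op 0 -> rank: 196610, tree: a_5
     Op 1 -> rank: 196611, tree: b_7  */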
4587 void
4588 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4590 operand_entry_t oe;
4591 unsigned int i;
4593 FOR_EACH_VEC_ELT (ops, i, oe)
4595 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4596 print_generic_expr (file, oe->op, 0);
4600 /* Dump the operand entry vector OPS to STDERR. */
4602 DEBUG_FUNCTION void
4603 debug_ops_vector (vec<operand_entry_t> ops)
4605 dump_ops_vector (stderr, ops);
4608 static void
4609 do_reassoc (void)
4611 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4612 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4615 /* Initialize the reassociation pass. */
4617 static void
4618 init_reassoc (void)
4620 int i;
4621 long rank = 2;
4622 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4624 /* Find the loops, so that we can prevent moving calculations in
4625 them. */
4626 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4628 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4630 operand_entry_pool = create_alloc_pool ("operand entry pool",
4631 sizeof (struct operand_entry), 30);
4632 next_operand_entry_id = 0;
4634 /* Reverse post order (RPO) will give us an ordering in which
4635 deeper loops come later. */
4636 pre_and_rev_post_order_compute (NULL, bbs, false);
4637 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4638 operand_rank = pointer_map_create ();
4640 /* Give each default definition a distinct rank. This includes
4641 parameters and the static chain. Walk backwards over all
4642 SSA names so that we get proper rank ordering according
4643 to tree_swap_operands_p. */
4644 for (i = num_ssa_names - 1; i > 0; --i)
4646 tree name = ssa_name (i);
4647 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4648 insert_operand_rank (name, ++rank);
4651 /* Set up rank for each BB */
4652 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
4653 bb_rank[bbs[i]] = ++rank << 16;
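/* Illustrative effect of the shift above: successive blocks in the
   RPO walk get base ranks like 3<<16, 4<<16, ..., presumably leaving
   the low-order bits free for the ranks of expressions computed
   within each block.  */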
4655 free (bbs);
4656 calculate_dominance_info (CDI_POST_DOMINATORS);
4657 plus_negates = vNULL;
4660 /* Cleanup after the reassociation pass, and print stats if
4661 requested. */
4663 static void
4664 fini_reassoc (void)
4666 statistics_counter_event (cfun, "Linearized",
4667 reassociate_stats.linearized);
4668 statistics_counter_event (cfun, "Constants eliminated",
4669 reassociate_stats.constants_eliminated);
4670 statistics_counter_event (cfun, "Ops eliminated",
4671 reassociate_stats.ops_eliminated);
4672 statistics_counter_event (cfun, "Statements rewritten",
4673 reassociate_stats.rewritten);
4674 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4675 reassociate_stats.pows_encountered);
4676 statistics_counter_event (cfun, "Built-in powi calls created",
4677 reassociate_stats.pows_created);
4679 pointer_map_destroy (operand_rank);
4680 free_alloc_pool (operand_entry_pool);
4681 free (bb_rank);
4682 plus_negates.release ();
4683 free_dominance_info (CDI_POST_DOMINATORS);
4684 loop_optimizer_finalize ();
4687 /* Gate and execute functions for Reassociation. */
4689 static unsigned int
4690 execute_reassoc (void)
4692 init_reassoc ();
4694 do_reassoc ();
4695 repropagate_negates ();
4697 fini_reassoc ();
4698 return 0;
4701 namespace {
4703 const pass_data pass_data_reassoc =
4705 GIMPLE_PASS, /* type */
4706 "reassoc", /* name */
4707 OPTGROUP_NONE, /* optinfo_flags */
4708 TV_TREE_REASSOC, /* tv_id */
4709 ( PROP_cfg | PROP_ssa ), /* properties_required */
4710 0, /* properties_provided */
4711 0, /* properties_destroyed */
4712 0, /* todo_flags_start */
4713 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
4716 class pass_reassoc : public gimple_opt_pass
4718 public:
4719 pass_reassoc (gcc::context *ctxt)
4720 : gimple_opt_pass (pass_data_reassoc, ctxt)
4723 /* opt_pass methods: */
4724 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4725 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
4726 virtual unsigned int execute (function *) { return execute_reassoc (); }
4728 }; // class pass_reassoc
4730 } // anon namespace
4732 gimple_opt_pass *
4733 make_pass_reassoc (gcc::context *ctxt)
4735 return new pass_reassoc (ctxt);