gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2014 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "tree.h"
29 #include "stor-layout.h"
30 #include "basic-block.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-inline.h"
33 #include "pointer-set.h"
34 #include "tree-ssa-alias.h"
35 #include "internal-fn.h"
36 #include "gimple-fold.h"
37 #include "tree-eh.h"
38 #include "gimple-expr.h"
39 #include "is-a.h"
40 #include "gimple.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
43 #include "gimple-ssa.h"
44 #include "tree-cfg.h"
45 #include "tree-phinodes.h"
46 #include "ssa-iterators.h"
47 #include "stringpool.h"
48 #include "tree-ssanames.h"
49 #include "tree-ssa-loop-niter.h"
50 #include "tree-ssa-loop.h"
51 #include "expr.h"
52 #include "tree-dfa.h"
53 #include "tree-ssa.h"
54 #include "tree-iterator.h"
55 #include "tree-pass.h"
56 #include "alloc-pool.h"
57 #include "langhooks.h"
58 #include "cfgloop.h"
59 #include "flags.h"
60 #include "target.h"
61 #include "params.h"
62 #include "diagnostic-core.h"
63 #include "builtins.h"
65 /* This is a simple global reassociation pass. It is, in part, based
66 on the LLVM pass of the same name (They do some things more/less
67 than we do, in different orders, etc).
69 It consists of five steps:
71 1. Breaking up subtract operations into addition + negate, where
72 it would promote the reassociation of adds.
74 2. Left linearization of the expression trees, so that (A+B)+(C+D)
75 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
76 During linearization, we place the operands of the binary
77 expressions into a vector of operand_entry_t
79 3. Optimization of the operand lists, eliminating things like a +
80 -a, a & a, etc.
82 3a. Combine repeated factors with the same occurrence counts
83 into a __builtin_powi call that will later be optimized into
84 an optimal number of multiplies.
86 4. Rewrite the expression trees we linearized and optimized so
87 they are in proper rank order.
 89    5. Repropagate negates, as nothing else will clean them up ATM.
91 A bit of theory on #4, since nobody seems to write anything down
92 about why it makes sense to do it the way they do it:
94 We could do this much nicer theoretically, but don't (for reasons
95 explained after how to do it theoretically nice :P).
97 In order to promote the most redundancy elimination, you want
98 binary expressions whose operands are the same rank (or
99 preferably, the same value) exposed to the redundancy eliminator,
100 for possible elimination.
102 So the way to do this if we really cared, is to build the new op
103 tree from the leaves to the roots, merging as you go, and putting the
104 new op on the end of the worklist, until you are left with one
105 thing on the worklist.
 107    I.e. if you have to rewrite the following set of operands (listed with
108 rank in parentheses), with opcode PLUS_EXPR:
110 a (1), b (1), c (1), d (2), e (2)
113 We start with our merge worklist empty, and the ops list with all of
114 those on it.
116 You want to first merge all leaves of the same rank, as much as
117 possible.
119 So first build a binary op of
121 mergetmp = a + b, and put "mergetmp" on the merge worklist.
123 Because there is no three operand form of PLUS_EXPR, c is not going to
124 be exposed to redundancy elimination as a rank 1 operand.
126 So you might as well throw it on the merge worklist (you could also
127 consider it to now be a rank two operand, and merge it with d and e,
128 but in this case, you then have evicted e from a binary op. So at
129 least in this situation, you can't win.)
131 Then build a binary op of d + e
132 mergetmp2 = d + e
134 and put mergetmp2 on the merge worklist.
136 so merge worklist = {mergetmp, c, mergetmp2}
138 Continue building binary ops of these operations until you have only
139 one operation left on the worklist.
141 So we have
143 build binary op
144 mergetmp3 = mergetmp + c
146 worklist = {mergetmp2, mergetmp3}
148 mergetmp4 = mergetmp2 + mergetmp3
150 worklist = {mergetmp4}
 152    Because we have one operation left, we can now just set the original
153 statement equal to the result of that operation.
155 This will at least expose a + b and d + e to redundancy elimination
156 as binary operations.
158 For extra points, you can reuse the old statements to build the
159 mergetmps, since you shouldn't run out.
161 So why don't we do this?
163 Because it's expensive, and rarely will help. Most trees we are
 164    reassociating have 3 or fewer ops.  If they have 2 ops, they already
165 will be written into a nice single binary op. If you have 3 ops, a
166 single simple check suffices to tell you whether the first two are of the
167 same rank. If so, you know to order it
169 mergetmp = op1 + op2
170 newstmt = mergetmp + op3
172 instead of
173 mergetmp = op2 + op3
174 newstmt = mergetmp + op1
176 If all three are of the same rank, you can't expose them all in a
177 single binary operator anyway, so the above is *still* the best you
178 can do.
180 Thus, this is what we do. When we have three ops left, we check to see
181 what order to put them in, and call it a day. As a nod to vector sum
182 reduction, we check if any of the ops are really a phi node that is a
183 destructive update for the associating op, and keep the destructive
184 update together for vector sum reduction recognition. */
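
/* As a small end-to-end illustration of the steps above (a sketch, not
   taken from any particular dump):

     t1 = a - b;
     t2 = c + t1;
     t3 = t2 + b;

   Step 1 rewrites the subtract as t1 = a + (-b).  Step 2 linearizes the
   plus chain feeding t3 into the operand list {a, -b, c, b}.  Step 3
   cancels the pair b + (-b), leaving {a, c} plus a zero constant that is
   in turn eliminated.  Step 4 rewrites the remaining chain in rank
   order, and step 5 repropagates any negates that survived, so t3 ends
   up computed simply as a + c.  */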
187 /* Statistics */
188 static struct
190 int linearized;
191 int constants_eliminated;
192 int ops_eliminated;
193 int rewritten;
194 int pows_encountered;
195 int pows_created;
196 } reassociate_stats;
198 /* Operator, rank pair. */
199 typedef struct operand_entry
201 unsigned int rank;
202 int id;
203 tree op;
204 unsigned int count;
205 } *operand_entry_t;
207 static alloc_pool operand_entry_pool;
209 /* This is used to assign a unique ID to each struct operand_entry
210 so that qsort results are identical on different hosts. */
211 static int next_operand_entry_id;
213 /* Starting rank number for a given basic block, so that we can rank
214 operations using unmovable instructions in that BB based on the bb
215 depth. */
216 static long *bb_rank;
218 /* Operand->rank hashtable. */
219 static struct pointer_map_t *operand_rank;
221 /* Forward decls. */
222 static long get_rank (tree);
223 static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
225 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
226 possibly added by gsi_remove. */
228 bool
229 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
231 gimple stmt = gsi_stmt (*gsi);
233 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
234 return gsi_remove (gsi, true);
236 gimple_stmt_iterator prev = *gsi;
237 gsi_prev (&prev);
238 unsigned uid = gimple_uid (stmt);
239 basic_block bb = gimple_bb (stmt);
240 bool ret = gsi_remove (gsi, true);
241 if (!gsi_end_p (prev))
242 gsi_next (&prev);
243 else
244 prev = gsi_start_bb (bb);
245 gimple end_stmt = gsi_stmt (*gsi);
246 while ((stmt = gsi_stmt (prev)) != end_stmt)
248 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
249 gimple_set_uid (stmt, uid);
250 gsi_next (&prev);
252 return ret;
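
/* The fixup loop above matters because gsi_remove with
   remove_permanently set may emit debug temp bind statements for the
   defs of the removed statement, and those start out with gimple_uid 0.
   Giving them the uid of the removed statement keeps the uid-based
   ordering used by reassoc_stmt_dominates_stmt_p (which asserts nonzero
   uids) consistent within the block.  */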
255 /* Bias amount for loop-carried phis. We want this to be larger than
256 the depth of any reassociation tree we can see, but not larger than
257 the rank difference between two blocks. */
258 #define PHI_LOOP_BIAS (1 << 15)
260 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
261 an innermost loop, and the phi has only a single use which is inside
262 the loop, then the rank is the block rank of the loop latch plus an
263 extra bias for the loop-carried dependence. This causes expressions
264 calculated into an accumulator variable to be independent for each
265 iteration of the loop. If STMT is some other phi, the rank is the
266 block rank of its containing block. */
267 static long
268 phi_rank (gimple stmt)
270 basic_block bb = gimple_bb (stmt);
271 struct loop *father = bb->loop_father;
272 tree res;
273 unsigned i;
274 use_operand_p use;
275 gimple use_stmt;
277 /* We only care about real loops (those with a latch). */
278 if (!father->latch)
279 return bb_rank[bb->index];
281 /* Interesting phis must be in headers of innermost loops. */
282 if (bb != father->header
283 || father->inner)
284 return bb_rank[bb->index];
286 /* Ignore virtual SSA_NAMEs. */
287 res = gimple_phi_result (stmt);
288 if (virtual_operand_p (res))
289 return bb_rank[bb->index];
291 /* The phi definition must have a single use, and that use must be
292 within the loop. Otherwise this isn't an accumulator pattern. */
293 if (!single_imm_use (res, &use, &use_stmt)
294 || gimple_bb (use_stmt)->loop_father != father)
295 return bb_rank[bb->index];
297 /* Look for phi arguments from within the loop. If found, bias this phi. */
298 for (i = 0; i < gimple_phi_num_args (stmt); i++)
300 tree arg = gimple_phi_arg_def (stmt, i);
301 if (TREE_CODE (arg) == SSA_NAME
302 && !SSA_NAME_IS_DEFAULT_DEF (arg))
304 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
305 if (gimple_bb (def_stmt)->loop_father == father)
306 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
310 /* Must be an uninteresting phi. */
311 return bb_rank[bb->index];
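
/* Concretely: for an innermost loop

     header:  x_1 = PHI <x_0 (preheader), x_2 (latch)>
              x_2 = x_1 + a_3;
              ...

   where x_1 has only the single use shown and it is inside the loop,
   the phi is treated as an accumulator and phi_rank returns
   bb_rank[latch->index] + PHI_LOOP_BIAS rather than the header's block
   rank.  */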
314 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
315 loop-carried dependence of an innermost loop, return TRUE; else
316 return FALSE. */
317 static bool
318 loop_carried_phi (tree exp)
320 gimple phi_stmt;
321 long block_rank;
323 if (TREE_CODE (exp) != SSA_NAME
324 || SSA_NAME_IS_DEFAULT_DEF (exp))
325 return false;
327 phi_stmt = SSA_NAME_DEF_STMT (exp);
329 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
330 return false;
332 /* Non-loop-carried phis have block rank. Loop-carried phis have
333 an additional bias added in. If this phi doesn't have block rank,
334 it's biased and should not be propagated. */
335 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
337 if (phi_rank (phi_stmt) != block_rank)
338 return true;
340 return false;
343 /* Return the maximum of RANK and the rank that should be propagated
344 from expression OP. For most operands, this is just the rank of OP.
345 For loop-carried phis, the value is zero to avoid undoing the bias
346 in favor of the phi. */
347 static long
348 propagate_rank (long rank, tree op)
350 long op_rank;
352 if (loop_carried_phi (op))
353 return rank;
355 op_rank = get_rank (op);
357 return MAX (rank, op_rank);
360 /* Look up the operand rank structure for expression E. */
362 static inline long
363 find_operand_rank (tree e)
365 void **slot = pointer_map_contains (operand_rank, e);
366 return slot ? (long) (intptr_t) *slot : -1;
369 /* Insert {E,RANK} into the operand rank hashtable. */
371 static inline void
372 insert_operand_rank (tree e, long rank)
374 void **slot;
375 gcc_assert (rank > 0);
376 slot = pointer_map_insert (operand_rank, e);
377 gcc_assert (!*slot);
378 *slot = (void *) (intptr_t) rank;
381 /* Given an expression E, return the rank of the expression. */
383 static long
384 get_rank (tree e)
386 /* Constants have rank 0. */
387 if (is_gimple_min_invariant (e))
388 return 0;
 390   /* SSA_NAME's have the rank of the expression they are the result of.
392 For globals and uninitialized values, the rank is 0.
393 For function arguments, use the pre-setup rank.
394 For PHI nodes, stores, asm statements, etc, we use the rank of
395 the BB.
396 For simple operations, the rank is the maximum rank of any of
397 its operands, or the bb_rank, whichever is less.
398 I make no claims that this is optimal, however, it gives good
399 results. */
401 /* We make an exception to the normal ranking system to break
402 dependences of accumulator variables in loops. Suppose we
403 have a simple one-block loop containing:
405 x_1 = phi(x_0, x_2)
406 b = a + x_1
407 c = b + d
408 x_2 = c + e
410 As shown, each iteration of the calculation into x is fully
411 dependent upon the iteration before it. We would prefer to
412 see this in the form:
414 x_1 = phi(x_0, x_2)
415 b = a + d
416 c = b + e
417 x_2 = c + x_1
419 If the loop is unrolled, the calculations of b and c from
420 different iterations can be interleaved.
422 To obtain this result during reassociation, we bias the rank
423 of the phi definition x_1 upward, when it is recognized as an
424 accumulator pattern. The artificial rank causes it to be
425 added last, providing the desired independence. */
427 if (TREE_CODE (e) == SSA_NAME)
429 gimple stmt;
430 long rank;
431 int i, n;
432 tree op;
434 if (SSA_NAME_IS_DEFAULT_DEF (e))
435 return find_operand_rank (e);
437 stmt = SSA_NAME_DEF_STMT (e);
438 if (gimple_code (stmt) == GIMPLE_PHI)
439 return phi_rank (stmt);
441 if (!is_gimple_assign (stmt)
442 || gimple_vdef (stmt))
443 return bb_rank[gimple_bb (stmt)->index];
445 /* If we already have a rank for this expression, use that. */
446 rank = find_operand_rank (e);
447 if (rank != -1)
448 return rank;
450 /* Otherwise, find the maximum rank for the operands. As an
451 exception, remove the bias from loop-carried phis when propagating
452 the rank so that dependent operations are not also biased. */
453 rank = 0;
454 if (gimple_assign_single_p (stmt))
456 tree rhs = gimple_assign_rhs1 (stmt);
457 n = TREE_OPERAND_LENGTH (rhs);
458 if (n == 0)
459 rank = propagate_rank (rank, rhs);
460 else
462 for (i = 0; i < n; i++)
464 op = TREE_OPERAND (rhs, i);
466 if (op != NULL_TREE)
467 rank = propagate_rank (rank, op);
471 else
473 n = gimple_num_ops (stmt);
474 for (i = 1; i < n; i++)
476 op = gimple_op (stmt, i);
477 gcc_assert (op);
478 rank = propagate_rank (rank, op);
482 if (dump_file && (dump_flags & TDF_DETAILS))
484 fprintf (dump_file, "Rank for ");
485 print_generic_expr (dump_file, e, 0);
486 fprintf (dump_file, " is %ld\n", (rank + 1));
489 /* Note the rank in the hashtable so we don't recompute it. */
490 insert_operand_rank (e, (rank + 1));
491 return (rank + 1);
494 /* Globals, etc, are rank 0 */
495 return 0;
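
/* A small worked example of the ranking (a sketch; concrete values
   depend on how bb_rank and the parameter ranks are initialized): with
   constants at rank 0 and parameters a_1 and b_2 assumed pre-assigned
   rank 1,

     c_3 = a_1 + b_2;    rank 2  (one more than the highest operand rank)
     d_4 = c_3 + 5;      rank 3
     e_5 = d_4 + a_1;    rank 4

   so when an operand list over these names is sorted below, lower-ranked
   names and the rank-0 constants end up toward the tail of the
   vector.  */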
499 /* We want integer ones to end up last no matter what, since they are
500 the ones we can do the most with. */
501 #define INTEGER_CONST_TYPE 1 << 3
502 #define FLOAT_CONST_TYPE 1 << 2
503 #define OTHER_CONST_TYPE 1 << 1
505 /* Classify an invariant tree into integer, float, or other, so that
506 we can sort them to be near other constants of the same type. */
507 static inline int
508 constant_type (tree t)
510 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
511 return INTEGER_CONST_TYPE;
512 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
513 return FLOAT_CONST_TYPE;
514 else
515 return OTHER_CONST_TYPE;
518 /* qsort comparison function to sort operand entries PA and PB by rank
519 so that the sorted array is ordered by rank in decreasing order. */
520 static int
521 sort_by_operand_rank (const void *pa, const void *pb)
523 const operand_entry_t oea = *(const operand_entry_t *)pa;
524 const operand_entry_t oeb = *(const operand_entry_t *)pb;
526 /* It's nicer for optimize_expression if constants that are likely
 527      to fold when added/multiplied/whatever are put next to each
528 other. Since all constants have rank 0, order them by type. */
529 if (oeb->rank == 0 && oea->rank == 0)
531 if (constant_type (oeb->op) != constant_type (oea->op))
532 return constant_type (oeb->op) - constant_type (oea->op);
533 else
534 /* To make sorting result stable, we use unique IDs to determine
535 order. */
536 return oeb->id - oea->id;
539 /* Lastly, make sure the versions that are the same go next to each
540 other. */
541 if ((oeb->rank - oea->rank == 0)
542 && TREE_CODE (oea->op) == SSA_NAME
543 && TREE_CODE (oeb->op) == SSA_NAME)
 545      /* As SSA_NAME_VERSION is assigned pretty randomly (we reuse
 546         versions of removed SSA_NAMEs), prefer, if possible, to sort
547 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
548 See PR60418. */
549 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
550 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
551 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
553 gimple stmta = SSA_NAME_DEF_STMT (oea->op);
554 gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
555 basic_block bba = gimple_bb (stmta);
556 basic_block bbb = gimple_bb (stmtb);
557 if (bbb != bba)
559 if (bb_rank[bbb->index] != bb_rank[bba->index])
560 return bb_rank[bbb->index] - bb_rank[bba->index];
562 else
564 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
565 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
566 if (da != db)
567 return da ? 1 : -1;
571 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
572 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
573 else
574 return oeb->id - oea->id;
577 if (oeb->rank != oea->rank)
578 return oeb->rank - oea->rank;
579 else
580 return oeb->id - oea->id;
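
/* For example, an already-sorted PLUS_EXPR operand vector looks like
   {c_3 (rank 2), a_1 (rank 1), 5, 7}: SSA names in decreasing rank
   order, then the rank-0 constants (here 5 and 7) grouped together at
   the tail, which is where optimize_ops_list looks when merging
   constants and where eliminate_using_constants inspects
   ops->last ().  */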
583 /* Add an operand entry to *OPS for the tree operand OP. */
585 static void
586 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
588 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
590 oe->op = op;
591 oe->rank = get_rank (op);
592 oe->id = next_operand_entry_id++;
593 oe->count = 1;
594 ops->safe_push (oe);
597 /* Add an operand entry to *OPS for the tree operand OP with repeat
598 count REPEAT. */
600 static void
601 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
602 HOST_WIDE_INT repeat)
604 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
606 oe->op = op;
607 oe->rank = get_rank (op);
608 oe->id = next_operand_entry_id++;
609 oe->count = repeat;
610 ops->safe_push (oe);
612 reassociate_stats.pows_encountered++;
 615   /* Return true if STMT is a reassociable operation containing a binary
616 operation with tree code CODE, and is inside LOOP. */
618 static bool
619 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
621 basic_block bb = gimple_bb (stmt);
623 if (gimple_bb (stmt) == NULL)
624 return false;
626 if (!flow_bb_inside_loop_p (loop, bb))
627 return false;
629 if (is_gimple_assign (stmt)
630 && gimple_assign_rhs_code (stmt) == code
631 && has_single_use (gimple_assign_lhs (stmt)))
632 return true;
634 return false;
638 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
639 operand of the negate operation. Otherwise, return NULL. */
641 static tree
642 get_unary_op (tree name, enum tree_code opcode)
644 gimple stmt = SSA_NAME_DEF_STMT (name);
646 if (!is_gimple_assign (stmt))
647 return NULL_TREE;
649 if (gimple_assign_rhs_code (stmt) == opcode)
650 return gimple_assign_rhs1 (stmt);
651 return NULL_TREE;
654 /* If CURR and LAST are a pair of ops that OPCODE allows us to
655 eliminate through equivalences, do so, remove them from OPS, and
656 return true. Otherwise, return false. */
658 static bool
659 eliminate_duplicate_pair (enum tree_code opcode,
660 vec<operand_entry_t> *ops,
661 bool *all_done,
662 unsigned int i,
663 operand_entry_t curr,
664 operand_entry_t last)
667 /* If we have two of the same op, and the opcode is & |, min, or max,
668 we can eliminate one of them.
669 If we have two of the same op, and the opcode is ^, we can
670 eliminate both of them. */
672 if (last && last->op == curr->op)
674 switch (opcode)
676 case MAX_EXPR:
677 case MIN_EXPR:
678 case BIT_IOR_EXPR:
679 case BIT_AND_EXPR:
680 if (dump_file && (dump_flags & TDF_DETAILS))
682 fprintf (dump_file, "Equivalence: ");
683 print_generic_expr (dump_file, curr->op, 0);
684 fprintf (dump_file, " [&|minmax] ");
685 print_generic_expr (dump_file, last->op, 0);
686 fprintf (dump_file, " -> ");
687 print_generic_stmt (dump_file, last->op, 0);
690 ops->ordered_remove (i);
691 reassociate_stats.ops_eliminated ++;
693 return true;
695 case BIT_XOR_EXPR:
696 if (dump_file && (dump_flags & TDF_DETAILS))
698 fprintf (dump_file, "Equivalence: ");
699 print_generic_expr (dump_file, curr->op, 0);
700 fprintf (dump_file, " ^ ");
701 print_generic_expr (dump_file, last->op, 0);
702 fprintf (dump_file, " -> nothing\n");
705 reassociate_stats.ops_eliminated += 2;
707 if (ops->length () == 2)
709 ops->create (0);
710 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
711 *all_done = true;
713 else
715 ops->ordered_remove (i-1);
716 ops->ordered_remove (i-1);
719 return true;
721 default:
722 break;
725 return false;
728 static vec<tree> plus_negates;
 730   /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
731 expression, look in OPS for a corresponding positive operation to cancel
732 it out. If we find one, remove the other from OPS, replace
733 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
734 return false. */
736 static bool
737 eliminate_plus_minus_pair (enum tree_code opcode,
738 vec<operand_entry_t> *ops,
739 unsigned int currindex,
740 operand_entry_t curr)
742 tree negateop;
743 tree notop;
744 unsigned int i;
745 operand_entry_t oe;
747 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
748 return false;
750 negateop = get_unary_op (curr->op, NEGATE_EXPR);
751 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
752 if (negateop == NULL_TREE && notop == NULL_TREE)
753 return false;
755 /* Any non-negated version will have a rank that is one less than
756 the current rank. So once we hit those ranks, if we don't find
757 one, we can stop. */
759 for (i = currindex + 1;
760 ops->iterate (i, &oe)
761 && oe->rank >= curr->rank - 1 ;
762 i++)
764 if (oe->op == negateop)
767 if (dump_file && (dump_flags & TDF_DETAILS))
769 fprintf (dump_file, "Equivalence: ");
770 print_generic_expr (dump_file, negateop, 0);
771 fprintf (dump_file, " + -");
772 print_generic_expr (dump_file, oe->op, 0);
773 fprintf (dump_file, " -> 0\n");
776 ops->ordered_remove (i);
777 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
778 ops->ordered_remove (currindex);
779 reassociate_stats.ops_eliminated ++;
781 return true;
783 else if (oe->op == notop)
785 tree op_type = TREE_TYPE (oe->op);
787 if (dump_file && (dump_flags & TDF_DETAILS))
789 fprintf (dump_file, "Equivalence: ");
790 print_generic_expr (dump_file, notop, 0);
791 fprintf (dump_file, " + ~");
792 print_generic_expr (dump_file, oe->op, 0);
793 fprintf (dump_file, " -> -1\n");
796 ops->ordered_remove (i);
797 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
798 ops->ordered_remove (currindex);
799 reassociate_stats.ops_eliminated ++;
801 return true;
805 /* CURR->OP is a negate expr in a plus expr: save it for later
806 inspection in repropagate_negates(). */
807 if (negateop != NULL_TREE)
808 plus_negates.safe_push (curr->op);
810 return false;
 813   /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
814 bitwise not expression, look in OPS for a corresponding operand to
815 cancel it out. If we find one, remove the other from OPS, replace
816 OPS[CURRINDEX] with 0, and return true. Otherwise, return
817 false. */
819 static bool
820 eliminate_not_pairs (enum tree_code opcode,
821 vec<operand_entry_t> *ops,
822 unsigned int currindex,
823 operand_entry_t curr)
825 tree notop;
826 unsigned int i;
827 operand_entry_t oe;
829 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
830 || TREE_CODE (curr->op) != SSA_NAME)
831 return false;
833 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
834 if (notop == NULL_TREE)
835 return false;
837 /* Any non-not version will have a rank that is one less than
838 the current rank. So once we hit those ranks, if we don't find
839 one, we can stop. */
841 for (i = currindex + 1;
842 ops->iterate (i, &oe)
843 && oe->rank >= curr->rank - 1;
844 i++)
846 if (oe->op == notop)
848 if (dump_file && (dump_flags & TDF_DETAILS))
850 fprintf (dump_file, "Equivalence: ");
851 print_generic_expr (dump_file, notop, 0);
852 if (opcode == BIT_AND_EXPR)
853 fprintf (dump_file, " & ~");
854 else if (opcode == BIT_IOR_EXPR)
855 fprintf (dump_file, " | ~");
856 print_generic_expr (dump_file, oe->op, 0);
857 if (opcode == BIT_AND_EXPR)
858 fprintf (dump_file, " -> 0\n");
859 else if (opcode == BIT_IOR_EXPR)
860 fprintf (dump_file, " -> -1\n");
863 if (opcode == BIT_AND_EXPR)
864 oe->op = build_zero_cst (TREE_TYPE (oe->op));
865 else if (opcode == BIT_IOR_EXPR)
866 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
868 reassociate_stats.ops_eliminated += ops->length () - 1;
869 ops->truncate (0);
870 ops->quick_push (oe);
871 return true;
875 return false;
878 /* Use constant value that may be present in OPS to try to eliminate
879 operands. Note that this function is only really used when we've
880 eliminated ops for other reasons, or merged constants. Across
881 single statements, fold already does all of this, plus more. There
882 is little point in duplicating logic, so I've only included the
883 identities that I could ever construct testcases to trigger. */
885 static void
886 eliminate_using_constants (enum tree_code opcode,
887 vec<operand_entry_t> *ops)
889 operand_entry_t oelast = ops->last ();
890 tree type = TREE_TYPE (oelast->op);
892 if (oelast->rank == 0
893 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
895 switch (opcode)
897 case BIT_AND_EXPR:
898 if (integer_zerop (oelast->op))
900 if (ops->length () != 1)
902 if (dump_file && (dump_flags & TDF_DETAILS))
903 fprintf (dump_file, "Found & 0, removing all other ops\n");
905 reassociate_stats.ops_eliminated += ops->length () - 1;
907 ops->truncate (0);
908 ops->quick_push (oelast);
909 return;
912 else if (integer_all_onesp (oelast->op))
914 if (ops->length () != 1)
916 if (dump_file && (dump_flags & TDF_DETAILS))
917 fprintf (dump_file, "Found & -1, removing\n");
918 ops->pop ();
919 reassociate_stats.ops_eliminated++;
922 break;
923 case BIT_IOR_EXPR:
924 if (integer_all_onesp (oelast->op))
926 if (ops->length () != 1)
928 if (dump_file && (dump_flags & TDF_DETAILS))
929 fprintf (dump_file, "Found | -1, removing all other ops\n");
931 reassociate_stats.ops_eliminated += ops->length () - 1;
933 ops->truncate (0);
934 ops->quick_push (oelast);
935 return;
938 else if (integer_zerop (oelast->op))
940 if (ops->length () != 1)
942 if (dump_file && (dump_flags & TDF_DETAILS))
943 fprintf (dump_file, "Found | 0, removing\n");
944 ops->pop ();
945 reassociate_stats.ops_eliminated++;
948 break;
949 case MULT_EXPR:
950 if (integer_zerop (oelast->op)
951 || (FLOAT_TYPE_P (type)
952 && !HONOR_NANS (TYPE_MODE (type))
953 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
954 && real_zerop (oelast->op)))
956 if (ops->length () != 1)
958 if (dump_file && (dump_flags & TDF_DETAILS))
959 fprintf (dump_file, "Found * 0, removing all other ops\n");
961 reassociate_stats.ops_eliminated += ops->length () - 1;
962 ops->truncate (1);
963 ops->quick_push (oelast);
964 return;
967 else if (integer_onep (oelast->op)
968 || (FLOAT_TYPE_P (type)
969 && !HONOR_SNANS (TYPE_MODE (type))
970 && real_onep (oelast->op)))
972 if (ops->length () != 1)
974 if (dump_file && (dump_flags & TDF_DETAILS))
975 fprintf (dump_file, "Found * 1, removing\n");
976 ops->pop ();
977 reassociate_stats.ops_eliminated++;
978 return;
981 break;
982 case BIT_XOR_EXPR:
983 case PLUS_EXPR:
984 case MINUS_EXPR:
985 if (integer_zerop (oelast->op)
986 || (FLOAT_TYPE_P (type)
987 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
988 && fold_real_zero_addition_p (type, oelast->op,
989 opcode == MINUS_EXPR)))
991 if (ops->length () != 1)
993 if (dump_file && (dump_flags & TDF_DETAILS))
994 fprintf (dump_file, "Found [|^+] 0, removing\n");
995 ops->pop ();
996 reassociate_stats.ops_eliminated++;
997 return;
1000 break;
1001 default:
1002 break;
1008 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
1009 bool, bool);
1011 /* Structure for tracking and counting operands. */
1012 typedef struct oecount_s {
1013 int cnt;
1014 int id;
1015 enum tree_code oecode;
1016 tree op;
1017 } oecount;
1020 /* The heap for the oecount hashtable and the sorted list of operands. */
1021 static vec<oecount> cvec;
1024 /* Oecount hashtable helpers. */
1026 struct oecount_hasher : typed_noop_remove <void>
1028 /* Note that this hash table stores integers, not pointers.
1029 So, observe the casting in the member functions. */
1030 typedef void value_type;
1031 typedef void compare_type;
1032 static inline hashval_t hash (const value_type *);
1033 static inline bool equal (const value_type *, const compare_type *);
1036 /* Hash function for oecount. */
1038 inline hashval_t
1039 oecount_hasher::hash (const value_type *p)
1041 const oecount *c = &cvec[(size_t)p - 42];
1042 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1045 /* Comparison function for oecount. */
1047 inline bool
1048 oecount_hasher::equal (const value_type *p1, const compare_type *p2)
1050 const oecount *c1 = &cvec[(size_t)p1 - 42];
1051 const oecount *c2 = &cvec[(size_t)p2 - 42];
1052 return (c1->oecode == c2->oecode
1053 && c1->op == c2->op);
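
/* The +41/-42 arithmetic above and in undistribute_ops_list encodes an
   index into CVEC as a pointer-sized key: undistribute_ops_list pushes
   an element and then inserts cvec.length () + 41, i.e. the element's
   index plus 42, and the hasher recovers the element with "- 42".  The
   offset keeps the stored keys away from 0, which the hash table treats
   as an empty slot.  */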
1056 /* Comparison function for qsort sorting oecount elements by count. */
1058 static int
1059 oecount_cmp (const void *p1, const void *p2)
1061 const oecount *c1 = (const oecount *)p1;
1062 const oecount *c2 = (const oecount *)p2;
1063 if (c1->cnt != c2->cnt)
1064 return c1->cnt - c2->cnt;
1065 else
1066 /* If counts are identical, use unique IDs to stabilize qsort. */
1067 return c1->id - c2->id;
1070 /* Return TRUE iff STMT represents a builtin call that raises OP
1071 to some exponent. */
1073 static bool
1074 stmt_is_power_of_op (gimple stmt, tree op)
1076 tree fndecl;
1078 if (!is_gimple_call (stmt))
1079 return false;
1081 fndecl = gimple_call_fndecl (stmt);
1083 if (!fndecl
1084 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1085 return false;
1087 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1089 CASE_FLT_FN (BUILT_IN_POW):
1090 CASE_FLT_FN (BUILT_IN_POWI):
1091 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1093 default:
1094 return false;
1098 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1099 in place and return the result. Assumes that stmt_is_power_of_op
1100 was previously called for STMT and returned TRUE. */
1102 static HOST_WIDE_INT
1103 decrement_power (gimple stmt)
1105 REAL_VALUE_TYPE c, cint;
1106 HOST_WIDE_INT power;
1107 tree arg1;
1109 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1111 CASE_FLT_FN (BUILT_IN_POW):
1112 arg1 = gimple_call_arg (stmt, 1);
1113 c = TREE_REAL_CST (arg1);
1114 power = real_to_integer (&c) - 1;
1115 real_from_integer (&cint, VOIDmode, power, SIGNED);
1116 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1117 return power;
1119 CASE_FLT_FN (BUILT_IN_POWI):
1120 arg1 = gimple_call_arg (stmt, 1);
1121 power = TREE_INT_CST_LOW (arg1) - 1;
1122 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1123 return power;
1125 default:
1126 gcc_unreachable ();
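
/* E.g. for a statement t_1 = __builtin_pow (x_2, 5.0e0), decrement_power
   rewrites it in place to __builtin_pow (x_2, 4.0e0) and returns 4;
   zero_one_operation uses the return value to know when the call has
   degenerated to pow (x, 1) and can be replaced by x itself.  */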
1130 /* Find the single immediate use of STMT's LHS, and replace it
1131 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1132 replace *DEF with OP as well. */
1134 static void
1135 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1137 tree lhs;
1138 gimple use_stmt;
1139 use_operand_p use;
1140 gimple_stmt_iterator gsi;
1142 if (is_gimple_call (stmt))
1143 lhs = gimple_call_lhs (stmt);
1144 else
1145 lhs = gimple_assign_lhs (stmt);
1147 gcc_assert (has_single_use (lhs));
1148 single_imm_use (lhs, &use, &use_stmt);
1149 if (lhs == *def)
1150 *def = op;
1151 SET_USE (use, op);
1152 if (TREE_CODE (op) != SSA_NAME)
1153 update_stmt (use_stmt);
1154 gsi = gsi_for_stmt (stmt);
1155 unlink_stmt_vdef (stmt);
1156 reassoc_remove_stmt (&gsi);
1157 release_defs (stmt);
1160 /* Walks the linear chain with result *DEF searching for an operation
 1161    with operand OP and code OPCODE, removing that from the chain.  *DEF
1162 is updated if there is only one operand but no operation left. */
1164 static void
1165 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1167 gimple stmt = SSA_NAME_DEF_STMT (*def);
1171 tree name;
1173 if (opcode == MULT_EXPR
1174 && stmt_is_power_of_op (stmt, op))
1176 if (decrement_power (stmt) == 1)
1177 propagate_op_to_single_use (op, stmt, def);
1178 return;
1181 name = gimple_assign_rhs1 (stmt);
1183 /* If this is the operation we look for and one of the operands
1184 is ours simply propagate the other operand into the stmts
1185 single use. */
1186 if (gimple_assign_rhs_code (stmt) == opcode
1187 && (name == op
1188 || gimple_assign_rhs2 (stmt) == op))
1190 if (name == op)
1191 name = gimple_assign_rhs2 (stmt);
1192 propagate_op_to_single_use (name, stmt, def);
1193 return;
1196 /* We might have a multiply of two __builtin_pow* calls, and
1197 the operand might be hiding in the rightmost one. */
1198 if (opcode == MULT_EXPR
1199 && gimple_assign_rhs_code (stmt) == opcode
1200 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1201 && has_single_use (gimple_assign_rhs2 (stmt)))
1203 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1204 if (stmt_is_power_of_op (stmt2, op))
1206 if (decrement_power (stmt2) == 1)
1207 propagate_op_to_single_use (op, stmt2, def);
1208 return;
1212 /* Continue walking the chain. */
1213 gcc_assert (name != op
1214 && TREE_CODE (name) == SSA_NAME);
1215 stmt = SSA_NAME_DEF_STMT (name);
1217 while (1);
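
/* For example, given DEF pointing at t_5 and the linear chain

     t_3 = x_1 * a_2;
     t_4 = t_3 * b_6;
     t_5 = t_4 * c_7;

   zero_one_operation (DEF, MULT_EXPR, a_2) walks t_5 -> t_4 -> t_3
   through the first operands, finds a_2 in t_3, and propagates the
   other operand x_1 into t_3's single use, so the chain now computes
   the old value with the factor a_2 stripped.  *DEF itself is only
   rewritten when the stripped operation was the one defining it.  */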
1220 /* Returns true if statement S1 dominates statement S2. Like
1221 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1223 static bool
1224 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1226 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
 1228   /* If bb1 is NULL, it should be the GIMPLE_NOP def stmt of a
 1229      default-definition ((D)) SSA_NAME.  Assume it lives at the beginning
 1230      of the function and thus dominates everything.  */
1231 if (!bb1 || s1 == s2)
1232 return true;
1234 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1235 if (!bb2)
1236 return false;
1238 if (bb1 == bb2)
1240 /* PHIs in the same basic block are assumed to be
1241 executed all in parallel, if only one stmt is a PHI,
1242 it dominates the other stmt in the same basic block. */
1243 if (gimple_code (s1) == GIMPLE_PHI)
1244 return true;
1246 if (gimple_code (s2) == GIMPLE_PHI)
1247 return false;
1249 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1251 if (gimple_uid (s1) < gimple_uid (s2))
1252 return true;
1254 if (gimple_uid (s1) > gimple_uid (s2))
1255 return false;
1257 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1258 unsigned int uid = gimple_uid (s1);
1259 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1261 gimple s = gsi_stmt (gsi);
1262 if (gimple_uid (s) != uid)
1263 break;
1264 if (s == s2)
1265 return true;
1268 return false;
1271 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1274 /* Insert STMT after INSERT_POINT. */
1276 static void
1277 insert_stmt_after (gimple stmt, gimple insert_point)
1279 gimple_stmt_iterator gsi;
1280 basic_block bb;
1282 if (gimple_code (insert_point) == GIMPLE_PHI)
1283 bb = gimple_bb (insert_point);
1284 else if (!stmt_ends_bb_p (insert_point))
1286 gsi = gsi_for_stmt (insert_point);
1287 gimple_set_uid (stmt, gimple_uid (insert_point));
1288 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1289 return;
1291 else
1292 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1293 thus if it must end a basic block, it should be a call that can
1294 throw, or some assignment that can throw. If it throws, the LHS
1295 of it will not be initialized though, so only valid places using
1296 the SSA_NAME should be dominated by the fallthru edge. */
1297 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1298 gsi = gsi_after_labels (bb);
1299 if (gsi_end_p (gsi))
1301 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1302 gimple_set_uid (stmt,
1303 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1305 else
1306 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1307 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1310 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1311 the result. Places the statement after the definition of either
1312 OP1 or OP2. Returns the new statement. */
1314 static gimple
1315 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1317 gimple op1def = NULL, op2def = NULL;
1318 gimple_stmt_iterator gsi;
1319 tree op;
1320 gimple sum;
1322 /* Create the addition statement. */
1323 op = make_ssa_name (type, NULL);
1324 sum = gimple_build_assign_with_ops (opcode, op, op1, op2);
1326 /* Find an insertion place and insert. */
1327 if (TREE_CODE (op1) == SSA_NAME)
1328 op1def = SSA_NAME_DEF_STMT (op1);
1329 if (TREE_CODE (op2) == SSA_NAME)
1330 op2def = SSA_NAME_DEF_STMT (op2);
1331 if ((!op1def || gimple_nop_p (op1def))
1332 && (!op2def || gimple_nop_p (op2def)))
1334 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1335 if (gsi_end_p (gsi))
1337 gimple_stmt_iterator gsi2
1338 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1339 gimple_set_uid (sum,
1340 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1342 else
1343 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1344 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1346 else
1348 gimple insert_point;
1349 if ((!op1def || gimple_nop_p (op1def))
1350 || (op2def && !gimple_nop_p (op2def)
1351 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1352 insert_point = op2def;
1353 else
1354 insert_point = op1def;
1355 insert_stmt_after (sum, insert_point);
1357 update_stmt (sum);
1359 return sum;
1362 /* Perform un-distribution of divisions and multiplications.
1363 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1364 to (A + B) / X for real X.
1366 The algorithm is organized as follows.
1368 - First we walk the addition chain *OPS looking for summands that
1369 are defined by a multiplication or a real division. This results
1370 in the candidates bitmap with relevant indices into *OPS.
1372 - Second we build the chains of multiplications or divisions for
1373 these candidates, counting the number of occurrences of (operand, code)
1374 pairs in all of the candidates chains.
1376 - Third we sort the (operand, code) pairs by number of occurrence and
1377 process them starting with the pair with the most uses.
1379 * For each such pair we walk the candidates again to build a
1380 second candidate bitmap noting all multiplication/division chains
1381 that have at least one occurrence of (operand, code).
1383 * We build an alternate addition chain only covering these
1384 candidates with one (operand, code) operation removed from their
1385 multiplication/division chain.
1387 * The first candidate gets replaced by the alternate addition chain
1388 multiplied/divided by the operand.
1390 * All candidate chains get disabled for further processing and
1391 processing of (operand, code) pairs continues.
1393 The alternate addition chains built are re-processed by the main
1394 reassociation algorithm which allows optimizing a * x * y + b * y * x
1395 to (a + b ) * x * y in one invocation of the reassociation pass. */
1397 static bool
1398 undistribute_ops_list (enum tree_code opcode,
1399 vec<operand_entry_t> *ops, struct loop *loop)
1401 unsigned int length = ops->length ();
1402 operand_entry_t oe1;
1403 unsigned i, j;
1404 sbitmap candidates, candidates2;
1405 unsigned nr_candidates, nr_candidates2;
1406 sbitmap_iterator sbi0;
1407 vec<operand_entry_t> *subops;
1408 hash_table <oecount_hasher> ctable;
1409 bool changed = false;
1410 int next_oecount_id = 0;
1412 if (length <= 1
1413 || opcode != PLUS_EXPR)
1414 return false;
1416 /* Build a list of candidates to process. */
1417 candidates = sbitmap_alloc (length);
1418 bitmap_clear (candidates);
1419 nr_candidates = 0;
1420 FOR_EACH_VEC_ELT (*ops, i, oe1)
1422 enum tree_code dcode;
1423 gimple oe1def;
1425 if (TREE_CODE (oe1->op) != SSA_NAME)
1426 continue;
1427 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1428 if (!is_gimple_assign (oe1def))
1429 continue;
1430 dcode = gimple_assign_rhs_code (oe1def);
1431 if ((dcode != MULT_EXPR
1432 && dcode != RDIV_EXPR)
1433 || !is_reassociable_op (oe1def, dcode, loop))
1434 continue;
1436 bitmap_set_bit (candidates, i);
1437 nr_candidates++;
1440 if (nr_candidates < 2)
1442 sbitmap_free (candidates);
1443 return false;
1446 if (dump_file && (dump_flags & TDF_DETAILS))
1448 fprintf (dump_file, "searching for un-distribute opportunities ");
1449 print_generic_expr (dump_file,
1450 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1451 fprintf (dump_file, " %d\n", nr_candidates);
1454 /* Build linearized sub-operand lists and the counting table. */
1455 cvec.create (0);
1456 ctable.create (15);
1457 /* ??? Macro arguments cannot have multi-argument template types in
 1458      them.  This typedef is needed to work around that limitation.  */
1459 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1460 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1461 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1463 gimple oedef;
1464 enum tree_code oecode;
1465 unsigned j;
1467 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1468 oecode = gimple_assign_rhs_code (oedef);
1469 linearize_expr_tree (&subops[i], oedef,
1470 associative_tree_code (oecode), false);
1472 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1474 oecount c;
1475 void **slot;
1476 size_t idx;
1477 c.oecode = oecode;
1478 c.cnt = 1;
1479 c.id = next_oecount_id++;
1480 c.op = oe1->op;
1481 cvec.safe_push (c);
1482 idx = cvec.length () + 41;
1483 slot = ctable.find_slot ((void *)idx, INSERT);
1484 if (!*slot)
1486 *slot = (void *)idx;
1488 else
1490 cvec.pop ();
1491 cvec[(size_t)*slot - 42].cnt++;
1495 ctable.dispose ();
1497 /* Sort the counting table. */
1498 cvec.qsort (oecount_cmp);
1500 if (dump_file && (dump_flags & TDF_DETAILS))
1502 oecount *c;
1503 fprintf (dump_file, "Candidates:\n");
1504 FOR_EACH_VEC_ELT (cvec, j, c)
1506 fprintf (dump_file, " %u %s: ", c->cnt,
1507 c->oecode == MULT_EXPR
1508 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1509 print_generic_expr (dump_file, c->op, 0);
1510 fprintf (dump_file, "\n");
1514 /* Process the (operand, code) pairs in order of most occurrence. */
1515 candidates2 = sbitmap_alloc (length);
1516 while (!cvec.is_empty ())
1518 oecount *c = &cvec.last ();
1519 if (c->cnt < 2)
1520 break;
1522 /* Now collect the operands in the outer chain that contain
1523 the common operand in their inner chain. */
1524 bitmap_clear (candidates2);
1525 nr_candidates2 = 0;
1526 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1528 gimple oedef;
1529 enum tree_code oecode;
1530 unsigned j;
1531 tree op = (*ops)[i]->op;
1533 /* If we undistributed in this chain already this may be
1534 a constant. */
1535 if (TREE_CODE (op) != SSA_NAME)
1536 continue;
1538 oedef = SSA_NAME_DEF_STMT (op);
1539 oecode = gimple_assign_rhs_code (oedef);
1540 if (oecode != c->oecode)
1541 continue;
1543 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1545 if (oe1->op == c->op)
1547 bitmap_set_bit (candidates2, i);
1548 ++nr_candidates2;
1549 break;
1554 if (nr_candidates2 >= 2)
1556 operand_entry_t oe1, oe2;
1557 gimple prod;
1558 int first = bitmap_first_set_bit (candidates2);
1560 /* Build the new addition chain. */
1561 oe1 = (*ops)[first];
1562 if (dump_file && (dump_flags & TDF_DETAILS))
1564 fprintf (dump_file, "Building (");
1565 print_generic_expr (dump_file, oe1->op, 0);
1567 zero_one_operation (&oe1->op, c->oecode, c->op);
1568 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1570 gimple sum;
1571 oe2 = (*ops)[i];
1572 if (dump_file && (dump_flags & TDF_DETAILS))
1574 fprintf (dump_file, " + ");
1575 print_generic_expr (dump_file, oe2->op, 0);
1577 zero_one_operation (&oe2->op, c->oecode, c->op);
1578 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1579 oe1->op, oe2->op, opcode);
1580 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1581 oe2->rank = 0;
1582 oe1->op = gimple_get_lhs (sum);
1585 /* Apply the multiplication/division. */
1586 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1587 oe1->op, c->op, c->oecode);
1588 if (dump_file && (dump_flags & TDF_DETAILS))
1590 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1591 print_generic_expr (dump_file, c->op, 0);
1592 fprintf (dump_file, "\n");
1595 /* Record it in the addition chain and disable further
1596 undistribution with this op. */
1597 oe1->op = gimple_assign_lhs (prod);
1598 oe1->rank = get_rank (oe1->op);
1599 subops[first].release ();
1601 changed = true;
1604 cvec.pop ();
1607 for (i = 0; i < ops->length (); ++i)
1608 subops[i].release ();
1609 free (subops);
1610 cvec.release ();
1611 sbitmap_free (candidates);
1612 sbitmap_free (candidates2);
1614 return changed;
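
/* Tracing the above on a * x + b * x + c: the candidates are the two
   multiplications, and the counting table records (x, MULT_EXPR) with
   count 2 (a and b with count 1 each), so x is processed first.
   zero_one_operation strips x from both chains, build_and_add_sum emits
   t = a + b and then t2 = t * x, and the addition chain becomes
   t2 + 0 + c, which the caller then re-optimizes.  */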
1617 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1618 expression, examine the other OPS to see if any of them are comparisons
1619 of the same values, which we may be able to combine or eliminate.
1620 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1622 static bool
1623 eliminate_redundant_comparison (enum tree_code opcode,
1624 vec<operand_entry_t> *ops,
1625 unsigned int currindex,
1626 operand_entry_t curr)
1628 tree op1, op2;
1629 enum tree_code lcode, rcode;
1630 gimple def1, def2;
1631 int i;
1632 operand_entry_t oe;
1634 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1635 return false;
1637 /* Check that CURR is a comparison. */
1638 if (TREE_CODE (curr->op) != SSA_NAME)
1639 return false;
1640 def1 = SSA_NAME_DEF_STMT (curr->op);
1641 if (!is_gimple_assign (def1))
1642 return false;
1643 lcode = gimple_assign_rhs_code (def1);
1644 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1645 return false;
1646 op1 = gimple_assign_rhs1 (def1);
1647 op2 = gimple_assign_rhs2 (def1);
1649 /* Now look for a similar comparison in the remaining OPS. */
1650 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1652 tree t;
1654 if (TREE_CODE (oe->op) != SSA_NAME)
1655 continue;
1656 def2 = SSA_NAME_DEF_STMT (oe->op);
1657 if (!is_gimple_assign (def2))
1658 continue;
1659 rcode = gimple_assign_rhs_code (def2);
1660 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1661 continue;
1663 /* If we got here, we have a match. See if we can combine the
1664 two comparisons. */
1665 if (opcode == BIT_IOR_EXPR)
1666 t = maybe_fold_or_comparisons (lcode, op1, op2,
1667 rcode, gimple_assign_rhs1 (def2),
1668 gimple_assign_rhs2 (def2));
1669 else
1670 t = maybe_fold_and_comparisons (lcode, op1, op2,
1671 rcode, gimple_assign_rhs1 (def2),
1672 gimple_assign_rhs2 (def2));
1673 if (!t)
1674 continue;
1676 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1677 always give us a boolean_type_node value back. If the original
1678 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1679 we need to convert. */
1680 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1681 t = fold_convert (TREE_TYPE (curr->op), t);
1683 if (TREE_CODE (t) != INTEGER_CST
1684 && !operand_equal_p (t, curr->op, 0))
1686 enum tree_code subcode;
1687 tree newop1, newop2;
1688 if (!COMPARISON_CLASS_P (t))
1689 continue;
1690 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1691 STRIP_USELESS_TYPE_CONVERSION (newop1);
1692 STRIP_USELESS_TYPE_CONVERSION (newop2);
1693 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1694 continue;
1697 if (dump_file && (dump_flags & TDF_DETAILS))
1699 fprintf (dump_file, "Equivalence: ");
1700 print_generic_expr (dump_file, curr->op, 0);
1701 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1702 print_generic_expr (dump_file, oe->op, 0);
1703 fprintf (dump_file, " -> ");
1704 print_generic_expr (dump_file, t, 0);
1705 fprintf (dump_file, "\n");
1708 /* Now we can delete oe, as it has been subsumed by the new combined
1709 expression t. */
1710 ops->ordered_remove (i);
1711 reassociate_stats.ops_eliminated ++;
1713 /* If t is the same as curr->op, we're done. Otherwise we must
1714 replace curr->op with t. Special case is if we got a constant
1715 back, in which case we add it to the end instead of in place of
1716 the current entry. */
1717 if (TREE_CODE (t) == INTEGER_CST)
1719 ops->ordered_remove (currindex);
1720 add_to_ops_vec (ops, t);
1722 else if (!operand_equal_p (t, curr->op, 0))
1724 gimple sum;
1725 enum tree_code subcode;
1726 tree newop1;
1727 tree newop2;
1728 gcc_assert (COMPARISON_CLASS_P (t));
1729 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1730 STRIP_USELESS_TYPE_CONVERSION (newop1);
1731 STRIP_USELESS_TYPE_CONVERSION (newop2);
1732 gcc_checking_assert (is_gimple_val (newop1)
1733 && is_gimple_val (newop2));
1734 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1735 curr->op = gimple_get_lhs (sum);
1737 return true;
1740 return false;
1743 /* Perform various identities and other optimizations on the list of
1744 operand entries, stored in OPS. The tree code for the binary
1745 operation between all the operands is OPCODE. */
1747 static void
1748 optimize_ops_list (enum tree_code opcode,
1749 vec<operand_entry_t> *ops)
1751 unsigned int length = ops->length ();
1752 unsigned int i;
1753 operand_entry_t oe;
1754 operand_entry_t oelast = NULL;
1755 bool iterate = false;
1757 if (length == 1)
1758 return;
1760 oelast = ops->last ();
1762 /* If the last two are constants, pop the constants off, merge them
1763 and try the next two. */
1764 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1766 operand_entry_t oelm1 = (*ops)[length - 2];
1768 if (oelm1->rank == 0
1769 && is_gimple_min_invariant (oelm1->op)
1770 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1771 TREE_TYPE (oelast->op)))
1773 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1774 oelm1->op, oelast->op);
1776 if (folded && is_gimple_min_invariant (folded))
1778 if (dump_file && (dump_flags & TDF_DETAILS))
1779 fprintf (dump_file, "Merging constants\n");
1781 ops->pop ();
1782 ops->pop ();
1784 add_to_ops_vec (ops, folded);
1785 reassociate_stats.constants_eliminated++;
1787 optimize_ops_list (opcode, ops);
1788 return;
1793 eliminate_using_constants (opcode, ops);
1794 oelast = NULL;
1796 for (i = 0; ops->iterate (i, &oe);)
1798 bool done = false;
1800 if (eliminate_not_pairs (opcode, ops, i, oe))
1801 return;
1802 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1803 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1804 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1806 if (done)
1807 return;
1808 iterate = true;
1809 oelast = NULL;
1810 continue;
1812 oelast = oe;
1813 i++;
1816 length = ops->length ();
1817 oelast = ops->last ();
1819 if (iterate)
1820 optimize_ops_list (opcode, ops);
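
/* For instance, a PLUS_EXPR operand list {b_2, a_1, 7, 3} has its two
   trailing constants folded into 10 (counted as one constant
   eliminated), after which the pairwise eliminations above are retried
   on the shorter list {b_2, a_1, 10}.  */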
1823 /* The following functions are subroutines to optimize_range_tests and allow
1824 it to try to change a logical combination of comparisons into a range
1825 test.
1827 For example, both
 1828 	X == 2 || X == 5 || X == 3 || X == 4
 1829    and
1830 X >= 2 && X <= 5
1831 are converted to
1832 (unsigned) (X - 2) <= 3
1834 For more information see comments above fold_test_range in fold-const.c,
1835 this implementation is for GIMPLE. */
1837 struct range_entry
1839 tree exp;
1840 tree low;
1841 tree high;
1842 bool in_p;
1843 bool strict_overflow_p;
1844 unsigned int idx, next;
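
/* In terms of this structure, the example above is first recorded as
   four single-value ranges for the same SSA_NAME X,

     +[2, 2]  +[5, 5]  +[3, 3]  +[4, 4]

   (in_p true, low == high), which optimize_range_tests sorts and merges
   into the single range +[2, 5] before emitting the unsigned
   subtract-and-compare form shown above.  */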
1847 /* This is similar to make_range in fold-const.c, but on top of
1848 GIMPLE instead of trees. If EXP is non-NULL, it should be
1849 an SSA_NAME and STMT argument is ignored, otherwise STMT
1850 argument should be a GIMPLE_COND. */
1852 static void
1853 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1855 int in_p;
1856 tree low, high;
1857 bool is_bool, strict_overflow_p;
1859 r->exp = NULL_TREE;
1860 r->in_p = false;
1861 r->strict_overflow_p = false;
1862 r->low = NULL_TREE;
1863 r->high = NULL_TREE;
1864 if (exp != NULL_TREE
1865 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1866 return;
1868 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1869 and see if we can refine the range. Some of the cases below may not
1870 happen, but it doesn't seem worth worrying about this. We "continue"
1871 the outer loop when we've changed something; otherwise we "break"
1872 the switch, which will "break" the while. */
1873 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1874 high = low;
1875 in_p = 0;
1876 strict_overflow_p = false;
1877 is_bool = false;
1878 if (exp == NULL_TREE)
1879 is_bool = true;
1880 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1882 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1883 is_bool = true;
1884 else
1885 return;
1887 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1888 is_bool = true;
1890 while (1)
1892 enum tree_code code;
1893 tree arg0, arg1, exp_type;
1894 tree nexp;
1895 location_t loc;
1897 if (exp != NULL_TREE)
1899 if (TREE_CODE (exp) != SSA_NAME
1900 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1901 break;
1903 stmt = SSA_NAME_DEF_STMT (exp);
1904 if (!is_gimple_assign (stmt))
1905 break;
1907 code = gimple_assign_rhs_code (stmt);
1908 arg0 = gimple_assign_rhs1 (stmt);
1909 arg1 = gimple_assign_rhs2 (stmt);
1910 exp_type = TREE_TYPE (exp);
1912 else
1914 code = gimple_cond_code (stmt);
1915 arg0 = gimple_cond_lhs (stmt);
1916 arg1 = gimple_cond_rhs (stmt);
1917 exp_type = boolean_type_node;
1920 if (TREE_CODE (arg0) != SSA_NAME)
1921 break;
1922 loc = gimple_location (stmt);
1923 switch (code)
1925 case BIT_NOT_EXPR:
1926 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1927 /* Ensure the range is either +[-,0], +[0,0],
1928 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1929 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1930 or similar expression of unconditional true or
1931 false, it should not be negated. */
1932 && ((high && integer_zerop (high))
1933 || (low && integer_onep (low))))
1935 in_p = !in_p;
1936 exp = arg0;
1937 continue;
1939 break;
1940 case SSA_NAME:
1941 exp = arg0;
1942 continue;
1943 CASE_CONVERT:
1944 if (is_bool)
1945 goto do_default;
1946 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1948 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1949 is_bool = true;
1950 else
1951 return;
1953 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1954 is_bool = true;
1955 goto do_default;
1956 case EQ_EXPR:
1957 case NE_EXPR:
1958 case LT_EXPR:
1959 case LE_EXPR:
1960 case GE_EXPR:
1961 case GT_EXPR:
1962 is_bool = true;
1963 /* FALLTHRU */
1964 default:
1965 if (!is_bool)
1966 return;
1967 do_default:
1968 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1969 &low, &high, &in_p,
1970 &strict_overflow_p);
1971 if (nexp != NULL_TREE)
1973 exp = nexp;
1974 gcc_assert (TREE_CODE (exp) == SSA_NAME);
1975 continue;
1977 break;
1979 break;
1981 if (is_bool)
1983 r->exp = exp;
1984 r->in_p = in_p;
1985 r->low = low;
1986 r->high = high;
1987 r->strict_overflow_p = strict_overflow_p;
1991 /* Comparison function for qsort. Sort entries
1992 without SSA_NAME exp first, then with SSA_NAMEs sorted
1993 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
1994 by increasing ->low and if ->low is the same, by increasing
1995 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
1996 maximum. */
1998 static int
1999 range_entry_cmp (const void *a, const void *b)
2001 const struct range_entry *p = (const struct range_entry *) a;
2002 const struct range_entry *q = (const struct range_entry *) b;
2004 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2006 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2008 /* Group range_entries for the same SSA_NAME together. */
2009 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2010 return -1;
2011 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2012 return 1;
2013 /* If ->low is different, NULL low goes first, then by
2014 ascending low. */
2015 if (p->low != NULL_TREE)
2017 if (q->low != NULL_TREE)
2019 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2020 p->low, q->low);
2021 if (tem && integer_onep (tem))
2022 return -1;
2023 tem = fold_binary (GT_EXPR, boolean_type_node,
2024 p->low, q->low);
2025 if (tem && integer_onep (tem))
2026 return 1;
2028 else
2029 return 1;
2031 else if (q->low != NULL_TREE)
2032 return -1;
2033 /* If ->high is different, NULL high goes last, before that by
2034 ascending high. */
2035 if (p->high != NULL_TREE)
2037 if (q->high != NULL_TREE)
2039 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2040 p->high, q->high);
2041 if (tem && integer_onep (tem))
2042 return -1;
2043 tem = fold_binary (GT_EXPR, boolean_type_node,
2044 p->high, q->high);
2045 if (tem && integer_onep (tem))
2046 return 1;
2048 else
2049 return -1;
2051 else if (p->high != NULL_TREE)
2052 return 1;
2053 /* If both ranges are the same, sort below by ascending idx. */
2055 else
2056 return 1;
2058 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2059 return -1;
2061 if (p->idx < q->idx)
2062 return -1;
2063 else
2065 gcc_checking_assert (p->idx > q->idx);
2066 return 1;
2070 /* Helper routine of optimize_range_test.
2071 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2072 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2073 OPCODE and OPS are arguments of optimize_range_tests. Return
2074 true if the range merge has been successful.
2075 If OPCODE is ERROR_MARK, this is called from within
2076 maybe_optimize_range_tests and is performing inter-bb range optimization.
2077 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2078 oe->rank. */
2080 static bool
2081 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2082 unsigned int count, enum tree_code opcode,
2083 vec<operand_entry_t> *ops, tree exp, bool in_p,
2084 tree low, tree high, bool strict_overflow_p)
2086 operand_entry_t oe = (*ops)[range->idx];
2087 tree op = oe->op;
2088 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2089 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2090 location_t loc = gimple_location (stmt);
2091 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2092 tree tem = build_range_check (loc, optype, exp, in_p, low, high);
2093 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2094 gimple_stmt_iterator gsi;
2096 if (tem == NULL_TREE)
2097 return false;
2099 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2100 warning_at (loc, OPT_Wstrict_overflow,
2101 "assuming signed overflow does not occur "
2102 "when simplifying range test");
2104 if (dump_file && (dump_flags & TDF_DETAILS))
2106 struct range_entry *r;
2107 fprintf (dump_file, "Optimizing range tests ");
2108 print_generic_expr (dump_file, range->exp, 0);
2109 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2110 print_generic_expr (dump_file, range->low, 0);
2111 fprintf (dump_file, ", ");
2112 print_generic_expr (dump_file, range->high, 0);
2113 fprintf (dump_file, "]");
2114 for (r = otherrange; r < otherrange + count; r++)
2116 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2117 print_generic_expr (dump_file, r->low, 0);
2118 fprintf (dump_file, ", ");
2119 print_generic_expr (dump_file, r->high, 0);
2120 fprintf (dump_file, "]");
2122 fprintf (dump_file, "\n into ");
2123 print_generic_expr (dump_file, tem, 0);
2124 fprintf (dump_file, "\n");
2127 if (opcode == BIT_IOR_EXPR
2128 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2129 tem = invert_truthvalue_loc (loc, tem);
2131 tem = fold_convert_loc (loc, optype, tem);
2132 gsi = gsi_for_stmt (stmt);
2133 /* In rare cases range->exp can be equal to lhs of stmt.
2134 In that case we have to insert after the stmt rather than before
2135 it. */
2136 if (op == range->exp)
2137 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2138 GSI_CONTINUE_LINKING);
2139 else
2141 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2142 GSI_SAME_STMT);
2143 gsi_prev (&gsi);
2145 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2146 if (gimple_uid (gsi_stmt (gsi)))
2147 break;
2148 else
2149 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2151 oe->op = tem;
2152 range->exp = exp;
2153 range->low = low;
2154 range->high = high;
2155 range->in_p = in_p;
2156 range->strict_overflow_p = false;
2158 for (range = otherrange; range < otherrange + count; range++)
2160 oe = (*ops)[range->idx];
2161 /* Now change all the other range test immediate uses, so that
2162 those tests will be optimized away. */
2163 if (opcode == ERROR_MARK)
2165 if (oe->op)
2166 oe->op = build_int_cst (TREE_TYPE (oe->op),
2167 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2168 else
2169 oe->op = (oe->rank == BIT_IOR_EXPR
2170 ? boolean_false_node : boolean_true_node);
2172 else
2173 oe->op = error_mark_node;
2174 range->exp = NULL_TREE;
2176 return true;
2179 /* Optimize X == CST1 || X == CST2
2180 if popcount (CST1 ^ CST2) == 1 into
2181 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2182 Similarly for ranges. E.g.
2183 X != 2 && X != 3 && X != 10 && X != 11
2184 will be transformed by the previous optimization into
2185 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2186 and this loop can transform that into
2187 !(((X & ~8) - 2U) <= 1U). */
2189 static bool
2190 optimize_range_tests_xor (enum tree_code opcode, tree type,
2191 tree lowi, tree lowj, tree highi, tree highj,
2192 vec<operand_entry_t> *ops,
2193 struct range_entry *rangei,
2194 struct range_entry *rangej)
2196 tree lowxor, highxor, tem, exp;
2197 /* Check highi ^ lowi == highj ^ lowj and
2198 popcount (highi ^ lowi) == 1. */
2199 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2200 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2201 return false;
2202 if (tree_log2 (lowxor) < 0)
2203 return false;
2204 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2205 if (!tree_int_cst_equal (lowxor, highxor))
2206 return false;
2208 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2209 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2210 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2211 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2212 if (update_range_test (rangei, rangej, 1, opcode, ops, exp,
2213 rangei->in_p, lowj, highj,
2214 rangei->strict_overflow_p
2215 || rangej->strict_overflow_p))
2216 return true;
2217 return false;
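/* Illustrative sketch, not part of the original pass: the bit trick used
   above, shown on plain unsigned values for the single-value case
   X == CST1 || X == CST2.  When the two constants differ in exactly one
   bit, both equality tests collapse into one masked comparison.  Ranges
   are handled the same way under the extra disjointness checks done by
   the caller.  The helper name below is made up.  */

static int
sketch_xor_equality_merge (unsigned int x, unsigned int cst1, unsigned int cst2)
{
  unsigned int diff = cst1 ^ cst2;

  /* Precondition checked by optimize_range_tests_xor: exactly one bit set.  */
  if (diff == 0 || (diff & (diff - 1)) != 0)
    return x == cst1 || x == cst2;

  /* Transformed form: (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).  */
  return (x & ~diff) == (cst1 & ~diff);
}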
2220 /* Optimize X == CST1 || X == CST2
2221 if popcount (CST2 - CST1) == 1 into
2222 ((X - CST1) & ~(CST2 - CST1)) == 0.
2223 Similarly for ranges. E.g.
2224 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2225 || X == 75 || X == 45
2226 will be transformed by the previous optimization into
2227 (X - 43U) <= 3U || (X - 75U) <= 3U
2228 and this loop can transform that into
2229 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2230 static bool
2231 optimize_range_tests_diff (enum tree_code opcode, tree type,
2232 tree lowi, tree lowj, tree highi, tree highj,
2233 vec<operand_entry_t> *ops,
2234 struct range_entry *rangei,
2235 struct range_entry *rangej)
2237 tree tem1, tem2, mask;
2238 /* Check highi - lowi == highj - lowj. */
2239 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2240 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2241 return false;
2242 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2243 if (!tree_int_cst_equal (tem1, tem2))
2244 return false;
2245 /* Check popcount (lowj - lowi) == 1. */
2246 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2247 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2248 return false;
2249 if (tree_log2 (tem1) < 0)
2250 return false;
2252 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2253 tem1 = fold_binary (MINUS_EXPR, type, rangei->exp, lowi);
2254 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2255 lowj = build_int_cst (type, 0);
2256 if (update_range_test (rangei, rangej, 1, opcode, ops, tem1,
2257 rangei->in_p, lowj, tem2,
2258 rangei->strict_overflow_p
2259 || rangej->strict_overflow_p))
2260 return true;
2261 return false;
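/* Illustrative sketch, not part of the original pass: the arithmetic used
   above, shown on plain unsigned values for the single-value case
   X == CST1 || X == CST2 with CST1 < CST2.  When CST2 - CST1 has exactly
   one bit set, both equality tests collapse into a single comparison
   against zero; for ranges of equal size the caller compares the masked
   difference against the range size instead.  The helper name below is
   made up.  */

static int
sketch_diff_equality_merge (unsigned int x, unsigned int cst1, unsigned int cst2)
{
  unsigned int d = cst2 - cst1;

  /* Precondition checked by optimize_range_tests_diff: exactly one bit set.  */
  if (d == 0 || (d & (d - 1)) != 0)
    return x == cst1 || x == cst2;

  /* Transformed form: ((X - CST1) & ~(CST2 - CST1)) == 0.  */
  return ((x - cst1) & ~d) == 0;
}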
2264 /* Perform the checks common to optimize_range_tests_xor and
2265 optimize_range_tests_diff.
2266 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2267 Else it calls optimize_range_tests_diff. */
2269 static bool
2270 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2271 bool optimize_xor, vec<operand_entry_t> *ops,
2272 struct range_entry *ranges)
2274 int i, j;
2275 bool any_changes = false;
2276 for (i = first; i < length; i++)
2278 tree lowi, highi, lowj, highj, type, tem;
2280 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2281 continue;
2282 type = TREE_TYPE (ranges[i].exp);
2283 if (!INTEGRAL_TYPE_P (type))
2284 continue;
2285 lowi = ranges[i].low;
2286 if (lowi == NULL_TREE)
2287 lowi = TYPE_MIN_VALUE (type);
2288 highi = ranges[i].high;
2289 if (highi == NULL_TREE)
2290 continue;
2291 for (j = i + 1; j < length && j < i + 64; j++)
2293 bool changes;
2294 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2295 continue;
2296 lowj = ranges[j].low;
2297 if (lowj == NULL_TREE)
2298 continue;
2299 highj = ranges[j].high;
2300 if (highj == NULL_TREE)
2301 highj = TYPE_MAX_VALUE (type);
2302 /* Check lowj > highi. */
2303 tem = fold_binary (GT_EXPR, boolean_type_node,
2304 lowj, highi);
2305 if (tem == NULL_TREE || !integer_onep (tem))
2306 continue;
2307 if (optimize_xor)
2308 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2309 highi, highj, ops,
2310 ranges + i, ranges + j);
2311 else
2312 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2313 highi, highj, ops,
2314 ranges + i, ranges + j);
2315 if (changes)
2317 any_changes = true;
2318 break;
2322 return any_changes;
2325 /* Optimize range tests, similarly to how fold_range_test optimizes
2326 it on trees. The tree code for the binary
2327 operation between all the operands is OPCODE.
2328 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2329 maybe_optimize_range_tests for inter-bb range optimization.
2330 In that case if oe->op is NULL, oe->id is bb->index whose
2331 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2332 the actual opcode. */
2334 static bool
2335 optimize_range_tests (enum tree_code opcode,
2336 vec<operand_entry_t> *ops)
2338 unsigned int length = ops->length (), i, j, first;
2339 operand_entry_t oe;
2340 struct range_entry *ranges;
2341 bool any_changes = false;
2343 if (length == 1)
2344 return false;
2346 ranges = XNEWVEC (struct range_entry, length);
2347 for (i = 0; i < length; i++)
2349 oe = (*ops)[i];
2350 ranges[i].idx = i;
2351 init_range_entry (ranges + i, oe->op,
2352 oe->op ? NULL :
2353 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2354 /* For |, invert it now; we will invert it again before emitting
2355 the optimized expression. */
2356 if (opcode == BIT_IOR_EXPR
2357 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2358 ranges[i].in_p = !ranges[i].in_p;
2361 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2362 for (i = 0; i < length; i++)
2363 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2364 break;
2366 /* Try to merge ranges. */
2367 for (first = i; i < length; i++)
2369 tree low = ranges[i].low;
2370 tree high = ranges[i].high;
2371 int in_p = ranges[i].in_p;
2372 bool strict_overflow_p = ranges[i].strict_overflow_p;
2373 int update_fail_count = 0;
2375 for (j = i + 1; j < length; j++)
2377 if (ranges[i].exp != ranges[j].exp)
2378 break;
2379 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2380 ranges[j].in_p, ranges[j].low, ranges[j].high))
2381 break;
2382 strict_overflow_p |= ranges[j].strict_overflow_p;
2385 if (j == i + 1)
2386 continue;
2388 if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode,
2389 ops, ranges[i].exp, in_p, low, high,
2390 strict_overflow_p))
2392 i = j - 1;
2393 any_changes = true;
2395 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2396 while update_range_test would fail. */
2397 else if (update_fail_count == 64)
2398 i = j - 1;
2399 else
2400 ++update_fail_count;
2403 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2404 ops, ranges);
2406 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2407 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2408 ops, ranges);
2410 if (any_changes && opcode != ERROR_MARK)
2412 j = 0;
2413 FOR_EACH_VEC_ELT (*ops, i, oe)
2415 if (oe->op == error_mark_node)
2416 continue;
2417 else if (i != j)
2418 (*ops)[j] = oe;
2419 j++;
2421 ops->truncate (j);
2424 XDELETEVEC (ranges);
2425 return any_changes;
2428 /* Return true if STMT is a cast like:
2429 <bb N>:
2431 _123 = (int) _234;
2433 <bb M>:
2434 # _345 = PHI <_123(N), 1(...), 1(...)>
2435 where _234 has bool type, _123 has a single use and
2436 bb N has a single successor M. This is commonly used in
2437 the last block of a range test. */
2439 static bool
2440 final_range_test_p (gimple stmt)
2442 basic_block bb, rhs_bb;
2443 edge e;
2444 tree lhs, rhs;
2445 use_operand_p use_p;
2446 gimple use_stmt;
2448 if (!gimple_assign_cast_p (stmt))
2449 return false;
2450 bb = gimple_bb (stmt);
2451 if (!single_succ_p (bb))
2452 return false;
2453 e = single_succ_edge (bb);
2454 if (e->flags & EDGE_COMPLEX)
2455 return false;
2457 lhs = gimple_assign_lhs (stmt);
2458 rhs = gimple_assign_rhs1 (stmt);
2459 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2460 || TREE_CODE (rhs) != SSA_NAME
2461 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2462 return false;
2464 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2465 if (!single_imm_use (lhs, &use_p, &use_stmt))
2466 return false;
2468 if (gimple_code (use_stmt) != GIMPLE_PHI
2469 || gimple_bb (use_stmt) != e->dest)
2470 return false;
2472 /* And that the rhs is defined in the same loop. */
2473 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2474 if (rhs_bb == NULL
2475 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2476 return false;
2478 return true;
2481 /* Return true if BB is a suitable basic block for inter-bb range test
2482 optimization. If BACKWARD is true, BB should be the only predecessor
2483 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2484 or compared with it to find a common basic block to which all conditions
2485 branch if true resp. false. If BACKWARD is false, TEST_BB should
2486 be the only predecessor of BB. */
2488 static bool
2489 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2490 bool backward)
2492 edge_iterator ei, ei2;
2493 edge e, e2;
2494 gimple stmt;
2495 gimple_stmt_iterator gsi;
2496 bool other_edge_seen = false;
2497 bool is_cond;
2499 if (test_bb == bb)
2500 return false;
2501 /* Check last stmt first. */
2502 stmt = last_stmt (bb);
2503 if (stmt == NULL
2504 || (gimple_code (stmt) != GIMPLE_COND
2505 && (backward || !final_range_test_p (stmt)))
2506 || gimple_visited_p (stmt)
2507 || stmt_could_throw_p (stmt)
2508 || *other_bb == bb)
2509 return false;
2510 is_cond = gimple_code (stmt) == GIMPLE_COND;
2511 if (is_cond)
2513 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2514 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2515 to *OTHER_BB (if not set yet, try to find it out). */
2516 if (EDGE_COUNT (bb->succs) != 2)
2517 return false;
2518 FOR_EACH_EDGE (e, ei, bb->succs)
2520 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2521 return false;
2522 if (e->dest == test_bb)
2524 if (backward)
2525 continue;
2526 else
2527 return false;
2529 if (e->dest == bb)
2530 return false;
2531 if (*other_bb == NULL)
2533 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2534 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2535 return false;
2536 else if (e->dest == e2->dest)
2537 *other_bb = e->dest;
2538 if (*other_bb == NULL)
2539 return false;
2541 if (e->dest == *other_bb)
2542 other_edge_seen = true;
2543 else if (backward)
2544 return false;
2546 if (*other_bb == NULL || !other_edge_seen)
2547 return false;
2549 else if (single_succ (bb) != *other_bb)
2550 return false;
2552 /* Now check all PHIs of *OTHER_BB. */
2553 e = find_edge (bb, *other_bb);
2554 e2 = find_edge (test_bb, *other_bb);
2555 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2557 gimple phi = gsi_stmt (gsi);
2558 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2559 corresponding to BB and TEST_BB predecessor must be the same. */
2560 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2561 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2563 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2564 one of the PHIs should have the lhs of the last stmt in
2565 that block as PHI arg and that PHI should have 0 or 1
2566 corresponding to it in all other range test basic blocks
2567 considered. */
2568 if (!is_cond)
2570 if (gimple_phi_arg_def (phi, e->dest_idx)
2571 == gimple_assign_lhs (stmt)
2572 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2573 || integer_onep (gimple_phi_arg_def (phi,
2574 e2->dest_idx))))
2575 continue;
2577 else
2579 gimple test_last = last_stmt (test_bb);
2580 if (gimple_code (test_last) != GIMPLE_COND
2581 && gimple_phi_arg_def (phi, e2->dest_idx)
2582 == gimple_assign_lhs (test_last)
2583 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2584 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2585 continue;
2588 return false;
2591 return true;
2594 /* Return true if BB doesn't have side-effects that would disallow
2595 range test optimization, all SSA_NAMEs set in the bb are consumed
2596 in the bb and there are no PHIs. */
2598 static bool
2599 no_side_effect_bb (basic_block bb)
2601 gimple_stmt_iterator gsi;
2602 gimple last;
2604 if (!gimple_seq_empty_p (phi_nodes (bb)))
2605 return false;
2606 last = last_stmt (bb);
2607 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2609 gimple stmt = gsi_stmt (gsi);
2610 tree lhs;
2611 imm_use_iterator imm_iter;
2612 use_operand_p use_p;
2614 if (is_gimple_debug (stmt))
2615 continue;
2616 if (gimple_has_side_effects (stmt))
2617 return false;
2618 if (stmt == last)
2619 return true;
2620 if (!is_gimple_assign (stmt))
2621 return false;
2622 lhs = gimple_assign_lhs (stmt);
2623 if (TREE_CODE (lhs) != SSA_NAME)
2624 return false;
2625 if (gimple_assign_rhs_could_trap_p (stmt))
2626 return false;
2627 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2629 gimple use_stmt = USE_STMT (use_p);
2630 if (is_gimple_debug (use_stmt))
2631 continue;
2632 if (gimple_bb (use_stmt) != bb)
2633 return false;
2636 return false;
2639 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2640 return true and fill in *OPS recursively. */
2642 static bool
2643 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2644 struct loop *loop)
2646 gimple stmt = SSA_NAME_DEF_STMT (var);
2647 tree rhs[2];
2648 int i;
2650 if (!is_reassociable_op (stmt, code, loop))
2651 return false;
2653 rhs[0] = gimple_assign_rhs1 (stmt);
2654 rhs[1] = gimple_assign_rhs2 (stmt);
2655 gimple_set_visited (stmt, true);
2656 for (i = 0; i < 2; i++)
2657 if (TREE_CODE (rhs[i]) == SSA_NAME
2658 && !get_ops (rhs[i], code, ops, loop)
2659 && has_single_use (rhs[i]))
2661 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2663 oe->op = rhs[i];
2664 oe->rank = code;
2665 oe->id = 0;
2666 oe->count = 1;
2667 ops->safe_push (oe);
2669 return true;
2672 /* Find the ops that were added by get_ops starting from VAR, see if
2673 they were changed during update_range_test and if so, create new
2674 stmts. */
2676 static tree
2677 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2678 unsigned int *pidx, struct loop *loop)
2680 gimple stmt = SSA_NAME_DEF_STMT (var);
2681 tree rhs[4];
2682 int i;
2684 if (!is_reassociable_op (stmt, code, loop))
2685 return NULL;
2687 rhs[0] = gimple_assign_rhs1 (stmt);
2688 rhs[1] = gimple_assign_rhs2 (stmt);
2689 rhs[2] = rhs[0];
2690 rhs[3] = rhs[1];
2691 for (i = 0; i < 2; i++)
2692 if (TREE_CODE (rhs[i]) == SSA_NAME)
2694 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2695 if (rhs[2 + i] == NULL_TREE)
2697 if (has_single_use (rhs[i]))
2698 rhs[2 + i] = ops[(*pidx)++]->op;
2699 else
2700 rhs[2 + i] = rhs[i];
2703 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
2704 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
2706 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2707 var = make_ssa_name (TREE_TYPE (var), NULL);
2708 gimple g = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
2709 var, rhs[2], rhs[3]);
2710 gimple_set_uid (g, gimple_uid (stmt));
2711 gimple_set_visited (g, true);
2712 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2714 return var;
2717 /* Structure to track the initial value passed to get_ops and
2718 the range in the ops vector for each basic block. */
2720 struct inter_bb_range_test_entry
2722 tree op;
2723 unsigned int first_idx, last_idx;
2726 /* Inter-bb range test optimization. */
2728 static void
2729 maybe_optimize_range_tests (gimple stmt)
2731 basic_block first_bb = gimple_bb (stmt);
2732 basic_block last_bb = first_bb;
2733 basic_block other_bb = NULL;
2734 basic_block bb;
2735 edge_iterator ei;
2736 edge e;
2737 auto_vec<operand_entry_t> ops;
2738 auto_vec<inter_bb_range_test_entry> bbinfo;
2739 bool any_changes = false;
2741 /* Consider only basic blocks that end with GIMPLE_COND or
2742 a cast statement satisfying final_range_test_p. All
2743 but the last bb in the first_bb .. last_bb range
2744 should end with GIMPLE_COND. */
2745 if (gimple_code (stmt) == GIMPLE_COND)
2747 if (EDGE_COUNT (first_bb->succs) != 2)
2748 return;
2750 else if (final_range_test_p (stmt))
2751 other_bb = single_succ (first_bb);
2752 else
2753 return;
2755 if (stmt_could_throw_p (stmt))
2756 return;
2758 /* As relative ordering of post-dominator sons isn't fixed,
2759 maybe_optimize_range_tests can be called first on any
2760 bb in the range we want to optimize. So, start searching
2761 backwards, if first_bb can be set to a predecessor. */
2762 while (single_pred_p (first_bb))
2764 basic_block pred_bb = single_pred (first_bb);
2765 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
2766 break;
2767 if (!no_side_effect_bb (first_bb))
2768 break;
2769 first_bb = pred_bb;
2771 /* If first_bb is last_bb, other_bb hasn't been computed yet.
2772 Before starting forward search in last_bb successors, find
2773 out the other_bb. */
2774 if (first_bb == last_bb)
2776 other_bb = NULL;
2777 /* As non-GIMPLE_COND last stmt always terminates the range,
2778 if forward search didn't discover anything, just give up. */
2779 if (gimple_code (stmt) != GIMPLE_COND)
2780 return;
2781 /* Look at both successors. Either it ends with a GIMPLE_COND
2782 and satisfies suitable_cond_bb, or ends with a cast and
2783 other_bb is that cast's successor. */
2784 FOR_EACH_EDGE (e, ei, first_bb->succs)
2785 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
2786 || e->dest == first_bb)
2787 return;
2788 else if (single_pred_p (e->dest))
2790 stmt = last_stmt (e->dest);
2791 if (stmt
2792 && gimple_code (stmt) == GIMPLE_COND
2793 && EDGE_COUNT (e->dest->succs) == 2)
2795 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
2796 break;
2797 else
2798 other_bb = NULL;
2800 else if (stmt
2801 && final_range_test_p (stmt)
2802 && find_edge (first_bb, single_succ (e->dest)))
2804 other_bb = single_succ (e->dest);
2805 if (other_bb == first_bb)
2806 other_bb = NULL;
2809 if (other_bb == NULL)
2810 return;
2812 /* Now do the forward search, moving last_bb to successor bbs
2813 that aren't other_bb. */
2814 while (EDGE_COUNT (last_bb->succs) == 2)
2816 FOR_EACH_EDGE (e, ei, last_bb->succs)
2817 if (e->dest != other_bb)
2818 break;
2819 if (e == NULL)
2820 break;
2821 if (!single_pred_p (e->dest))
2822 break;
2823 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
2824 break;
2825 if (!no_side_effect_bb (e->dest))
2826 break;
2827 last_bb = e->dest;
2829 if (first_bb == last_bb)
2830 return;
2831 /* Here basic blocks first_bb through last_bb's predecessor
2832 end with GIMPLE_COND, all of them have one of the edges to
2833 other_bb and another to another block in the range,
2834 all blocks except first_bb don't have side-effects and
2835 last_bb ends with either GIMPLE_COND, or cast satisfying
2836 final_range_test_p. */
2837 for (bb = last_bb; ; bb = single_pred (bb))
2839 enum tree_code code;
2840 tree lhs, rhs;
2841 inter_bb_range_test_entry bb_ent;
2843 bb_ent.op = NULL_TREE;
2844 bb_ent.first_idx = ops.length ();
2845 bb_ent.last_idx = bb_ent.first_idx;
2846 e = find_edge (bb, other_bb);
2847 stmt = last_stmt (bb);
2848 gimple_set_visited (stmt, true);
2849 if (gimple_code (stmt) != GIMPLE_COND)
2851 use_operand_p use_p;
2852 gimple phi;
2853 edge e2;
2854 unsigned int d;
2856 lhs = gimple_assign_lhs (stmt);
2857 rhs = gimple_assign_rhs1 (stmt);
2858 gcc_assert (bb == last_bb);
2860 /* stmt is
2861 _123 = (int) _234;
2863 followed by:
2864 <bb M>:
2865 # _345 = PHI <_123(N), 1(...), 1(...)>
2867 or 0 instead of 1. If it is 0, the _234
2868 range test is anded together with all the
2869 other range tests; if it is 1, it is ored with
2870 them. */
2871 single_imm_use (lhs, &use_p, &phi);
2872 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
2873 e2 = find_edge (first_bb, other_bb);
2874 d = e2->dest_idx;
2875 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
2876 if (integer_zerop (gimple_phi_arg_def (phi, d)))
2877 code = BIT_AND_EXPR;
2878 else
2880 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
2881 code = BIT_IOR_EXPR;
2884 /* If _234 SSA_NAME_DEF_STMT is
2885 _234 = _567 | _789;
2886 (or &, corresponding to 1/0 in the phi arguments,
2887 push into ops the individual range test arguments
2888 of the bitwise or resp. and, recursively. */
2889 if (!get_ops (rhs, code, &ops,
2890 loop_containing_stmt (stmt))
2891 && has_single_use (rhs))
2893 /* Otherwise, push the _234 range test itself. */
2894 operand_entry_t oe
2895 = (operand_entry_t) pool_alloc (operand_entry_pool);
2897 oe->op = rhs;
2898 oe->rank = code;
2899 oe->id = 0;
2900 oe->count = 1;
2901 ops.safe_push (oe);
2902 bb_ent.last_idx++;
2904 else
2905 bb_ent.last_idx = ops.length ();
2906 bb_ent.op = rhs;
2907 bbinfo.safe_push (bb_ent);
2908 continue;
2910 /* Otherwise stmt is GIMPLE_COND. */
2911 code = gimple_cond_code (stmt);
2912 lhs = gimple_cond_lhs (stmt);
2913 rhs = gimple_cond_rhs (stmt);
2914 if (TREE_CODE (lhs) == SSA_NAME
2915 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2916 && ((code != EQ_EXPR && code != NE_EXPR)
2917 || rhs != boolean_false_node
2918 /* Either push into ops the individual bitwise
2919 or resp. and operands, depending on which
2920 edge is other_bb. */
2921 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
2922 ^ (code == EQ_EXPR))
2923 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
2924 loop_containing_stmt (stmt))))
2926 /* Or push the GIMPLE_COND stmt itself. */
2927 operand_entry_t oe
2928 = (operand_entry_t) pool_alloc (operand_entry_pool);
2930 oe->op = NULL;
2931 oe->rank = (e->flags & EDGE_TRUE_VALUE)
2932 ? BIT_IOR_EXPR : BIT_AND_EXPR;
2933 /* oe->op = NULL signals that there is no SSA_NAME
2934 for the range test, and oe->id instead is the
2935 basic block number at whose end the GIMPLE_COND
2936 is. */
2937 oe->id = bb->index;
2938 oe->count = 1;
2939 ops.safe_push (oe);
2940 bb_ent.op = NULL;
2941 bb_ent.last_idx++;
2943 else if (ops.length () > bb_ent.first_idx)
2945 bb_ent.op = lhs;
2946 bb_ent.last_idx = ops.length ();
2948 bbinfo.safe_push (bb_ent);
2949 if (bb == first_bb)
2950 break;
2952 if (ops.length () > 1)
2953 any_changes = optimize_range_tests (ERROR_MARK, &ops);
2954 if (any_changes)
2956 unsigned int idx;
2957 /* update_ops relies on has_single_use predicates returning the
2958 same values as it did during get_ops earlier. Additionally it
2959 never removes statements, only adds new ones and it should walk
2960 from the single imm use and check the predicate already before
2961 making those changes.
2962 On the other side, the handling of GIMPLE_COND directly can turn
2963 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
2964 it needs to be done in a separate loop afterwards. */
2965 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
2967 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
2968 && bbinfo[idx].op != NULL_TREE)
2970 tree new_op;
2972 stmt = last_stmt (bb);
2973 new_op = update_ops (bbinfo[idx].op,
2974 (enum tree_code)
2975 ops[bbinfo[idx].first_idx]->rank,
2976 ops, &bbinfo[idx].first_idx,
2977 loop_containing_stmt (stmt));
2978 if (new_op == NULL_TREE)
2980 gcc_assert (bb == last_bb);
2981 new_op = ops[bbinfo[idx].first_idx++]->op;
2983 if (bbinfo[idx].op != new_op)
2985 imm_use_iterator iter;
2986 use_operand_p use_p;
2987 gimple use_stmt, cast_stmt = NULL;
2989 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
2990 if (is_gimple_debug (use_stmt))
2991 continue;
2992 else if (gimple_code (use_stmt) == GIMPLE_COND
2993 || gimple_code (use_stmt) == GIMPLE_PHI)
2994 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
2995 SET_USE (use_p, new_op);
2996 else if (gimple_assign_cast_p (use_stmt))
2997 cast_stmt = use_stmt;
2998 else
2999 gcc_unreachable ();
3000 if (cast_stmt)
3002 gcc_assert (bb == last_bb);
3003 tree lhs = gimple_assign_lhs (cast_stmt);
3004 tree new_lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3005 enum tree_code rhs_code
3006 = gimple_assign_rhs_code (cast_stmt);
3007 gimple g;
3008 if (is_gimple_min_invariant (new_op))
3010 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3011 g = gimple_build_assign (new_lhs, new_op);
3013 else
3014 g = gimple_build_assign_with_ops (rhs_code, new_lhs,
3015 new_op, NULL_TREE);
3016 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3017 gimple_set_uid (g, gimple_uid (cast_stmt));
3018 gimple_set_visited (g, true);
3019 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3020 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3021 if (is_gimple_debug (use_stmt))
3022 continue;
3023 else if (gimple_code (use_stmt) == GIMPLE_COND
3024 || gimple_code (use_stmt) == GIMPLE_PHI)
3025 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3026 SET_USE (use_p, new_lhs);
3027 else
3028 gcc_unreachable ();
3032 if (bb == first_bb)
3033 break;
3035 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3037 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3038 && bbinfo[idx].op == NULL_TREE
3039 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3041 stmt = last_stmt (bb);
3042 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3043 gimple_cond_make_false (stmt);
3044 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3045 gimple_cond_make_true (stmt);
3046 else
3048 gimple_cond_set_code (stmt, NE_EXPR);
3049 gimple_cond_set_lhs (stmt, ops[bbinfo[idx].first_idx]->op);
3050 gimple_cond_set_rhs (stmt, boolean_false_node);
3052 update_stmt (stmt);
3054 if (bb == first_bb)
3055 break;
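/* Illustrative example, not taken from the sources: a chain of conditions
   such as

     int f (int x)
     {
       if (x == 2) return 1;
       if (x == 3) return 1;
       if (x == 10) return 1;
       if (x == 11) return 1;
       return 0;
     }

   typically reaches this function as several basic blocks, each ending in
   a GIMPLE_COND branching to a common block.  maybe_optimize_range_tests
   collects those conditions into the ops vector and lets
   optimize_range_tests merge them, ideally into a single test roughly of
   the shape (((unsigned) x - 2) & ~8) <= 1.  */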
3060 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3061 of STMT in its operands. This is also known as a "destructive
3062 update" operation. */
3064 static bool
3065 is_phi_for_stmt (gimple stmt, tree operand)
3067 gimple def_stmt;
3068 tree lhs;
3069 use_operand_p arg_p;
3070 ssa_op_iter i;
3072 if (TREE_CODE (operand) != SSA_NAME)
3073 return false;
3075 lhs = gimple_assign_lhs (stmt);
3077 def_stmt = SSA_NAME_DEF_STMT (operand);
3078 if (gimple_code (def_stmt) != GIMPLE_PHI)
3079 return false;
3081 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
3082 if (lhs == USE_FROM_PTR (arg_p))
3083 return true;
3084 return false;
3087 /* Remove def stmt of VAR if VAR has zero uses and recurse
3088 on rhs1 operand if so. */
3090 static void
3091 remove_visited_stmt_chain (tree var)
3093 gimple stmt;
3094 gimple_stmt_iterator gsi;
3096 while (1)
3098 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3099 return;
3100 stmt = SSA_NAME_DEF_STMT (var);
3101 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3103 var = gimple_assign_rhs1 (stmt);
3104 gsi = gsi_for_stmt (stmt);
3105 reassoc_remove_stmt (&gsi);
3106 release_defs (stmt);
3108 else
3109 return;
3113 /* This function checks three consecutive operands in
3114 the passed operand vector OPS starting from OPINDEX and
3115 swaps two operands if it is profitable for the binary operation
3116 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3118 We pair ops with the same rank if possible.
3120 The alternative we try is to see if STMT is a destructive
3121 update style statement, which is like:
3122 b = phi (a, ...)
3123 a = c + b;
3124 In that case, we want to use the destructive update form to
3125 expose the possible vectorizer sum reduction opportunity.
3126 In that case, the third operand will be the phi node. This
3127 check is not performed if STMT is null.
3129 We could, of course, try to be better as noted above, and do a
3130 lot of work to try to find these opportunities in >3 operand
3131 cases, but it is unlikely to be worth it. */
3133 static void
3134 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3135 unsigned int opindex, gimple stmt)
3137 operand_entry_t oe1, oe2, oe3;
3139 oe1 = ops[opindex];
3140 oe2 = ops[opindex + 1];
3141 oe3 = ops[opindex + 2];
3143 if ((oe1->rank == oe2->rank
3144 && oe2->rank != oe3->rank)
3145 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3146 && !is_phi_for_stmt (stmt, oe1->op)
3147 && !is_phi_for_stmt (stmt, oe2->op)))
3149 struct operand_entry temp = *oe3;
3150 oe3->op = oe1->op;
3151 oe3->rank = oe1->rank;
3152 oe1->op = temp.op;
3153 oe1->rank= temp.rank;
3155 else if ((oe1->rank == oe3->rank
3156 && oe2->rank != oe3->rank)
3157 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3158 && !is_phi_for_stmt (stmt, oe1->op)
3159 && !is_phi_for_stmt (stmt, oe3->op)))
3161 struct operand_entry temp = *oe2;
3162 oe2->op = oe1->op;
3163 oe2->rank = oe1->rank;
3164 oe1->op = temp.op;
3165 oe1->rank = temp.rank;
3169 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3170 two definitions, otherwise return STMT. */
3172 static inline gimple
3173 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3175 if (TREE_CODE (rhs1) == SSA_NAME
3176 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3177 stmt = SSA_NAME_DEF_STMT (rhs1);
3178 if (TREE_CODE (rhs2) == SSA_NAME
3179 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3180 stmt = SSA_NAME_DEF_STMT (rhs2);
3181 return stmt;
3184 /* Recursively rewrite our linearized statements so that the operators
3185 match those in OPS[OPINDEX], putting the computation in rank
3186 order. Return new lhs. */
3188 static tree
3189 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3190 vec<operand_entry_t> ops, bool changed)
3192 tree rhs1 = gimple_assign_rhs1 (stmt);
3193 tree rhs2 = gimple_assign_rhs2 (stmt);
3194 tree lhs = gimple_assign_lhs (stmt);
3195 operand_entry_t oe;
3197 /* The final recursion case for this function is that you have
3198 exactly two operations left.
3199 If we had exactly one op in the entire list to start with, we
3200 would have never called this function, and the tail recursion
3201 rewrites them one at a time. */
3202 if (opindex + 2 == ops.length ())
3204 operand_entry_t oe1, oe2;
3206 oe1 = ops[opindex];
3207 oe2 = ops[opindex + 1];
3209 if (rhs1 != oe1->op || rhs2 != oe2->op)
3211 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3212 unsigned int uid = gimple_uid (stmt);
3214 if (dump_file && (dump_flags & TDF_DETAILS))
3216 fprintf (dump_file, "Transforming ");
3217 print_gimple_stmt (dump_file, stmt, 0, 0);
3220 if (changed)
3222 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3223 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3224 stmt
3225 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3226 lhs, oe1->op, oe2->op);
3227 gimple_set_uid (stmt, uid);
3228 gimple_set_visited (stmt, true);
3229 if (insert_point == gsi_stmt (gsi))
3230 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3231 else
3232 insert_stmt_after (stmt, insert_point);
3234 else
3236 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3237 == stmt);
3238 gimple_assign_set_rhs1 (stmt, oe1->op);
3239 gimple_assign_set_rhs2 (stmt, oe2->op);
3240 update_stmt (stmt);
3243 if (rhs1 != oe1->op && rhs1 != oe2->op)
3244 remove_visited_stmt_chain (rhs1);
3246 if (dump_file && (dump_flags & TDF_DETAILS))
3248 fprintf (dump_file, " into ");
3249 print_gimple_stmt (dump_file, stmt, 0, 0);
3252 return lhs;
3255 /* If we hit here, we should have 3 or more ops left. */
3256 gcc_assert (opindex + 2 < ops.length ());
3258 /* Rewrite the next operator. */
3259 oe = ops[opindex];
3261 /* Recurse on the LHS of the binary operator, which is guaranteed to
3262 be the non-leaf side. */
3263 tree new_rhs1
3264 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3265 changed || oe->op != rhs2);
3267 if (oe->op != rhs2 || new_rhs1 != rhs1)
3269 if (dump_file && (dump_flags & TDF_DETAILS))
3271 fprintf (dump_file, "Transforming ");
3272 print_gimple_stmt (dump_file, stmt, 0, 0);
3275 /* If changed is false, this is either opindex == 0
3276 or all outer rhs2's were equal to corresponding oe->op,
3277 and powi_result is NULL.
3278 That means lhs is equivalent before and after reassociation.
3279 Otherwise ensure the old lhs SSA_NAME is not reused and
3280 create a new stmt as well, so that any debug stmts will be
3281 properly adjusted. */
3282 if (changed)
3284 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3285 unsigned int uid = gimple_uid (stmt);
3286 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3288 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3289 stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt),
3290 lhs, new_rhs1, oe->op);
3291 gimple_set_uid (stmt, uid);
3292 gimple_set_visited (stmt, true);
3293 if (insert_point == gsi_stmt (gsi))
3294 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3295 else
3296 insert_stmt_after (stmt, insert_point);
3298 else
3300 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3301 == stmt);
3302 gimple_assign_set_rhs1 (stmt, new_rhs1);
3303 gimple_assign_set_rhs2 (stmt, oe->op);
3304 update_stmt (stmt);
3307 if (dump_file && (dump_flags & TDF_DETAILS))
3309 fprintf (dump_file, " into ");
3310 print_gimple_stmt (dump_file, stmt, 0, 0);
3313 return lhs;
3316 /* Find out how many cycles we need to compute statements chain.
3317 OPS_NUM holds the number of statements in a chain. CPU_WIDTH is the
3318 maximum number of independent statements we may execute per cycle. */
3320 static int
3321 get_required_cycles (int ops_num, int cpu_width)
3323 int res;
3324 int elog;
3325 unsigned int rest;
3327 /* While we have more than 2 * cpu_width operands
3328 we may reduce number of operands by cpu_width
3329 per cycle. */
3330 res = ops_num / (2 * cpu_width);
3332 /* The remaining operand count may be reduced twice per cycle
3333 until we have only one operand. */
3334 rest = (unsigned)(ops_num - res * cpu_width);
3335 elog = exact_log2 (rest);
3336 if (elog >= 0)
3337 res += elog;
3338 else
3339 res += floor_log2 (rest) + 1;
3341 return res;
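/* Illustrative worked example, not part of the original source, tracing the
   code above: with ops_num == 12 and cpu_width == 2 it computes
   res = 12 / 4 = 3, rest = 12 - 3 * 2 = 6, and since 6 is not a power of
   two, res += floor_log2 (6) + 1 = 3, giving 6 cycles total.  With a
   power-of-two remainder, e.g. ops_num == 8 and cpu_width == 2, it gives
   res = 2, rest = 4, res += exact_log2 (4) = 2, i.e. 4 cycles.  */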
3344 /* Returns an optimal number of registers to use for computation of
3345 given statements. */
3347 static int
3348 get_reassociation_width (int ops_num, enum tree_code opc,
3349 enum machine_mode mode)
3351 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3352 int width;
3353 int width_min;
3354 int cycles_best;
3356 if (param_width > 0)
3357 width = param_width;
3358 else
3359 width = targetm.sched.reassociation_width (opc, mode);
3361 if (width == 1)
3362 return width;
3364 /* Get the minimal time required for sequence computation. */
3365 cycles_best = get_required_cycles (ops_num, width);
3367 /* Check if we may use less width and still compute sequence for
3368 the same time. It will allow us to reduce register usage.
3369 get_required_cycles is monotonically increasing with lower width
3370 so we can perform a binary search for the minimal width that still
3371 results in the optimal cycle count. */
3372 width_min = 1;
3373 while (width > width_min)
3375 int width_mid = (width + width_min) / 2;
3377 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3378 width = width_mid;
3379 else if (width_min < width_mid)
3380 width_min = width_mid;
3381 else
3382 break;
3385 return width;
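/* Illustrative worked example, not part of the original source: assume the
   target reports a reassociation width of 4 and ops_num == 4.
   get_required_cycles (4, 4) and get_required_cycles (4, 2) both return 2,
   while get_required_cycles (4, 1) returns 3, so the binary search above
   settles on width 2: the narrower width costs no extra cycles but keeps
   fewer partial sums live at once.  */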
3388 /* Recursively rewrite our linearized statements so that the operators
3389 match those in OPS[OPINDEX], putting the computation in rank
3390 order and trying to allow operations to be executed in
3391 parallel. */
3393 static void
3394 rewrite_expr_tree_parallel (gimple stmt, int width,
3395 vec<operand_entry_t> ops)
3397 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3398 int op_num = ops.length ();
3399 int stmt_num = op_num - 1;
3400 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3401 int op_index = op_num - 1;
3402 int stmt_index = 0;
3403 int ready_stmts_end = 0;
3404 int i = 0;
3405 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3407 /* We start expression rewriting from the top statements.
3408 So, in this loop we create a full list of statements
3409 we will work with. */
3410 stmts[stmt_num - 1] = stmt;
3411 for (i = stmt_num - 2; i >= 0; i--)
3412 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3414 for (i = 0; i < stmt_num; i++)
3416 tree op1, op2;
3418 /* Determine whether we should use results of
3419 already handled statements or not. */
3420 if (ready_stmts_end == 0
3421 && (i - stmt_index >= width || op_index < 1))
3422 ready_stmts_end = i;
3424 /* Now we choose operands for the next statement. Non-zero
3425 value in ready_stmts_end means here that we should use
3426 the result of already generated statements as new operand. */
3427 if (ready_stmts_end > 0)
3429 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3430 if (ready_stmts_end > stmt_index)
3431 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3432 else if (op_index >= 0)
3433 op2 = ops[op_index--]->op;
3434 else
3436 gcc_assert (stmt_index < i);
3437 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3440 if (stmt_index >= ready_stmts_end)
3441 ready_stmts_end = 0;
3443 else
3445 if (op_index > 1)
3446 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3447 op2 = ops[op_index--]->op;
3448 op1 = ops[op_index--]->op;
3451 /* If we emit the last statement then we should put
3452 operands into the last statement. It will also
3453 break the loop. */
3454 if (op_index < 0 && stmt_index == i)
3455 i = stmt_num - 1;
3457 if (dump_file && (dump_flags & TDF_DETAILS))
3459 fprintf (dump_file, "Transforming ");
3460 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3463 /* We keep original statement only for the last one. All
3464 others are recreated. */
3465 if (i == stmt_num - 1)
3467 gimple_assign_set_rhs1 (stmts[i], op1);
3468 gimple_assign_set_rhs2 (stmts[i], op2);
3469 update_stmt (stmts[i]);
3471 else
3472 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3474 if (dump_file && (dump_flags & TDF_DETAILS))
3476 fprintf (dump_file, " into ");
3477 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3481 remove_visited_stmt_chain (last_rhs1);
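/* Illustrative worked example, not part of the original source: for an
   operand vector {a, b, c, d} and width 2, the loop above first emits the
   two independent sums and only then combines them, roughly

     t1 = c + d;
     t2 = a + b;
     x  = t1 + t2;

   whereas the serial rewrite_expr_tree form would be ((a + b) + c) + d.  */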
3484 /* Transform STMT, which is really (A + B) + (C + D) into the left
3485 linear form, ((A+B)+C)+D.
3486 Recurse on D if necessary. */
3488 static void
3489 linearize_expr (gimple stmt)
3491 gimple_stmt_iterator gsi;
3492 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3493 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3494 gimple oldbinrhs = binrhs;
3495 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3496 gimple newbinrhs = NULL;
3497 struct loop *loop = loop_containing_stmt (stmt);
3498 tree lhs = gimple_assign_lhs (stmt);
3500 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3501 && is_reassociable_op (binrhs, rhscode, loop));
3503 gsi = gsi_for_stmt (stmt);
3505 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3506 binrhs = gimple_build_assign_with_ops (gimple_assign_rhs_code (binrhs),
3507 make_ssa_name (TREE_TYPE (lhs), NULL),
3508 gimple_assign_lhs (binlhs),
3509 gimple_assign_rhs2 (binrhs));
3510 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3511 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3512 gimple_set_uid (binrhs, gimple_uid (stmt));
3514 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3515 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3517 if (dump_file && (dump_flags & TDF_DETAILS))
3519 fprintf (dump_file, "Linearized: ");
3520 print_gimple_stmt (dump_file, stmt, 0, 0);
3523 reassociate_stats.linearized++;
3524 update_stmt (stmt);
3526 gsi = gsi_for_stmt (oldbinrhs);
3527 reassoc_remove_stmt (&gsi);
3528 release_defs (oldbinrhs);
3530 gimple_set_visited (stmt, true);
3531 gimple_set_visited (binlhs, true);
3532 gimple_set_visited (binrhs, true);
3534 /* Tail recurse on the new rhs if it still needs reassociation. */
3535 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3536 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3537 want to change the algorithm while converting to tuples. */
3538 linearize_expr (stmt);
3541 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3542 it. Otherwise, return NULL. */
3544 static gimple
3545 get_single_immediate_use (tree lhs)
3547 use_operand_p immuse;
3548 gimple immusestmt;
3550 if (TREE_CODE (lhs) == SSA_NAME
3551 && single_imm_use (lhs, &immuse, &immusestmt)
3552 && is_gimple_assign (immusestmt))
3553 return immusestmt;
3555 return NULL;
3558 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3559 representing the negated value. Insertions of any necessary
3560 instructions go before GSI.
3561 This function is recursive in that, if you hand it "a_5" as the
3562 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3563 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3565 static tree
3566 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3568 gimple negatedefstmt = NULL;
3569 tree resultofnegate;
3570 gimple_stmt_iterator gsi;
3571 unsigned int uid;
3573 /* If we are trying to negate a name, defined by an add, negate the
3574 add operands instead. */
3575 if (TREE_CODE (tonegate) == SSA_NAME)
3576 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3577 if (TREE_CODE (tonegate) == SSA_NAME
3578 && is_gimple_assign (negatedefstmt)
3579 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3580 && has_single_use (gimple_assign_lhs (negatedefstmt))
3581 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3583 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3584 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3585 tree lhs = gimple_assign_lhs (negatedefstmt);
3586 gimple g;
3588 gsi = gsi_for_stmt (negatedefstmt);
3589 rhs1 = negate_value (rhs1, &gsi);
3591 gsi = gsi_for_stmt (negatedefstmt);
3592 rhs2 = negate_value (rhs2, &gsi);
3594 gsi = gsi_for_stmt (negatedefstmt);
3595 lhs = make_ssa_name (TREE_TYPE (lhs), NULL);
3596 gimple_set_visited (negatedefstmt, true);
3597 g = gimple_build_assign_with_ops (PLUS_EXPR, lhs, rhs1, rhs2);
3598 gimple_set_uid (g, gimple_uid (negatedefstmt));
3599 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3600 return lhs;
3603 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3604 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3605 NULL_TREE, true, GSI_SAME_STMT);
3606 gsi = *gsip;
3607 uid = gimple_uid (gsi_stmt (gsi));
3608 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3610 gimple stmt = gsi_stmt (gsi);
3611 if (gimple_uid (stmt) != 0)
3612 break;
3613 gimple_set_uid (stmt, uid);
3615 return resultofnegate;
3618 /* Return true if we should break up the subtract in STMT into an add
3619 with negate. This is true when the subtract operands are really
3620 adds, or the subtract itself is used in an add expression. In
3621 either case, breaking up the subtract into an add with negate
3622 exposes the adds to reassociation. */
3624 static bool
3625 should_break_up_subtract (gimple stmt)
3627 tree lhs = gimple_assign_lhs (stmt);
3628 tree binlhs = gimple_assign_rhs1 (stmt);
3629 tree binrhs = gimple_assign_rhs2 (stmt);
3630 gimple immusestmt;
3631 struct loop *loop = loop_containing_stmt (stmt);
3633 if (TREE_CODE (binlhs) == SSA_NAME
3634 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3635 return true;
3637 if (TREE_CODE (binrhs) == SSA_NAME
3638 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3639 return true;
3641 if (TREE_CODE (lhs) == SSA_NAME
3642 && (immusestmt = get_single_immediate_use (lhs))
3643 && is_gimple_assign (immusestmt)
3644 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3645 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3646 return true;
3647 return false;
3650 /* Transform STMT from A - B into A + -B. */
3652 static void
3653 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3655 tree rhs1 = gimple_assign_rhs1 (stmt);
3656 tree rhs2 = gimple_assign_rhs2 (stmt);
3658 if (dump_file && (dump_flags & TDF_DETAILS))
3660 fprintf (dump_file, "Breaking up subtract ");
3661 print_gimple_stmt (dump_file, stmt, 0, 0);
3664 rhs2 = negate_value (rhs2, gsip);
3665 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3666 update_stmt (stmt);
3669 /* Determine whether STMT is a builtin call that raises an SSA name
3670 to an integer power and has only one use. If so, and this is early
3671 reassociation and unsafe math optimizations are permitted, place
3672 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3673 If any of these conditions does not hold, return FALSE. */
3675 static bool
3676 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3678 tree fndecl, arg1;
3679 REAL_VALUE_TYPE c, cint;
3681 if (!first_pass_instance
3682 || !flag_unsafe_math_optimizations
3683 || !is_gimple_call (stmt)
3684 || !has_single_use (gimple_call_lhs (stmt)))
3685 return false;
3687 fndecl = gimple_call_fndecl (stmt);
3689 if (!fndecl
3690 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3691 return false;
3693 switch (DECL_FUNCTION_CODE (fndecl))
3695 CASE_FLT_FN (BUILT_IN_POW):
3696 *base = gimple_call_arg (stmt, 0);
3697 arg1 = gimple_call_arg (stmt, 1);
3699 if (TREE_CODE (arg1) != REAL_CST)
3700 return false;
3702 c = TREE_REAL_CST (arg1);
3704 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
3705 return false;
3707 *exponent = real_to_integer (&c);
3708 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
3709 if (!real_identical (&c, &cint))
3710 return false;
3712 break;
3714 CASE_FLT_FN (BUILT_IN_POWI):
3715 *base = gimple_call_arg (stmt, 0);
3716 arg1 = gimple_call_arg (stmt, 1);
3718 if (!tree_fits_shwi_p (arg1))
3719 return false;
3721 *exponent = tree_to_shwi (arg1);
3722 break;
3724 default:
3725 return false;
3728 /* Expanding negative exponents is generally unproductive, so we don't
3729 complicate matters with those. Exponents of zero and one should
3730 have been handled by expression folding. */
3731 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
3732 return false;
3734 return true;
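/* Illustrative examples, not part of the original source, of the checks
   above: __builtin_pow (x_1, 4.0) yields *base = x_1, *exponent = 4, and
   __builtin_powi (x_1, 3) yields *exponent = 3, so both are accepted;
   pow (x_1, 2.5) is rejected because 2.5 is not an exact integer, and
   powi (x_1, -2) or powi (x_1, 1) are rejected by the final
   *exponent < 2 check.  */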
3737 /* Recursively linearize a binary expression that is the RHS of STMT.
3738 Place the operands of the expression tree in the vector named OPS. */
3740 static void
3741 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
3742 bool is_associative, bool set_visited)
3744 tree binlhs = gimple_assign_rhs1 (stmt);
3745 tree binrhs = gimple_assign_rhs2 (stmt);
3746 gimple binlhsdef = NULL, binrhsdef = NULL;
3747 bool binlhsisreassoc = false;
3748 bool binrhsisreassoc = false;
3749 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3750 struct loop *loop = loop_containing_stmt (stmt);
3751 tree base = NULL_TREE;
3752 HOST_WIDE_INT exponent = 0;
3754 if (set_visited)
3755 gimple_set_visited (stmt, true);
3757 if (TREE_CODE (binlhs) == SSA_NAME)
3759 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
3760 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
3761 && !stmt_could_throw_p (binlhsdef));
3764 if (TREE_CODE (binrhs) == SSA_NAME)
3766 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
3767 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
3768 && !stmt_could_throw_p (binrhsdef));
3771 /* If the LHS is not reassociable, but the RHS is, we need to swap
3772 them. If neither is reassociable, there is nothing we can do, so
3773 just put them in the ops vector. If the LHS is reassociable,
3774 linearize it. If both are reassociable, then linearize the RHS
3775 and the LHS. */
3777 if (!binlhsisreassoc)
3779 tree temp;
3781 /* If this is not an associative operation like division, give up. */
3782 if (!is_associative)
3784 add_to_ops_vec (ops, binrhs);
3785 return;
3788 if (!binrhsisreassoc)
3790 if (rhscode == MULT_EXPR
3791 && TREE_CODE (binrhs) == SSA_NAME
3792 && acceptable_pow_call (binrhsdef, &base, &exponent))
3794 add_repeat_to_ops_vec (ops, base, exponent);
3795 gimple_set_visited (binrhsdef, true);
3797 else
3798 add_to_ops_vec (ops, binrhs);
3800 if (rhscode == MULT_EXPR
3801 && TREE_CODE (binlhs) == SSA_NAME
3802 && acceptable_pow_call (binlhsdef, &base, &exponent))
3804 add_repeat_to_ops_vec (ops, base, exponent);
3805 gimple_set_visited (binlhsdef, true);
3807 else
3808 add_to_ops_vec (ops, binlhs);
3810 return;
3813 if (dump_file && (dump_flags & TDF_DETAILS))
3815 fprintf (dump_file, "swapping operands of ");
3816 print_gimple_stmt (dump_file, stmt, 0, 0);
3819 swap_ssa_operands (stmt,
3820 gimple_assign_rhs1_ptr (stmt),
3821 gimple_assign_rhs2_ptr (stmt));
3822 update_stmt (stmt);
3824 if (dump_file && (dump_flags & TDF_DETAILS))
3826 fprintf (dump_file, " is now ");
3827 print_gimple_stmt (dump_file, stmt, 0, 0);
3830 /* We want to make it so the lhs is always the reassociative op,
3831 so swap. */
3832 temp = binlhs;
3833 binlhs = binrhs;
3834 binrhs = temp;
3836 else if (binrhsisreassoc)
3838 linearize_expr (stmt);
3839 binlhs = gimple_assign_rhs1 (stmt);
3840 binrhs = gimple_assign_rhs2 (stmt);
3843 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
3844 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
3845 rhscode, loop));
3846 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
3847 is_associative, set_visited);
3849 if (rhscode == MULT_EXPR
3850 && TREE_CODE (binrhs) == SSA_NAME
3851 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
3853 add_repeat_to_ops_vec (ops, base, exponent);
3854 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
3856 else
3857 add_to_ops_vec (ops, binrhs);
3860 /* Repropagate the negates back into subtracts, since no other pass
3861 currently does it. */
3863 static void
3864 repropagate_negates (void)
3866 unsigned int i = 0;
3867 tree negate;
3869 FOR_EACH_VEC_ELT (plus_negates, i, negate)
3871 gimple user = get_single_immediate_use (negate);
3873 if (!user || !is_gimple_assign (user))
3874 continue;
3876 /* The negate operand can be either operand of a PLUS_EXPR
3877 (it can be the LHS if the RHS is a constant for example).
3879 Force the negate operand to the RHS of the PLUS_EXPR, then
3880 transform the PLUS_EXPR into a MINUS_EXPR. */
3881 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
3883 /* If the negated operand appears on the LHS of the
3884 PLUS_EXPR, exchange the operands of the PLUS_EXPR
3885 to force the negated operand to the RHS of the PLUS_EXPR. */
3886 if (gimple_assign_rhs1 (user) == negate)
3888 swap_ssa_operands (user,
3889 gimple_assign_rhs1_ptr (user),
3890 gimple_assign_rhs2_ptr (user));
3893 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
3894 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
3895 if (gimple_assign_rhs2 (user) == negate)
3897 tree rhs1 = gimple_assign_rhs1 (user);
3898 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
3899 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3900 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
3901 update_stmt (user);
3904 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
3906 if (gimple_assign_rhs1 (user) == negate)
3908 /* We have
3909 x = -a
3910 y = x - b
3911 which we transform into
3912 x = a + b
3913 y = -x .
3914 This pushes down the negate which we possibly can merge
3915 into some other operation, hence insert it into the
3916 plus_negates vector. */
3917 gimple feed = SSA_NAME_DEF_STMT (negate);
3918 tree a = gimple_assign_rhs1 (feed);
3919 tree b = gimple_assign_rhs2 (user);
3920 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
3921 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
3922 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)), NULL);
3923 gimple g = gimple_build_assign_with_ops (PLUS_EXPR, x, a, b);
3924 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3925 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x, NULL);
3926 user = gsi_stmt (gsi2);
3927 update_stmt (user);
3928 reassoc_remove_stmt (&gsi);
3929 release_defs (feed);
3930 plus_negates.safe_push (gimple_assign_lhs (user));
3932 else
3934 /* Transform "x = -a; y = b - x" into "y = b + a", getting
3935 rid of one operation. */
3936 gimple feed = SSA_NAME_DEF_STMT (negate);
3937 tree a = gimple_assign_rhs1 (feed);
3938 tree rhs1 = gimple_assign_rhs1 (user);
3939 gimple_stmt_iterator gsi = gsi_for_stmt (user);
3940 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
3941 update_stmt (gsi_stmt (gsi));
3947 /* Returns true if OP is of a type for which we can do reassociation.
3948 That is for wrapping integral types, non-saturating fixed-point types,
3949 and floating-point types when -fassociative-math is enabled. */
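/* For instance (an illustrative reading of the checks below), an
   unsigned integer operand qualifies because its overflow wraps, a
   double operand qualifies only under -fassociative-math, and a
   signed integer operand whose overflow is undefined does not.  */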
3951 static bool
3952 can_reassociate_p (tree op)
3954 tree type = TREE_TYPE (op);
3955 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
3956 || NON_SAT_FIXED_POINT_TYPE_P (type)
3957 || (flag_associative_math && FLOAT_TYPE_P (type)))
3958 return true;
3959 return false;
3962 /* Break up subtract operations in block BB.
3964 We do this top down because we don't know whether the subtract is
3965 part of a possible chain of reassociation except at the top.
3967 IE given
3968 d = f + g
3969 c = a + e
3970 b = c - d
3971 q = b - r
3972 k = t - q
3974 we want to break up k = t - q, but we won't until we've transformed q
3975 = b - r, which won't be broken up until we transform b = c - d.
3977 En passant, clear the GIMPLE visited flag on every statement
3978 and set UIDs within each basic block. */
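/* Illustratively, the chain in the example above is eventually
   rewritten as
   b = c + -d
   q = b + -r
   k = t + -q
   exposing a single flat addition chain to the later reassociation
   steps.  */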
3980 static void
3981 break_up_subtract_bb (basic_block bb)
3983 gimple_stmt_iterator gsi;
3984 basic_block son;
3985 unsigned int uid = 1;
3987 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3989 gimple stmt = gsi_stmt (gsi);
3990 gimple_set_visited (stmt, false);
3991 gimple_set_uid (stmt, uid++);
3993 if (!is_gimple_assign (stmt)
3994 || !can_reassociate_p (gimple_assign_lhs (stmt)))
3995 continue;
3997 /* Look for simple gimple subtract operations. */
3998 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4000 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4001 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4002 continue;
4004 /* Check for a subtract used only in an addition. If this
4005 is the case, transform it into an add of a negate for better
4006 reassociation. IE transform C = A-B into C = A + -B if C
4007 is only used in an addition. */
4008 if (should_break_up_subtract (stmt))
4009 break_up_subtract (stmt, &gsi);
4011 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4012 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4013 plus_negates.safe_push (gimple_assign_lhs (stmt));
4015 for (son = first_dom_son (CDI_DOMINATORS, bb);
4016 son;
4017 son = next_dom_son (CDI_DOMINATORS, son))
4018 break_up_subtract_bb (son);
4021 /* Used for repeated factor analysis. */
4022 struct repeat_factor_d
4024 /* An SSA name that occurs in a multiply chain. */
4025 tree factor;
4027 /* Cached rank of the factor. */
4028 unsigned rank;
4030 /* Number of occurrences of the factor in the chain. */
4031 HOST_WIDE_INT count;
4033 /* An SSA name representing the product of this factor and
4034 all factors appearing later in the repeated factor vector. */
4035 tree repr;
4038 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4039 typedef const struct repeat_factor_d *const_repeat_factor_t;
4042 static vec<repeat_factor> repeat_factor_vec;
4044 /* Used for sorting the repeat factor vector. Sort primarily by
4045 ascending occurrence count, secondarily by descending rank. */
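/* E.g. for the product x * x * y * y * y * z * z * z * z discussed
   below, the sorted vector is x (count 2), y (count 3), z (count 4);
   ties in the count are broken by placing higher-ranked factors
   first.  */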
4047 static int
4048 compare_repeat_factors (const void *x1, const void *x2)
4050 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4051 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4053 if (rf1->count != rf2->count)
4054 return rf1->count < rf2->count ? -1 : 1;
4056 return rf2->rank - rf1->rank;
4059 /* Look for repeated operands in OPS in the multiply tree rooted at
4060 STMT. Replace them with an optimal sequence of multiplies and powi
4061 builtin calls, and remove the used operands from OPS. Return an
4062 SSA name representing the value of the replacement sequence. */
4064 static tree
4065 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4067 unsigned i, j, vec_len;
4068 int ii;
4069 operand_entry_t oe;
4070 repeat_factor_t rf1, rf2;
4071 repeat_factor rfnew;
4072 tree result = NULL_TREE;
4073 tree target_ssa, iter_result;
4074 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4075 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4076 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4077 gimple mul_stmt, pow_stmt;
4079 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4080 target. */
4081 if (!powi_fndecl)
4082 return NULL_TREE;
4084 /* Allocate the repeated factor vector. */
4085 repeat_factor_vec.create (10);
4087 /* Scan the OPS vector for all SSA names in the product and build
4088 up a vector of occurrence counts for each factor. */
4089 FOR_EACH_VEC_ELT (*ops, i, oe)
4091 if (TREE_CODE (oe->op) == SSA_NAME)
4093 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4095 if (rf1->factor == oe->op)
4097 rf1->count += oe->count;
4098 break;
4102 if (j >= repeat_factor_vec.length ())
4104 rfnew.factor = oe->op;
4105 rfnew.rank = oe->rank;
4106 rfnew.count = oe->count;
4107 rfnew.repr = NULL_TREE;
4108 repeat_factor_vec.safe_push (rfnew);
4113 /* Sort the repeated factor vector by (a) increasing occurrence count,
4114 and (b) decreasing rank. */
4115 repeat_factor_vec.qsort (compare_repeat_factors);
4117 /* It is generally best to combine as many base factors as possible
4118 into a product before applying __builtin_powi to the result.
4119 However, the sort order chosen for the repeated factor vector
4120 allows us to cache partial results for the product of the base
4121 factors for subsequent use. When we already have a cached partial
4122 result from a previous iteration, it is best to make use of it
4123 before looking for another __builtin_powi opportunity.
4125 As an example, consider x * x * y * y * y * z * z * z * z.
4126 We want to first compose the product x * y * z, raise it to the
4127 second power, then multiply this by y * z, and finally multiply
4128 by z. This can be done in 5 multiplies provided we cache y * z
4129 for use in both expressions:
4131 t1 = y * z
4132 t2 = t1 * x
4133 t3 = t2 * t2
4134 t4 = t1 * t3
4135 result = t4 * z
4137 If we instead ignored the cached y * z and first multiplied by
4138 the __builtin_powi opportunity z * z, we would get the inferior:
4140 t1 = y * z
4141 t2 = t1 * x
4142 t3 = t2 * t2
4143 t4 = z * z
4144 t5 = t3 * t4
4145 result = t5 * y */
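/* (Illustrative trace of the loop below on that example, with the
   sorted vector x (2), y (3), z (4) and temporary names chosen
   freely: the first iteration builds the cached representatives z,
   t1 = y * z and t2 = t1 * x, emits t3 = __builtin_powi (t2, 2), and
   decrements every count by two; the following iterations then reuse
   the cached t1 (y still has count 1) and finally z itself, producing
   t4 = t3 * t1 and result = t4 * z.)  */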
4147 vec_len = repeat_factor_vec.length ();
4149 /* Repeatedly look for opportunities to create a builtin_powi call. */
4150 while (true)
4152 HOST_WIDE_INT power;
4154 /* First look for the largest cached product of factors from
4155 preceding iterations. If found, create a builtin_powi for
4156 it if the minimum occurrence count for its factors is at
4157 least 2, or just use this cached product as our next
4158 multiplicand if the minimum occurrence count is 1. */
4159 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4161 if (rf1->repr && rf1->count > 0)
4162 break;
4165 if (j < vec_len)
4167 power = rf1->count;
4169 if (power == 1)
4171 iter_result = rf1->repr;
4173 if (dump_file && (dump_flags & TDF_DETAILS))
4175 unsigned elt;
4176 repeat_factor_t rf;
4177 fputs ("Multiplying by cached product ", dump_file);
4178 for (elt = j; elt < vec_len; elt++)
4180 rf = &repeat_factor_vec[elt];
4181 print_generic_expr (dump_file, rf->factor, 0);
4182 if (elt < vec_len - 1)
4183 fputs (" * ", dump_file);
4185 fputs ("\n", dump_file);
4188 else
4190 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4191 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4192 build_int_cst (integer_type_node,
4193 power));
4194 gimple_call_set_lhs (pow_stmt, iter_result);
4195 gimple_set_location (pow_stmt, gimple_location (stmt));
4196 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4198 if (dump_file && (dump_flags & TDF_DETAILS))
4200 unsigned elt;
4201 repeat_factor_t rf;
4202 fputs ("Building __builtin_pow call for cached product (",
4203 dump_file);
4204 for (elt = j; elt < vec_len; elt++)
4206 rf = &repeat_factor_vec[elt];
4207 print_generic_expr (dump_file, rf->factor, 0);
4208 if (elt < vec_len - 1)
4209 fputs (" * ", dump_file);
4211 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4212 power);
4216 else
4218 /* Otherwise, find the first factor in the repeated factor
4219 vector whose occurrence count is at least 2. If no such
4220 factor exists, there are no builtin_powi opportunities
4221 remaining. */
4222 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4224 if (rf1->count >= 2)
4225 break;
4228 if (j >= vec_len)
4229 break;
4231 power = rf1->count;
4233 if (dump_file && (dump_flags & TDF_DETAILS))
4235 unsigned elt;
4236 repeat_factor_t rf;
4237 fputs ("Building __builtin_pow call for (", dump_file);
4238 for (elt = j; elt < vec_len; elt++)
4240 rf = &repeat_factor_vec[elt];
4241 print_generic_expr (dump_file, rf->factor, 0);
4242 if (elt < vec_len - 1)
4243 fputs (" * ", dump_file);
4245 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4248 reassociate_stats.pows_created++;
4250 /* Visit each element of the vector in reverse order (so that
4251 high-occurrence elements are visited first, and within the
4252 same occurrence count, lower-ranked elements are visited
4253 first). Form a linear product of all elements in this order
4254 whose occurrence count is at least that of element J.
4255 Record the SSA name representing the product of each element
4256 with all subsequent elements in the vector. */
4257 if (j == vec_len - 1)
4258 rf1->repr = rf1->factor;
4259 else
4261 for (ii = vec_len - 2; ii >= (int)j; ii--)
4263 tree op1, op2;
4265 rf1 = &repeat_factor_vec[ii];
4266 rf2 = &repeat_factor_vec[ii + 1];
4268 /* Init the last factor's representative to be itself. */
4269 if (!rf2->repr)
4270 rf2->repr = rf2->factor;
4272 op1 = rf1->factor;
4273 op2 = rf2->repr;
4275 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4276 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR,
4277 target_ssa,
4278 op1, op2);
4279 gimple_set_location (mul_stmt, gimple_location (stmt));
4280 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4281 rf1->repr = target_ssa;
4283 /* Don't reprocess the multiply we just introduced. */
4284 gimple_set_visited (mul_stmt, true);
4288 /* Form a call to __builtin_powi for the maximum product
4289 just formed, raised to the power obtained earlier. */
4290 rf1 = &repeat_factor_vec[j];
4291 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4292 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4293 build_int_cst (integer_type_node,
4294 power));
4295 gimple_call_set_lhs (pow_stmt, iter_result);
4296 gimple_set_location (pow_stmt, gimple_location (stmt));
4297 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4300 /* If we previously formed at least one other builtin_powi call,
4301 form the product of this one and those others. */
4302 if (result)
4304 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4305 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_result,
4306 result, iter_result);
4307 gimple_set_location (mul_stmt, gimple_location (stmt));
4308 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4309 gimple_set_visited (mul_stmt, true);
4310 result = new_result;
4312 else
4313 result = iter_result;
4315 /* Decrement the occurrence count of each element in the product
4316 by the count found above, and remove this many copies of each
4317 factor from OPS. */
4318 for (i = j; i < vec_len; i++)
4320 unsigned k = power;
4321 unsigned n;
4323 rf1 = &repeat_factor_vec[i];
4324 rf1->count -= power;
4326 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4328 if (oe->op == rf1->factor)
4330 if (oe->count <= k)
4332 ops->ordered_remove (n);
4333 k -= oe->count;
4335 if (k == 0)
4336 break;
4338 else
4340 oe->count -= k;
4341 break;
4348 /* At this point all elements in the repeated factor vector have a
4349 remaining occurrence count of 0 or 1, and those with a count of 1
4350 don't have cached representatives. Re-sort the ops vector and
4351 clean up. */
4352 ops->qsort (sort_by_operand_rank);
4353 repeat_factor_vec.release ();
4355 /* Return the final product computed herein. Note that there may
4356 still be some elements with single occurrence count left in OPS;
4357 those will be handled by the normal reassociation logic. */
4358 return result;
4361 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4363 static void
4364 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4366 tree rhs1;
4368 if (dump_file && (dump_flags & TDF_DETAILS))
4370 fprintf (dump_file, "Transforming ");
4371 print_gimple_stmt (dump_file, stmt, 0, 0);
4374 rhs1 = gimple_assign_rhs1 (stmt);
4375 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4376 update_stmt (stmt);
4377 remove_visited_stmt_chain (rhs1);
4379 if (dump_file && (dump_flags & TDF_DETAILS))
4381 fprintf (dump_file, " into ");
4382 print_gimple_stmt (dump_file, stmt, 0, 0);
4386 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4388 static void
4389 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4390 tree rhs1, tree rhs2)
4392 if (dump_file && (dump_flags & TDF_DETAILS))
4394 fprintf (dump_file, "Transforming ");
4395 print_gimple_stmt (dump_file, stmt, 0, 0);
4398 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4399 update_stmt (gsi_stmt (*gsi));
4400 remove_visited_stmt_chain (rhs1);
4402 if (dump_file && (dump_flags & TDF_DETAILS))
4404 fprintf (dump_file, " into ");
4405 print_gimple_stmt (dump_file, stmt, 0, 0);
4409 /* Reassociate expressions in basic block BB and, recursively, in
4410 its children in the post-dominator tree. */
4412 static void
4413 reassociate_bb (basic_block bb)
4415 gimple_stmt_iterator gsi;
4416 basic_block son;
4417 gimple stmt = last_stmt (bb);
4419 if (stmt && !gimple_visited_p (stmt))
4420 maybe_optimize_range_tests (stmt);
4422 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4424 stmt = gsi_stmt (gsi);
4426 if (is_gimple_assign (stmt)
4427 && !stmt_could_throw_p (stmt))
4429 tree lhs, rhs1, rhs2;
4430 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4432 /* If this is not a gimple binary expression, there is
4433 nothing for us to do with it. */
4434 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4435 continue;
4437 /* If this was part of an already processed statement,
4438 we don't need to touch it again. */
4439 if (gimple_visited_p (stmt))
4441 /* This statement might have become dead because of previous
4442 reassociations. */
4443 if (has_zero_uses (gimple_get_lhs (stmt)))
4445 reassoc_remove_stmt (&gsi);
4446 release_defs (stmt);
4447 /* We might have removed the last stmt above, which
4448 leaves the iterator at the end of the sequence.
4449 Reset it to the last stmt of the block in that case;
4450 if the block has become empty as well, there is
4451 nothing left to iterate over and we need to
4452 bail out. */
4453 if (gsi_end_p (gsi))
4455 gsi = gsi_last_bb (bb);
4456 if (gsi_end_p (gsi))
4457 break;
4460 continue;
4463 lhs = gimple_assign_lhs (stmt);
4464 rhs1 = gimple_assign_rhs1 (stmt);
4465 rhs2 = gimple_assign_rhs2 (stmt);
4467 /* For operations other than bitwise ops and min/max we cannot
4468 reassociate all types; verify the operand types here. */
4469 if (rhs_code != BIT_IOR_EXPR
4470 && rhs_code != BIT_AND_EXPR
4471 && rhs_code != BIT_XOR_EXPR
4472 && rhs_code != MIN_EXPR
4473 && rhs_code != MAX_EXPR
4474 && (!can_reassociate_p (lhs)
4475 || !can_reassociate_p (rhs1)
4476 || !can_reassociate_p (rhs2)))
4477 continue;
4479 if (associative_tree_code (rhs_code))
4481 auto_vec<operand_entry_t> ops;
4482 tree powi_result = NULL_TREE;
4484 /* There may be no immediate uses left by the time we
4485 get here because we may have eliminated them all. */
4486 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4487 continue;
4489 gimple_set_visited (stmt, true);
4490 linearize_expr_tree (&ops, stmt, true, true);
4491 ops.qsort (sort_by_operand_rank);
4492 optimize_ops_list (rhs_code, &ops);
4493 if (undistribute_ops_list (rhs_code, &ops,
4494 loop_containing_stmt (stmt)))
4496 ops.qsort (sort_by_operand_rank);
4497 optimize_ops_list (rhs_code, &ops);
4500 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4501 optimize_range_tests (rhs_code, &ops);
4503 if (first_pass_instance
4504 && rhs_code == MULT_EXPR
4505 && flag_unsafe_math_optimizations)
4506 powi_result = attempt_builtin_powi (stmt, &ops);
4508 /* If the operand vector is now empty, all operands were
4509 consumed by the __builtin_powi optimization. */
4510 if (ops.length () == 0)
4511 transform_stmt_to_copy (&gsi, stmt, powi_result);
4512 else if (ops.length () == 1)
4514 tree last_op = ops.last ()->op;
4516 if (powi_result)
4517 transform_stmt_to_multiply (&gsi, stmt, last_op,
4518 powi_result);
4519 else
4520 transform_stmt_to_copy (&gsi, stmt, last_op);
4522 else
4524 enum machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4525 int ops_num = ops.length ();
4526 int width = get_reassociation_width (ops_num, rhs_code, mode);
4527 tree new_lhs = lhs;
4529 if (dump_file && (dump_flags & TDF_DETAILS))
4530 fprintf (dump_file,
4531 "Width = %d was chosen for reassociation\n", width);
4533 if (width > 1
4534 && ops.length () > 3)
4535 rewrite_expr_tree_parallel (stmt, width, ops);
4536 else
4538 /* When there are three operands left, we want
4539 to make sure the ones that get the double
4540 binary op are chosen wisely. */
4541 int len = ops.length ();
4542 if (len >= 3)
4543 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4545 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4546 powi_result != NULL);
4549 /* If we combined some repeated factors into a
4550 __builtin_powi call, multiply that result by the
4551 reassociated operands. */
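/* E.g. (illustrative) if the original statement computed
   lhs = a * b * c * c, the repeated factor c was replaced above by
   powi_result = __builtin_powi (c, 2), the remaining operands were
   rewritten into a statement computing a * b, and we now append a
   multiply so that lhs = powi_result * (a * b).  */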
4552 if (powi_result)
4554 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4555 tree type = TREE_TYPE (lhs);
4556 tree target_ssa = make_temp_ssa_name (type, NULL,
4557 "reassocpow");
4558 gimple_set_lhs (lhs_stmt, target_ssa);
4559 update_stmt (lhs_stmt);
4560 if (lhs != new_lhs)
4561 target_ssa = new_lhs;
4562 mul_stmt = gimple_build_assign_with_ops (MULT_EXPR, lhs,
4563 powi_result,
4564 target_ssa);
4565 gimple_set_location (mul_stmt, gimple_location (stmt));
4566 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4572 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4573 son;
4574 son = next_dom_son (CDI_POST_DOMINATORS, son))
4575 reassociate_bb (son);
4578 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4579 void debug_ops_vector (vec<operand_entry_t> ops);
4581 /* Dump the operand entry vector OPS to FILE. */
4583 void
4584 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4586 operand_entry_t oe;
4587 unsigned int i;
4589 FOR_EACH_VEC_ELT (ops, i, oe)
4591 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4592 print_generic_expr (file, oe->op, 0);
4596 /* Dump the operand entry vector OPS to STDERR. */
4598 DEBUG_FUNCTION void
4599 debug_ops_vector (vec<operand_entry_t> ops)
4601 dump_ops_vector (stderr, ops);
4604 static void
4605 do_reassoc (void)
4607 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4608 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4611 /* Initialize the reassociation pass. */
4613 static void
4614 init_reassoc (void)
4616 int i;
4617 long rank = 2;
4618 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4620 /* Find the loops, so that we can prevent moving calculations in
4621 them. */
4622 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
4624 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
4626 operand_entry_pool = create_alloc_pool ("operand entry pool",
4627 sizeof (struct operand_entry), 30);
4628 next_operand_entry_id = 0;
4630 /* Reverse RPO (Reverse Post Order) will give us something where
4631 deeper loops come later. */
4632 pre_and_rev_post_order_compute (NULL, bbs, false);
4633 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
4634 operand_rank = pointer_map_create ();
4636 /* Give each default definition a distinct rank. This includes
4637 parameters and the static chain. Walk backwards over all
4638 SSA names so that we get proper rank ordering according
4639 to tree_swap_operands_p. */
4640 for (i = num_ssa_names - 1; i > 0; --i)
4642 tree name = ssa_name (i);
4643 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
4644 insert_operand_rank (name, ++rank);
4647 /* Set up rank for each BB */
4648 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
4649 bb_rank[bbs[i]] = ++rank << 16;
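/* For instance (illustrative), with two default definitions the SSA
   names receive ranks 3 and 4 above, and the basic blocks then get
   base ranks 5 << 16, 6 << 16, ..., so any rank derived from a block
   is strictly larger than the rank of any default definition.  */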
4651 free (bbs);
4652 calculate_dominance_info (CDI_POST_DOMINATORS);
4653 plus_negates = vNULL;
4656 /* Cleanup after the reassociation pass, and print stats if
4657 requested. */
4659 static void
4660 fini_reassoc (void)
4662 statistics_counter_event (cfun, "Linearized",
4663 reassociate_stats.linearized);
4664 statistics_counter_event (cfun, "Constants eliminated",
4665 reassociate_stats.constants_eliminated);
4666 statistics_counter_event (cfun, "Ops eliminated",
4667 reassociate_stats.ops_eliminated);
4668 statistics_counter_event (cfun, "Statements rewritten",
4669 reassociate_stats.rewritten);
4670 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
4671 reassociate_stats.pows_encountered);
4672 statistics_counter_event (cfun, "Built-in powi calls created",
4673 reassociate_stats.pows_created);
4675 pointer_map_destroy (operand_rank);
4676 free_alloc_pool (operand_entry_pool);
4677 free (bb_rank);
4678 plus_negates.release ();
4679 free_dominance_info (CDI_POST_DOMINATORS);
4680 loop_optimizer_finalize ();
4683 /* Gate and execute functions for Reassociation. */
4685 static unsigned int
4686 execute_reassoc (void)
4688 init_reassoc ();
4690 do_reassoc ();
4691 repropagate_negates ();
4693 fini_reassoc ();
4694 return 0;
4697 namespace {
4699 const pass_data pass_data_reassoc =
4701 GIMPLE_PASS, /* type */
4702 "reassoc", /* name */
4703 OPTGROUP_NONE, /* optinfo_flags */
4704 true, /* has_execute */
4705 TV_TREE_REASSOC, /* tv_id */
4706 ( PROP_cfg | PROP_ssa ), /* properties_required */
4707 0, /* properties_provided */
4708 0, /* properties_destroyed */
4709 0, /* todo_flags_start */
4710 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
4713 class pass_reassoc : public gimple_opt_pass
4715 public:
4716 pass_reassoc (gcc::context *ctxt)
4717 : gimple_opt_pass (pass_data_reassoc, ctxt)
4720 /* opt_pass methods: */
4721 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
4722 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
4723 virtual unsigned int execute (function *) { return execute_reassoc (); }
4725 }; // class pass_reassoc
4727 } // anon namespace
4729 gimple_opt_pass *
4730 make_pass_reassoc (gcc::context *ctxt)
4732 return new pass_reassoc (ctxt);