libtool-version: Bump soversion.
[official-gcc.git] / gcc / tree-ssa-reassoc.c
blob9952222f71512a9a42df305903f0d6c0c01a4ad5
1 /* Reassociation for trees.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "hash-set.h"
29 #include "machmode.h"
30 #include "vec.h"
31 #include "double-int.h"
32 #include "input.h"
33 #include "alias.h"
34 #include "symtab.h"
35 #include "wide-int.h"
36 #include "inchash.h"
37 #include "tree.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "predict.h"
41 #include "hard-reg-set.h"
42 #include "function.h"
43 #include "dominance.h"
44 #include "cfg.h"
45 #include "cfganal.h"
46 #include "basic-block.h"
47 #include "gimple-pretty-print.h"
48 #include "tree-inline.h"
49 #include "hash-map.h"
50 #include "tree-ssa-alias.h"
51 #include "internal-fn.h"
52 #include "gimple-fold.h"
53 #include "tree-eh.h"
54 #include "gimple-expr.h"
55 #include "is-a.h"
56 #include "gimple.h"
57 #include "gimple-iterator.h"
58 #include "gimplify-me.h"
59 #include "gimple-ssa.h"
60 #include "tree-cfg.h"
61 #include "tree-phinodes.h"
62 #include "ssa-iterators.h"
63 #include "stringpool.h"
64 #include "tree-ssanames.h"
65 #include "tree-ssa-loop-niter.h"
66 #include "tree-ssa-loop.h"
67 #include "hashtab.h"
68 #include "flags.h"
69 #include "statistics.h"
70 #include "real.h"
71 #include "fixed-value.h"
72 #include "insn-config.h"
73 #include "expmed.h"
74 #include "dojump.h"
75 #include "explow.h"
76 #include "calls.h"
77 #include "emit-rtl.h"
78 #include "varasm.h"
79 #include "stmt.h"
80 #include "expr.h"
81 #include "tree-dfa.h"
82 #include "tree-ssa.h"
83 #include "tree-iterator.h"
84 #include "tree-pass.h"
85 #include "alloc-pool.h"
86 #include "langhooks.h"
87 #include "cfgloop.h"
88 #include "target.h"
89 #include "params.h"
90 #include "diagnostic-core.h"
91 #include "builtins.h"
92 #include "gimplify.h"
93 #include "insn-codes.h"
94 #include "optabs.h"
96 /* This is a simple global reassociation pass. It is, in part, based
97 on the LLVM pass of the same name (They do some things more/less
98 than we do, in different orders, etc).
100 It consists of five steps:
102 1. Breaking up subtract operations into addition + negate, where
103 it would promote the reassociation of adds.
105 2. Left linearization of the expression trees, so that (A+B)+(C+D)
106 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
107 During linearization, we place the operands of the binary
108 expressions into a vector of operand_entry_t
110 3. Optimization of the operand lists, eliminating things like a +
111 -a, a & a, etc.
113 3a. Combine repeated factors with the same occurrence counts
114 into a __builtin_powi call that will later be optimized into
115 an optimal number of multiplies.
117 4. Rewrite the expression trees we linearized and optimized so
118 they are in proper rank order.
120 5. Repropagate negates, as nothing else will clean it up ATM.
122 A bit of theory on #4, since nobody seems to write anything down
123 about why it makes sense to do it the way they do it:
125 We could do this much nicer theoretically, but don't (for reasons
126 explained after how to do it theoretically nice :P).
128 In order to promote the most redundancy elimination, you want
129 binary expressions whose operands are the same rank (or
130 preferably, the same value) exposed to the redundancy eliminator,
131 for possible elimination.
133 So the way to do this if we really cared, is to build the new op
134 tree from the leaves to the roots, merging as you go, and putting the
135 new op on the end of the worklist, until you are left with one
136 thing on the worklist.
138 IE if you have to rewrite the following set of operands (listed with
139 rank in parentheses), with opcode PLUS_EXPR:
141 a (1), b (1), c (1), d (2), e (2)
144 We start with our merge worklist empty, and the ops list with all of
145 those on it.
147 You want to first merge all leaves of the same rank, as much as
148 possible.
150 So first build a binary op of
152 mergetmp = a + b, and put "mergetmp" on the merge worklist.
154 Because there is no three operand form of PLUS_EXPR, c is not going to
155 be exposed to redundancy elimination as a rank 1 operand.
157 So you might as well throw it on the merge worklist (you could also
158 consider it to now be a rank two operand, and merge it with d and e,
159 but in this case, you then have evicted e from a binary op. So at
160 least in this situation, you can't win.)
162 Then build a binary op of d + e
163 mergetmp2 = d + e
165 and put mergetmp2 on the merge worklist.
167 so merge worklist = {mergetmp, c, mergetmp2}
169 Continue building binary ops of these operations until you have only
170 one operation left on the worklist.
172 So we have
174 build binary op
175 mergetmp3 = mergetmp + c
177 worklist = {mergetmp2, mergetmp3}
179 mergetmp4 = mergetmp2 + mergetmp3
181 worklist = {mergetmp4}
183 because we have one operation left, we can now just set the original
184 statement equal to the result of that operation.
186 This will at least expose a + b and d + e to redundancy elimination
187 as binary operations.
189 For extra points, you can reuse the old statements to build the
190 mergetmps, since you shouldn't run out.
192 So why don't we do this?
194 Because it's expensive, and rarely will help. Most trees we are
195 reassociating have 3 or less ops. If they have 2 ops, they already
196 will be written into a nice single binary op. If you have 3 ops, a
197 single simple check suffices to tell you whether the first two are of the
198 same rank. If so, you know to order it
200 mergetmp = op1 + op2
201 newstmt = mergetmp + op3
203 instead of
204 mergetmp = op2 + op3
205 newstmt = mergetmp + op1
207 If all three are of the same rank, you can't expose them all in a
208 single binary operator anyway, so the above is *still* the best you
209 can do.
211 Thus, this is what we do. When we have three ops left, we check to see
212 what order to put them in, and call it a day. As a nod to vector sum
213 reduction, we check if any of the ops are really a phi node that is a
214 destructive update for the associating op, and keep the destructive
215 update together for vector sum reduction recognition. */
218 /* Statistics */
219 static struct
221 int linearized;
222 int constants_eliminated;
223 int ops_eliminated;
224 int rewritten;
225 int pows_encountered;
226 int pows_created;
227 } reassociate_stats;
229 /* Operator, rank pair. */
230 typedef struct operand_entry
232 unsigned int rank;
233 int id;
234 tree op;
235 unsigned int count;
236 } *operand_entry_t;
238 static alloc_pool operand_entry_pool;
240 /* This is used to assign a unique ID to each struct operand_entry
241 so that qsort results are identical on different hosts. */
242 static int next_operand_entry_id;
244 /* Starting rank number for a given basic block, so that we can rank
245 operations using unmovable instructions in that BB based on the bb
246 depth. */
247 static long *bb_rank;
249 /* Operand->rank hashtable. */
250 static hash_map<tree, long> *operand_rank;
252 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
253 all basic blocks the CFG should be adjusted - basic blocks
254 split right after that SSA_NAME's definition statement and before
255 the only use, which must be a bit ior. */
256 static vec<tree> reassoc_branch_fixups;
258 /* Forward decls. */
259 static long get_rank (tree);
260 static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
262 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
263 possibly added by gsi_remove. */
265 bool
266 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
268 gimple stmt = gsi_stmt (*gsi);
270 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
271 return gsi_remove (gsi, true);
273 gimple_stmt_iterator prev = *gsi;
274 gsi_prev (&prev);
275 unsigned uid = gimple_uid (stmt);
276 basic_block bb = gimple_bb (stmt);
277 bool ret = gsi_remove (gsi, true);
278 if (!gsi_end_p (prev))
279 gsi_next (&prev);
280 else
281 prev = gsi_start_bb (bb);
282 gimple end_stmt = gsi_stmt (*gsi);
283 while ((stmt = gsi_stmt (prev)) != end_stmt)
285 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
286 gimple_set_uid (stmt, uid);
287 gsi_next (&prev);
289 return ret;
292 /* Bias amount for loop-carried phis. We want this to be larger than
293 the depth of any reassociation tree we can see, but not larger than
294 the rank difference between two blocks. */
295 #define PHI_LOOP_BIAS (1 << 15)
297 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
298 an innermost loop, and the phi has only a single use which is inside
299 the loop, then the rank is the block rank of the loop latch plus an
300 extra bias for the loop-carried dependence. This causes expressions
301 calculated into an accumulator variable to be independent for each
302 iteration of the loop. If STMT is some other phi, the rank is the
303 block rank of its containing block. */
304 static long
305 phi_rank (gimple stmt)
307 basic_block bb = gimple_bb (stmt);
308 struct loop *father = bb->loop_father;
309 tree res;
310 unsigned i;
311 use_operand_p use;
312 gimple use_stmt;
314 /* We only care about real loops (those with a latch). */
315 if (!father->latch)
316 return bb_rank[bb->index];
318 /* Interesting phis must be in headers of innermost loops. */
319 if (bb != father->header
320 || father->inner)
321 return bb_rank[bb->index];
323 /* Ignore virtual SSA_NAMEs. */
324 res = gimple_phi_result (stmt);
325 if (virtual_operand_p (res))
326 return bb_rank[bb->index];
328 /* The phi definition must have a single use, and that use must be
329 within the loop. Otherwise this isn't an accumulator pattern. */
330 if (!single_imm_use (res, &use, &use_stmt)
331 || gimple_bb (use_stmt)->loop_father != father)
332 return bb_rank[bb->index];
334 /* Look for phi arguments from within the loop. If found, bias this phi. */
335 for (i = 0; i < gimple_phi_num_args (stmt); i++)
337 tree arg = gimple_phi_arg_def (stmt, i);
338 if (TREE_CODE (arg) == SSA_NAME
339 && !SSA_NAME_IS_DEFAULT_DEF (arg))
341 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
342 if (gimple_bb (def_stmt)->loop_father == father)
343 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
347 /* Must be an uninteresting phi. */
348 return bb_rank[bb->index];
351 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
352 loop-carried dependence of an innermost loop, return TRUE; else
353 return FALSE. */
354 static bool
355 loop_carried_phi (tree exp)
357 gimple phi_stmt;
358 long block_rank;
360 if (TREE_CODE (exp) != SSA_NAME
361 || SSA_NAME_IS_DEFAULT_DEF (exp))
362 return false;
364 phi_stmt = SSA_NAME_DEF_STMT (exp);
366 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
367 return false;
369 /* Non-loop-carried phis have block rank. Loop-carried phis have
370 an additional bias added in. If this phi doesn't have block rank,
371 it's biased and should not be propagated. */
372 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
374 if (phi_rank (phi_stmt) != block_rank)
375 return true;
377 return false;
380 /* Return the maximum of RANK and the rank that should be propagated
381 from expression OP. For most operands, this is just the rank of OP.
382 For loop-carried phis, the value is zero to avoid undoing the bias
383 in favor of the phi. */
384 static long
385 propagate_rank (long rank, tree op)
387 long op_rank;
389 if (loop_carried_phi (op))
390 return rank;
392 op_rank = get_rank (op);
394 return MAX (rank, op_rank);
397 /* Look up the operand rank structure for expression E. */
399 static inline long
400 find_operand_rank (tree e)
402 long *slot = operand_rank->get (e);
403 return slot ? *slot : -1;
406 /* Insert {E,RANK} into the operand rank hashtable. */
408 static inline void
409 insert_operand_rank (tree e, long rank)
411 gcc_assert (rank > 0);
412 gcc_assert (!operand_rank->put (e, rank));
415 /* Given an expression E, return the rank of the expression. */
417 static long
418 get_rank (tree e)
420 /* Constants have rank 0. */
421 if (is_gimple_min_invariant (e))
422 return 0;
424 /* SSA_NAME's have the rank of the expression they are the result
426 For globals and uninitialized values, the rank is 0.
427 For function arguments, use the pre-setup rank.
428 For PHI nodes, stores, asm statements, etc, we use the rank of
429 the BB.
430 For simple operations, the rank is the maximum rank of any of
431 its operands, or the bb_rank, whichever is less.
432 I make no claims that this is optimal, however, it gives good
433 results. */
435 /* We make an exception to the normal ranking system to break
436 dependences of accumulator variables in loops. Suppose we
437 have a simple one-block loop containing:
439 x_1 = phi(x_0, x_2)
440 b = a + x_1
441 c = b + d
442 x_2 = c + e
444 As shown, each iteration of the calculation into x is fully
445 dependent upon the iteration before it. We would prefer to
446 see this in the form:
448 x_1 = phi(x_0, x_2)
449 b = a + d
450 c = b + e
451 x_2 = c + x_1
453 If the loop is unrolled, the calculations of b and c from
454 different iterations can be interleaved.
456 To obtain this result during reassociation, we bias the rank
457 of the phi definition x_1 upward, when it is recognized as an
458 accumulator pattern. The artificial rank causes it to be
459 added last, providing the desired independence. */
461 if (TREE_CODE (e) == SSA_NAME)
463 gimple stmt;
464 long rank;
465 int i, n;
466 tree op;
468 if (SSA_NAME_IS_DEFAULT_DEF (e))
469 return find_operand_rank (e);
471 stmt = SSA_NAME_DEF_STMT (e);
472 if (gimple_code (stmt) == GIMPLE_PHI)
473 return phi_rank (stmt);
475 if (!is_gimple_assign (stmt)
476 || gimple_vdef (stmt))
477 return bb_rank[gimple_bb (stmt)->index];
479 /* If we already have a rank for this expression, use that. */
480 rank = find_operand_rank (e);
481 if (rank != -1)
482 return rank;
484 /* Otherwise, find the maximum rank for the operands. As an
485 exception, remove the bias from loop-carried phis when propagating
486 the rank so that dependent operations are not also biased. */
487 rank = 0;
488 if (gimple_assign_single_p (stmt))
490 tree rhs = gimple_assign_rhs1 (stmt);
491 n = TREE_OPERAND_LENGTH (rhs);
492 if (n == 0)
493 rank = propagate_rank (rank, rhs);
494 else
496 for (i = 0; i < n; i++)
498 op = TREE_OPERAND (rhs, i);
500 if (op != NULL_TREE)
501 rank = propagate_rank (rank, op);
505 else
507 n = gimple_num_ops (stmt);
508 for (i = 1; i < n; i++)
510 op = gimple_op (stmt, i);
511 gcc_assert (op);
512 rank = propagate_rank (rank, op);
516 if (dump_file && (dump_flags & TDF_DETAILS))
518 fprintf (dump_file, "Rank for ");
519 print_generic_expr (dump_file, e, 0);
520 fprintf (dump_file, " is %ld\n", (rank + 1));
523 /* Note the rank in the hashtable so we don't recompute it. */
524 insert_operand_rank (e, (rank + 1));
525 return (rank + 1);
528 /* Globals, etc, are rank 0 */
529 return 0;
533 /* We want integer ones to end up last no matter what, since they are
534 the ones we can do the most with. */
535 #define INTEGER_CONST_TYPE 1 << 3
536 #define FLOAT_CONST_TYPE 1 << 2
537 #define OTHER_CONST_TYPE 1 << 1
539 /* Classify an invariant tree into integer, float, or other, so that
540 we can sort them to be near other constants of the same type. */
541 static inline int
542 constant_type (tree t)
544 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
545 return INTEGER_CONST_TYPE;
546 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
547 return FLOAT_CONST_TYPE;
548 else
549 return OTHER_CONST_TYPE;
552 /* qsort comparison function to sort operand entries PA and PB by rank
553 so that the sorted array is ordered by rank in decreasing order. */
554 static int
555 sort_by_operand_rank (const void *pa, const void *pb)
557 const operand_entry_t oea = *(const operand_entry_t *)pa;
558 const operand_entry_t oeb = *(const operand_entry_t *)pb;
560 /* It's nicer for optimize_expression if constants that are likely
561 to fold when added/multiplied//whatever are put next to each
562 other. Since all constants have rank 0, order them by type. */
563 if (oeb->rank == 0 && oea->rank == 0)
565 if (constant_type (oeb->op) != constant_type (oea->op))
566 return constant_type (oeb->op) - constant_type (oea->op);
567 else
568 /* To make sorting result stable, we use unique IDs to determine
569 order. */
570 return oeb->id - oea->id;
573 /* Lastly, make sure the versions that are the same go next to each
574 other. */
575 if ((oeb->rank - oea->rank == 0)
576 && TREE_CODE (oea->op) == SSA_NAME
577 && TREE_CODE (oeb->op) == SSA_NAME)
579 /* As SSA_NAME_VERSION is assigned pretty randomly, because we reuse
580 versions of removed SSA_NAMEs, so if possible, prefer to sort
581 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
582 See PR60418. */
583 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
584 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
585 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
587 gimple stmta = SSA_NAME_DEF_STMT (oea->op);
588 gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
589 basic_block bba = gimple_bb (stmta);
590 basic_block bbb = gimple_bb (stmtb);
591 if (bbb != bba)
593 if (bb_rank[bbb->index] != bb_rank[bba->index])
594 return bb_rank[bbb->index] - bb_rank[bba->index];
596 else
598 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
599 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
600 if (da != db)
601 return da ? 1 : -1;
605 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
606 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
607 else
608 return oeb->id - oea->id;
611 if (oeb->rank != oea->rank)
612 return oeb->rank - oea->rank;
613 else
614 return oeb->id - oea->id;
617 /* Add an operand entry to *OPS for the tree operand OP. */
619 static void
620 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
622 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
624 oe->op = op;
625 oe->rank = get_rank (op);
626 oe->id = next_operand_entry_id++;
627 oe->count = 1;
628 ops->safe_push (oe);
631 /* Add an operand entry to *OPS for the tree operand OP with repeat
632 count REPEAT. */
634 static void
635 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
636 HOST_WIDE_INT repeat)
638 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
640 oe->op = op;
641 oe->rank = get_rank (op);
642 oe->id = next_operand_entry_id++;
643 oe->count = repeat;
644 ops->safe_push (oe);
646 reassociate_stats.pows_encountered++;
649 /* Return true if STMT is reassociable operation containing a binary
650 operation with tree code CODE, and is inside LOOP. */
652 static bool
653 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
655 basic_block bb = gimple_bb (stmt);
657 if (gimple_bb (stmt) == NULL)
658 return false;
660 if (!flow_bb_inside_loop_p (loop, bb))
661 return false;
663 if (is_gimple_assign (stmt)
664 && gimple_assign_rhs_code (stmt) == code
665 && has_single_use (gimple_assign_lhs (stmt)))
666 return true;
668 return false;
672 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
673 operand of the negate operation. Otherwise, return NULL. */
675 static tree
676 get_unary_op (tree name, enum tree_code opcode)
678 gimple stmt = SSA_NAME_DEF_STMT (name);
680 if (!is_gimple_assign (stmt))
681 return NULL_TREE;
683 if (gimple_assign_rhs_code (stmt) == opcode)
684 return gimple_assign_rhs1 (stmt);
685 return NULL_TREE;
688 /* If CURR and LAST are a pair of ops that OPCODE allows us to
689 eliminate through equivalences, do so, remove them from OPS, and
690 return true. Otherwise, return false. */
692 static bool
693 eliminate_duplicate_pair (enum tree_code opcode,
694 vec<operand_entry_t> *ops,
695 bool *all_done,
696 unsigned int i,
697 operand_entry_t curr,
698 operand_entry_t last)
701 /* If we have two of the same op, and the opcode is & |, min, or max,
702 we can eliminate one of them.
703 If we have two of the same op, and the opcode is ^, we can
704 eliminate both of them. */
706 if (last && last->op == curr->op)
708 switch (opcode)
710 case MAX_EXPR:
711 case MIN_EXPR:
712 case BIT_IOR_EXPR:
713 case BIT_AND_EXPR:
714 if (dump_file && (dump_flags & TDF_DETAILS))
716 fprintf (dump_file, "Equivalence: ");
717 print_generic_expr (dump_file, curr->op, 0);
718 fprintf (dump_file, " [&|minmax] ");
719 print_generic_expr (dump_file, last->op, 0);
720 fprintf (dump_file, " -> ");
721 print_generic_stmt (dump_file, last->op, 0);
724 ops->ordered_remove (i);
725 reassociate_stats.ops_eliminated ++;
727 return true;
729 case BIT_XOR_EXPR:
730 if (dump_file && (dump_flags & TDF_DETAILS))
732 fprintf (dump_file, "Equivalence: ");
733 print_generic_expr (dump_file, curr->op, 0);
734 fprintf (dump_file, " ^ ");
735 print_generic_expr (dump_file, last->op, 0);
736 fprintf (dump_file, " -> nothing\n");
739 reassociate_stats.ops_eliminated += 2;
741 if (ops->length () == 2)
743 ops->create (0);
744 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
745 *all_done = true;
747 else
749 ops->ordered_remove (i-1);
750 ops->ordered_remove (i-1);
753 return true;
755 default:
756 break;
759 return false;
762 static vec<tree> plus_negates;
764 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
765 expression, look in OPS for a corresponding positive operation to cancel
766 it out. If we find one, remove the other from OPS, replace
767 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
768 return false. */
770 static bool
771 eliminate_plus_minus_pair (enum tree_code opcode,
772 vec<operand_entry_t> *ops,
773 unsigned int currindex,
774 operand_entry_t curr)
776 tree negateop;
777 tree notop;
778 unsigned int i;
779 operand_entry_t oe;
781 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
782 return false;
784 negateop = get_unary_op (curr->op, NEGATE_EXPR);
785 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
786 if (negateop == NULL_TREE && notop == NULL_TREE)
787 return false;
789 /* Any non-negated version will have a rank that is one less than
790 the current rank. So once we hit those ranks, if we don't find
791 one, we can stop. */
793 for (i = currindex + 1;
794 ops->iterate (i, &oe)
795 && oe->rank >= curr->rank - 1 ;
796 i++)
798 if (oe->op == negateop)
801 if (dump_file && (dump_flags & TDF_DETAILS))
803 fprintf (dump_file, "Equivalence: ");
804 print_generic_expr (dump_file, negateop, 0);
805 fprintf (dump_file, " + -");
806 print_generic_expr (dump_file, oe->op, 0);
807 fprintf (dump_file, " -> 0\n");
810 ops->ordered_remove (i);
811 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
812 ops->ordered_remove (currindex);
813 reassociate_stats.ops_eliminated ++;
815 return true;
817 else if (oe->op == notop)
819 tree op_type = TREE_TYPE (oe->op);
821 if (dump_file && (dump_flags & TDF_DETAILS))
823 fprintf (dump_file, "Equivalence: ");
824 print_generic_expr (dump_file, notop, 0);
825 fprintf (dump_file, " + ~");
826 print_generic_expr (dump_file, oe->op, 0);
827 fprintf (dump_file, " -> -1\n");
830 ops->ordered_remove (i);
831 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
832 ops->ordered_remove (currindex);
833 reassociate_stats.ops_eliminated ++;
835 return true;
839 /* CURR->OP is a negate expr in a plus expr: save it for later
840 inspection in repropagate_negates(). */
841 if (negateop != NULL_TREE)
842 plus_negates.safe_push (curr->op);
844 return false;
847 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
848 bitwise not expression, look in OPS for a corresponding operand to
849 cancel it out. If we find one, remove the other from OPS, replace
850 OPS[CURRINDEX] with 0, and return true. Otherwise, return
851 false. */
853 static bool
854 eliminate_not_pairs (enum tree_code opcode,
855 vec<operand_entry_t> *ops,
856 unsigned int currindex,
857 operand_entry_t curr)
859 tree notop;
860 unsigned int i;
861 operand_entry_t oe;
863 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
864 || TREE_CODE (curr->op) != SSA_NAME)
865 return false;
867 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
868 if (notop == NULL_TREE)
869 return false;
871 /* Any non-not version will have a rank that is one less than
872 the current rank. So once we hit those ranks, if we don't find
873 one, we can stop. */
875 for (i = currindex + 1;
876 ops->iterate (i, &oe)
877 && oe->rank >= curr->rank - 1;
878 i++)
880 if (oe->op == notop)
882 if (dump_file && (dump_flags & TDF_DETAILS))
884 fprintf (dump_file, "Equivalence: ");
885 print_generic_expr (dump_file, notop, 0);
886 if (opcode == BIT_AND_EXPR)
887 fprintf (dump_file, " & ~");
888 else if (opcode == BIT_IOR_EXPR)
889 fprintf (dump_file, " | ~");
890 print_generic_expr (dump_file, oe->op, 0);
891 if (opcode == BIT_AND_EXPR)
892 fprintf (dump_file, " -> 0\n");
893 else if (opcode == BIT_IOR_EXPR)
894 fprintf (dump_file, " -> -1\n");
897 if (opcode == BIT_AND_EXPR)
898 oe->op = build_zero_cst (TREE_TYPE (oe->op));
899 else if (opcode == BIT_IOR_EXPR)
900 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
902 reassociate_stats.ops_eliminated += ops->length () - 1;
903 ops->truncate (0);
904 ops->quick_push (oe);
905 return true;
909 return false;
912 /* Use constant value that may be present in OPS to try to eliminate
913 operands. Note that this function is only really used when we've
914 eliminated ops for other reasons, or merged constants. Across
915 single statements, fold already does all of this, plus more. There
916 is little point in duplicating logic, so I've only included the
917 identities that I could ever construct testcases to trigger. */
919 static void
920 eliminate_using_constants (enum tree_code opcode,
921 vec<operand_entry_t> *ops)
923 operand_entry_t oelast = ops->last ();
924 tree type = TREE_TYPE (oelast->op);
926 if (oelast->rank == 0
927 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
929 switch (opcode)
931 case BIT_AND_EXPR:
932 if (integer_zerop (oelast->op))
934 if (ops->length () != 1)
936 if (dump_file && (dump_flags & TDF_DETAILS))
937 fprintf (dump_file, "Found & 0, removing all other ops\n");
939 reassociate_stats.ops_eliminated += ops->length () - 1;
941 ops->truncate (0);
942 ops->quick_push (oelast);
943 return;
946 else if (integer_all_onesp (oelast->op))
948 if (ops->length () != 1)
950 if (dump_file && (dump_flags & TDF_DETAILS))
951 fprintf (dump_file, "Found & -1, removing\n");
952 ops->pop ();
953 reassociate_stats.ops_eliminated++;
956 break;
957 case BIT_IOR_EXPR:
958 if (integer_all_onesp (oelast->op))
960 if (ops->length () != 1)
962 if (dump_file && (dump_flags & TDF_DETAILS))
963 fprintf (dump_file, "Found | -1, removing all other ops\n");
965 reassociate_stats.ops_eliminated += ops->length () - 1;
967 ops->truncate (0);
968 ops->quick_push (oelast);
969 return;
972 else if (integer_zerop (oelast->op))
974 if (ops->length () != 1)
976 if (dump_file && (dump_flags & TDF_DETAILS))
977 fprintf (dump_file, "Found | 0, removing\n");
978 ops->pop ();
979 reassociate_stats.ops_eliminated++;
982 break;
983 case MULT_EXPR:
984 if (integer_zerop (oelast->op)
985 || (FLOAT_TYPE_P (type)
986 && !HONOR_NANS (type)
987 && !HONOR_SIGNED_ZEROS (type)
988 && real_zerop (oelast->op)))
990 if (ops->length () != 1)
992 if (dump_file && (dump_flags & TDF_DETAILS))
993 fprintf (dump_file, "Found * 0, removing all other ops\n");
995 reassociate_stats.ops_eliminated += ops->length () - 1;
996 ops->truncate (1);
997 ops->quick_push (oelast);
998 return;
1001 else if (integer_onep (oelast->op)
1002 || (FLOAT_TYPE_P (type)
1003 && !HONOR_SNANS (type)
1004 && real_onep (oelast->op)))
1006 if (ops->length () != 1)
1008 if (dump_file && (dump_flags & TDF_DETAILS))
1009 fprintf (dump_file, "Found * 1, removing\n");
1010 ops->pop ();
1011 reassociate_stats.ops_eliminated++;
1012 return;
1015 break;
1016 case BIT_XOR_EXPR:
1017 case PLUS_EXPR:
1018 case MINUS_EXPR:
1019 if (integer_zerop (oelast->op)
1020 || (FLOAT_TYPE_P (type)
1021 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1022 && fold_real_zero_addition_p (type, oelast->op,
1023 opcode == MINUS_EXPR)))
1025 if (ops->length () != 1)
1027 if (dump_file && (dump_flags & TDF_DETAILS))
1028 fprintf (dump_file, "Found [|^+] 0, removing\n");
1029 ops->pop ();
1030 reassociate_stats.ops_eliminated++;
1031 return;
1034 break;
1035 default:
1036 break;
1042 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
1043 bool, bool);
1045 /* Structure for tracking and counting operands. */
1046 typedef struct oecount_s {
1047 int cnt;
1048 int id;
1049 enum tree_code oecode;
1050 tree op;
1051 } oecount;
1054 /* The heap for the oecount hashtable and the sorted list of operands. */
1055 static vec<oecount> cvec;
1058 /* Oecount hashtable helpers. */
1060 struct oecount_hasher
1062 typedef int value_type;
1063 typedef int compare_type;
1064 typedef int store_values_directly;
1065 static inline hashval_t hash (const value_type &);
1066 static inline bool equal (const value_type &, const compare_type &);
1067 static bool is_deleted (int &v) { return v == 1; }
1068 static void mark_deleted (int &e) { e = 1; }
1069 static bool is_empty (int &v) { return v == 0; }
1070 static void mark_empty (int &e) { e = 0; }
1071 static void remove (int &) {}
1074 /* Hash function for oecount. */
1076 inline hashval_t
1077 oecount_hasher::hash (const value_type &p)
1079 const oecount *c = &cvec[p - 42];
1080 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1083 /* Comparison function for oecount. */
1085 inline bool
1086 oecount_hasher::equal (const value_type &p1, const compare_type &p2)
1088 const oecount *c1 = &cvec[p1 - 42];
1089 const oecount *c2 = &cvec[p2 - 42];
1090 return (c1->oecode == c2->oecode
1091 && c1->op == c2->op);
1094 /* Comparison function for qsort sorting oecount elements by count. */
1096 static int
1097 oecount_cmp (const void *p1, const void *p2)
1099 const oecount *c1 = (const oecount *)p1;
1100 const oecount *c2 = (const oecount *)p2;
1101 if (c1->cnt != c2->cnt)
1102 return c1->cnt - c2->cnt;
1103 else
1104 /* If counts are identical, use unique IDs to stabilize qsort. */
1105 return c1->id - c2->id;
1108 /* Return TRUE iff STMT represents a builtin call that raises OP
1109 to some exponent. */
1111 static bool
1112 stmt_is_power_of_op (gimple stmt, tree op)
1114 tree fndecl;
1116 if (!is_gimple_call (stmt))
1117 return false;
1119 fndecl = gimple_call_fndecl (stmt);
1121 if (!fndecl
1122 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1123 return false;
1125 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1127 CASE_FLT_FN (BUILT_IN_POW):
1128 CASE_FLT_FN (BUILT_IN_POWI):
1129 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1131 default:
1132 return false;
1136 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1137 in place and return the result. Assumes that stmt_is_power_of_op
1138 was previously called for STMT and returned TRUE. */
1140 static HOST_WIDE_INT
1141 decrement_power (gimple stmt)
1143 REAL_VALUE_TYPE c, cint;
1144 HOST_WIDE_INT power;
1145 tree arg1;
1147 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1149 CASE_FLT_FN (BUILT_IN_POW):
1150 arg1 = gimple_call_arg (stmt, 1);
1151 c = TREE_REAL_CST (arg1);
1152 power = real_to_integer (&c) - 1;
1153 real_from_integer (&cint, VOIDmode, power, SIGNED);
1154 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1155 return power;
1157 CASE_FLT_FN (BUILT_IN_POWI):
1158 arg1 = gimple_call_arg (stmt, 1);
1159 power = TREE_INT_CST_LOW (arg1) - 1;
1160 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1161 return power;
1163 default:
1164 gcc_unreachable ();
1168 /* Find the single immediate use of STMT's LHS, and replace it
1169 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1170 replace *DEF with OP as well. */
1172 static void
1173 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1175 tree lhs;
1176 gimple use_stmt;
1177 use_operand_p use;
1178 gimple_stmt_iterator gsi;
1180 if (is_gimple_call (stmt))
1181 lhs = gimple_call_lhs (stmt);
1182 else
1183 lhs = gimple_assign_lhs (stmt);
1185 gcc_assert (has_single_use (lhs));
1186 single_imm_use (lhs, &use, &use_stmt);
1187 if (lhs == *def)
1188 *def = op;
1189 SET_USE (use, op);
1190 if (TREE_CODE (op) != SSA_NAME)
1191 update_stmt (use_stmt);
1192 gsi = gsi_for_stmt (stmt);
1193 unlink_stmt_vdef (stmt);
1194 reassoc_remove_stmt (&gsi);
1195 release_defs (stmt);
1198 /* Walks the linear chain with result *DEF searching for an operation
1199 with operand OP and code OPCODE removing that from the chain. *DEF
1200 is updated if there is only one operand but no operation left. */
1202 static void
1203 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1205 gimple stmt = SSA_NAME_DEF_STMT (*def);
1209 tree name;
1211 if (opcode == MULT_EXPR
1212 && stmt_is_power_of_op (stmt, op))
1214 if (decrement_power (stmt) == 1)
1215 propagate_op_to_single_use (op, stmt, def);
1216 return;
1219 name = gimple_assign_rhs1 (stmt);
1221 /* If this is the operation we look for and one of the operands
1222 is ours simply propagate the other operand into the stmts
1223 single use. */
1224 if (gimple_assign_rhs_code (stmt) == opcode
1225 && (name == op
1226 || gimple_assign_rhs2 (stmt) == op))
1228 if (name == op)
1229 name = gimple_assign_rhs2 (stmt);
1230 propagate_op_to_single_use (name, stmt, def);
1231 return;
1234 /* We might have a multiply of two __builtin_pow* calls, and
1235 the operand might be hiding in the rightmost one. */
1236 if (opcode == MULT_EXPR
1237 && gimple_assign_rhs_code (stmt) == opcode
1238 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1239 && has_single_use (gimple_assign_rhs2 (stmt)))
1241 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1242 if (stmt_is_power_of_op (stmt2, op))
1244 if (decrement_power (stmt2) == 1)
1245 propagate_op_to_single_use (op, stmt2, def);
1246 return;
1250 /* Continue walking the chain. */
1251 gcc_assert (name != op
1252 && TREE_CODE (name) == SSA_NAME);
1253 stmt = SSA_NAME_DEF_STMT (name);
1255 while (1);
1258 /* Returns true if statement S1 dominates statement S2. Like
1259 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1261 static bool
1262 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1264 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1266 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1267 SSA_NAME. Assume it lives at the beginning of function and
1268 thus dominates everything. */
1269 if (!bb1 || s1 == s2)
1270 return true;
1272 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1273 if (!bb2)
1274 return false;
1276 if (bb1 == bb2)
1278 /* PHIs in the same basic block are assumed to be
1279 executed all in parallel, if only one stmt is a PHI,
1280 it dominates the other stmt in the same basic block. */
1281 if (gimple_code (s1) == GIMPLE_PHI)
1282 return true;
1284 if (gimple_code (s2) == GIMPLE_PHI)
1285 return false;
1287 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1289 if (gimple_uid (s1) < gimple_uid (s2))
1290 return true;
1292 if (gimple_uid (s1) > gimple_uid (s2))
1293 return false;
1295 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1296 unsigned int uid = gimple_uid (s1);
1297 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1299 gimple s = gsi_stmt (gsi);
1300 if (gimple_uid (s) != uid)
1301 break;
1302 if (s == s2)
1303 return true;
1306 return false;
1309 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1312 /* Insert STMT after INSERT_POINT. */
1314 static void
1315 insert_stmt_after (gimple stmt, gimple insert_point)
1317 gimple_stmt_iterator gsi;
1318 basic_block bb;
1320 if (gimple_code (insert_point) == GIMPLE_PHI)
1321 bb = gimple_bb (insert_point);
1322 else if (!stmt_ends_bb_p (insert_point))
1324 gsi = gsi_for_stmt (insert_point);
1325 gimple_set_uid (stmt, gimple_uid (insert_point));
1326 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1327 return;
1329 else
1330 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1331 thus if it must end a basic block, it should be a call that can
1332 throw, or some assignment that can throw. If it throws, the LHS
1333 of it will not be initialized though, so only valid places using
1334 the SSA_NAME should be dominated by the fallthru edge. */
1335 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1336 gsi = gsi_after_labels (bb);
1337 if (gsi_end_p (gsi))
1339 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1340 gimple_set_uid (stmt,
1341 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1343 else
1344 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1345 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1348 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1349 the result. Places the statement after the definition of either
1350 OP1 or OP2. Returns the new statement. */
1352 static gimple
1353 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1355 gimple op1def = NULL, op2def = NULL;
1356 gimple_stmt_iterator gsi;
1357 tree op;
1358 gassign *sum;
1360 /* Create the addition statement. */
1361 op = make_ssa_name (type);
1362 sum = gimple_build_assign (op, opcode, op1, op2);
1364 /* Find an insertion place and insert. */
1365 if (TREE_CODE (op1) == SSA_NAME)
1366 op1def = SSA_NAME_DEF_STMT (op1);
1367 if (TREE_CODE (op2) == SSA_NAME)
1368 op2def = SSA_NAME_DEF_STMT (op2);
1369 if ((!op1def || gimple_nop_p (op1def))
1370 && (!op2def || gimple_nop_p (op2def)))
1372 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1373 if (gsi_end_p (gsi))
1375 gimple_stmt_iterator gsi2
1376 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1377 gimple_set_uid (sum,
1378 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1380 else
1381 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1382 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1384 else
1386 gimple insert_point;
1387 if ((!op1def || gimple_nop_p (op1def))
1388 || (op2def && !gimple_nop_p (op2def)
1389 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1390 insert_point = op2def;
1391 else
1392 insert_point = op1def;
1393 insert_stmt_after (sum, insert_point);
1395 update_stmt (sum);
1397 return sum;
1400 /* Perform un-distribution of divisions and multiplications.
1401 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1402 to (A + B) / X for real X.
1404 The algorithm is organized as follows.
1406 - First we walk the addition chain *OPS looking for summands that
1407 are defined by a multiplication or a real division. This results
1408 in the candidates bitmap with relevant indices into *OPS.
1410 - Second we build the chains of multiplications or divisions for
1411 these candidates, counting the number of occurrences of (operand, code)
1412 pairs in all of the candidates chains.
1414 - Third we sort the (operand, code) pairs by number of occurrence and
1415 process them starting with the pair with the most uses.
1417 * For each such pair we walk the candidates again to build a
1418 second candidate bitmap noting all multiplication/division chains
1419 that have at least one occurrence of (operand, code).
1421 * We build an alternate addition chain only covering these
1422 candidates with one (operand, code) operation removed from their
1423 multiplication/division chain.
1425 * The first candidate gets replaced by the alternate addition chain
1426 multiplied/divided by the operand.
1428 * All candidate chains get disabled for further processing and
1429 processing of (operand, code) pairs continues.
1431 The alternate addition chains built are re-processed by the main
1432 reassociation algorithm which allows optimizing a * x * y + b * y * x
1433 to (a + b ) * x * y in one invocation of the reassociation pass. */
1435 static bool
1436 undistribute_ops_list (enum tree_code opcode,
1437 vec<operand_entry_t> *ops, struct loop *loop)
1439 unsigned int length = ops->length ();
1440 operand_entry_t oe1;
1441 unsigned i, j;
1442 sbitmap candidates, candidates2;
1443 unsigned nr_candidates, nr_candidates2;
1444 sbitmap_iterator sbi0;
1445 vec<operand_entry_t> *subops;
1446 bool changed = false;
1447 int next_oecount_id = 0;
1449 if (length <= 1
1450 || opcode != PLUS_EXPR)
1451 return false;
1453 /* Build a list of candidates to process. */
1454 candidates = sbitmap_alloc (length);
1455 bitmap_clear (candidates);
1456 nr_candidates = 0;
1457 FOR_EACH_VEC_ELT (*ops, i, oe1)
1459 enum tree_code dcode;
1460 gimple oe1def;
1462 if (TREE_CODE (oe1->op) != SSA_NAME)
1463 continue;
1464 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1465 if (!is_gimple_assign (oe1def))
1466 continue;
1467 dcode = gimple_assign_rhs_code (oe1def);
1468 if ((dcode != MULT_EXPR
1469 && dcode != RDIV_EXPR)
1470 || !is_reassociable_op (oe1def, dcode, loop))
1471 continue;
1473 bitmap_set_bit (candidates, i);
1474 nr_candidates++;
1477 if (nr_candidates < 2)
1479 sbitmap_free (candidates);
1480 return false;
1483 if (dump_file && (dump_flags & TDF_DETAILS))
1485 fprintf (dump_file, "searching for un-distribute opportunities ");
1486 print_generic_expr (dump_file,
1487 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1488 fprintf (dump_file, " %d\n", nr_candidates);
1491 /* Build linearized sub-operand lists and the counting table. */
1492 cvec.create (0);
1494 hash_table<oecount_hasher> ctable (15);
1496 /* ??? Macro arguments cannot have multi-argument template types in
1497 them. This typedef is needed to workaround that limitation. */
1498 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1499 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1500 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1502 gimple oedef;
1503 enum tree_code oecode;
1504 unsigned j;
1506 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1507 oecode = gimple_assign_rhs_code (oedef);
1508 linearize_expr_tree (&subops[i], oedef,
1509 associative_tree_code (oecode), false);
1511 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1513 oecount c;
1514 int *slot;
1515 int idx;
1516 c.oecode = oecode;
1517 c.cnt = 1;
1518 c.id = next_oecount_id++;
1519 c.op = oe1->op;
1520 cvec.safe_push (c);
1521 idx = cvec.length () + 41;
1522 slot = ctable.find_slot (idx, INSERT);
1523 if (!*slot)
1525 *slot = idx;
1527 else
1529 cvec.pop ();
1530 cvec[*slot - 42].cnt++;
1535 /* Sort the counting table. */
1536 cvec.qsort (oecount_cmp);
1538 if (dump_file && (dump_flags & TDF_DETAILS))
1540 oecount *c;
1541 fprintf (dump_file, "Candidates:\n");
1542 FOR_EACH_VEC_ELT (cvec, j, c)
1544 fprintf (dump_file, " %u %s: ", c->cnt,
1545 c->oecode == MULT_EXPR
1546 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1547 print_generic_expr (dump_file, c->op, 0);
1548 fprintf (dump_file, "\n");
1552 /* Process the (operand, code) pairs in order of most occurrence. */
1553 candidates2 = sbitmap_alloc (length);
1554 while (!cvec.is_empty ())
1556 oecount *c = &cvec.last ();
1557 if (c->cnt < 2)
1558 break;
1560 /* Now collect the operands in the outer chain that contain
1561 the common operand in their inner chain. */
1562 bitmap_clear (candidates2);
1563 nr_candidates2 = 0;
1564 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1566 gimple oedef;
1567 enum tree_code oecode;
1568 unsigned j;
1569 tree op = (*ops)[i]->op;
1571 /* If we undistributed in this chain already this may be
1572 a constant. */
1573 if (TREE_CODE (op) != SSA_NAME)
1574 continue;
1576 oedef = SSA_NAME_DEF_STMT (op);
1577 oecode = gimple_assign_rhs_code (oedef);
1578 if (oecode != c->oecode)
1579 continue;
1581 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1583 if (oe1->op == c->op)
1585 bitmap_set_bit (candidates2, i);
1586 ++nr_candidates2;
1587 break;
1592 if (nr_candidates2 >= 2)
1594 operand_entry_t oe1, oe2;
1595 gimple prod;
1596 int first = bitmap_first_set_bit (candidates2);
1598 /* Build the new addition chain. */
1599 oe1 = (*ops)[first];
1600 if (dump_file && (dump_flags & TDF_DETAILS))
1602 fprintf (dump_file, "Building (");
1603 print_generic_expr (dump_file, oe1->op, 0);
1605 zero_one_operation (&oe1->op, c->oecode, c->op);
1606 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1608 gimple sum;
1609 oe2 = (*ops)[i];
1610 if (dump_file && (dump_flags & TDF_DETAILS))
1612 fprintf (dump_file, " + ");
1613 print_generic_expr (dump_file, oe2->op, 0);
1615 zero_one_operation (&oe2->op, c->oecode, c->op);
1616 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1617 oe1->op, oe2->op, opcode);
1618 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1619 oe2->rank = 0;
1620 oe1->op = gimple_get_lhs (sum);
1623 /* Apply the multiplication/division. */
1624 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1625 oe1->op, c->op, c->oecode);
1626 if (dump_file && (dump_flags & TDF_DETAILS))
1628 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1629 print_generic_expr (dump_file, c->op, 0);
1630 fprintf (dump_file, "\n");
1633 /* Record it in the addition chain and disable further
1634 undistribution with this op. */
1635 oe1->op = gimple_assign_lhs (prod);
1636 oe1->rank = get_rank (oe1->op);
1637 subops[first].release ();
1639 changed = true;
1642 cvec.pop ();
1645 for (i = 0; i < ops->length (); ++i)
1646 subops[i].release ();
1647 free (subops);
1648 cvec.release ();
1649 sbitmap_free (candidates);
1650 sbitmap_free (candidates2);
1652 return changed;
1655 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1656 expression, examine the other OPS to see if any of them are comparisons
1657 of the same values, which we may be able to combine or eliminate.
1658 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1660 static bool
1661 eliminate_redundant_comparison (enum tree_code opcode,
1662 vec<operand_entry_t> *ops,
1663 unsigned int currindex,
1664 operand_entry_t curr)
1666 tree op1, op2;
1667 enum tree_code lcode, rcode;
1668 gimple def1, def2;
1669 int i;
1670 operand_entry_t oe;
1672 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1673 return false;
1675 /* Check that CURR is a comparison. */
1676 if (TREE_CODE (curr->op) != SSA_NAME)
1677 return false;
1678 def1 = SSA_NAME_DEF_STMT (curr->op);
1679 if (!is_gimple_assign (def1))
1680 return false;
1681 lcode = gimple_assign_rhs_code (def1);
1682 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1683 return false;
1684 op1 = gimple_assign_rhs1 (def1);
1685 op2 = gimple_assign_rhs2 (def1);
1687 /* Now look for a similar comparison in the remaining OPS. */
1688 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1690 tree t;
1692 if (TREE_CODE (oe->op) != SSA_NAME)
1693 continue;
1694 def2 = SSA_NAME_DEF_STMT (oe->op);
1695 if (!is_gimple_assign (def2))
1696 continue;
1697 rcode = gimple_assign_rhs_code (def2);
1698 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1699 continue;
1701 /* If we got here, we have a match. See if we can combine the
1702 two comparisons. */
1703 if (opcode == BIT_IOR_EXPR)
1704 t = maybe_fold_or_comparisons (lcode, op1, op2,
1705 rcode, gimple_assign_rhs1 (def2),
1706 gimple_assign_rhs2 (def2));
1707 else
1708 t = maybe_fold_and_comparisons (lcode, op1, op2,
1709 rcode, gimple_assign_rhs1 (def2),
1710 gimple_assign_rhs2 (def2));
1711 if (!t)
1712 continue;
1714 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1715 always give us a boolean_type_node value back. If the original
1716 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1717 we need to convert. */
1718 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1719 t = fold_convert (TREE_TYPE (curr->op), t);
1721 if (TREE_CODE (t) != INTEGER_CST
1722 && !operand_equal_p (t, curr->op, 0))
1724 enum tree_code subcode;
1725 tree newop1, newop2;
1726 if (!COMPARISON_CLASS_P (t))
1727 continue;
1728 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1729 STRIP_USELESS_TYPE_CONVERSION (newop1);
1730 STRIP_USELESS_TYPE_CONVERSION (newop2);
1731 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1732 continue;
1735 if (dump_file && (dump_flags & TDF_DETAILS))
1737 fprintf (dump_file, "Equivalence: ");
1738 print_generic_expr (dump_file, curr->op, 0);
1739 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1740 print_generic_expr (dump_file, oe->op, 0);
1741 fprintf (dump_file, " -> ");
1742 print_generic_expr (dump_file, t, 0);
1743 fprintf (dump_file, "\n");
1746 /* Now we can delete oe, as it has been subsumed by the new combined
1747 expression t. */
1748 ops->ordered_remove (i);
1749 reassociate_stats.ops_eliminated ++;
1751 /* If t is the same as curr->op, we're done. Otherwise we must
1752 replace curr->op with t. Special case is if we got a constant
1753 back, in which case we add it to the end instead of in place of
1754 the current entry. */
1755 if (TREE_CODE (t) == INTEGER_CST)
1757 ops->ordered_remove (currindex);
1758 add_to_ops_vec (ops, t);
1760 else if (!operand_equal_p (t, curr->op, 0))
1762 gimple sum;
1763 enum tree_code subcode;
1764 tree newop1;
1765 tree newop2;
1766 gcc_assert (COMPARISON_CLASS_P (t));
1767 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1768 STRIP_USELESS_TYPE_CONVERSION (newop1);
1769 STRIP_USELESS_TYPE_CONVERSION (newop2);
1770 gcc_checking_assert (is_gimple_val (newop1)
1771 && is_gimple_val (newop2));
1772 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1773 curr->op = gimple_get_lhs (sum);
1775 return true;
1778 return false;
1781 /* Perform various identities and other optimizations on the list of
1782 operand entries, stored in OPS. The tree code for the binary
1783 operation between all the operands is OPCODE. */
1785 static void
1786 optimize_ops_list (enum tree_code opcode,
1787 vec<operand_entry_t> *ops)
1789 unsigned int length = ops->length ();
1790 unsigned int i;
1791 operand_entry_t oe;
1792 operand_entry_t oelast = NULL;
1793 bool iterate = false;
1795 if (length == 1)
1796 return;
1798 oelast = ops->last ();
1800 /* If the last two are constants, pop the constants off, merge them
1801 and try the next two. */
1802 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1804 operand_entry_t oelm1 = (*ops)[length - 2];
1806 if (oelm1->rank == 0
1807 && is_gimple_min_invariant (oelm1->op)
1808 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1809 TREE_TYPE (oelast->op)))
1811 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1812 oelm1->op, oelast->op);
1814 if (folded && is_gimple_min_invariant (folded))
1816 if (dump_file && (dump_flags & TDF_DETAILS))
1817 fprintf (dump_file, "Merging constants\n");
1819 ops->pop ();
1820 ops->pop ();
1822 add_to_ops_vec (ops, folded);
1823 reassociate_stats.constants_eliminated++;
1825 optimize_ops_list (opcode, ops);
1826 return;
1831 eliminate_using_constants (opcode, ops);
1832 oelast = NULL;
1834 for (i = 0; ops->iterate (i, &oe);)
1836 bool done = false;
1838 if (eliminate_not_pairs (opcode, ops, i, oe))
1839 return;
1840 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1841 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1842 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1844 if (done)
1845 return;
1846 iterate = true;
1847 oelast = NULL;
1848 continue;
1850 oelast = oe;
1851 i++;
1854 length = ops->length ();
1855 oelast = ops->last ();
1857 if (iterate)
1858 optimize_ops_list (opcode, ops);
1861 /* The following functions are subroutines to optimize_range_tests and allow
1862 it to try to change a logical combination of comparisons into a range
1863 test.
1865 For example, both
1866 X == 2 || X == 5 || X == 3 || X == 4
1868 X >= 2 && X <= 5
1869 are converted to
1870 (unsigned) (X - 2) <= 3
1872 For more information see comments above fold_test_range in fold-const.c,
1873 this implementation is for GIMPLE. */
1875 struct range_entry
1877 tree exp;
1878 tree low;
1879 tree high;
1880 bool in_p;
1881 bool strict_overflow_p;
1882 unsigned int idx, next;
1885 /* This is similar to make_range in fold-const.c, but on top of
1886 GIMPLE instead of trees. If EXP is non-NULL, it should be
1887 an SSA_NAME and STMT argument is ignored, otherwise STMT
1888 argument should be a GIMPLE_COND. */
1890 static void
1891 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1893 int in_p;
1894 tree low, high;
1895 bool is_bool, strict_overflow_p;
1897 r->exp = NULL_TREE;
1898 r->in_p = false;
1899 r->strict_overflow_p = false;
1900 r->low = NULL_TREE;
1901 r->high = NULL_TREE;
1902 if (exp != NULL_TREE
1903 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1904 return;
1906 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1907 and see if we can refine the range. Some of the cases below may not
1908 happen, but it doesn't seem worth worrying about this. We "continue"
1909 the outer loop when we've changed something; otherwise we "break"
1910 the switch, which will "break" the while. */
1911 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1912 high = low;
1913 in_p = 0;
1914 strict_overflow_p = false;
1915 is_bool = false;
1916 if (exp == NULL_TREE)
1917 is_bool = true;
1918 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1920 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1921 is_bool = true;
1922 else
1923 return;
1925 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1926 is_bool = true;
1928 while (1)
1930 enum tree_code code;
1931 tree arg0, arg1, exp_type;
1932 tree nexp;
1933 location_t loc;
1935 if (exp != NULL_TREE)
1937 if (TREE_CODE (exp) != SSA_NAME
1938 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1939 break;
1941 stmt = SSA_NAME_DEF_STMT (exp);
1942 if (!is_gimple_assign (stmt))
1943 break;
1945 code = gimple_assign_rhs_code (stmt);
1946 arg0 = gimple_assign_rhs1 (stmt);
1947 arg1 = gimple_assign_rhs2 (stmt);
1948 exp_type = TREE_TYPE (exp);
1950 else
1952 code = gimple_cond_code (stmt);
1953 arg0 = gimple_cond_lhs (stmt);
1954 arg1 = gimple_cond_rhs (stmt);
1955 exp_type = boolean_type_node;
1958 if (TREE_CODE (arg0) != SSA_NAME)
1959 break;
1960 loc = gimple_location (stmt);
1961 switch (code)
1963 case BIT_NOT_EXPR:
1964 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1965 /* Ensure the range is either +[-,0], +[0,0],
1966 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1967 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1968 or similar expression of unconditional true or
1969 false, it should not be negated. */
1970 && ((high && integer_zerop (high))
1971 || (low && integer_onep (low))))
1973 in_p = !in_p;
1974 exp = arg0;
1975 continue;
1977 break;
1978 case SSA_NAME:
1979 exp = arg0;
1980 continue;
1981 CASE_CONVERT:
1982 if (is_bool)
1983 goto do_default;
1984 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1986 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1987 is_bool = true;
1988 else
1989 return;
1991 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1992 is_bool = true;
1993 goto do_default;
1994 case EQ_EXPR:
1995 case NE_EXPR:
1996 case LT_EXPR:
1997 case LE_EXPR:
1998 case GE_EXPR:
1999 case GT_EXPR:
2000 is_bool = true;
2001 /* FALLTHRU */
2002 default:
2003 if (!is_bool)
2004 return;
2005 do_default:
2006 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2007 &low, &high, &in_p,
2008 &strict_overflow_p);
2009 if (nexp != NULL_TREE)
2011 exp = nexp;
2012 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2013 continue;
2015 break;
2017 break;
2019 if (is_bool)
2021 r->exp = exp;
2022 r->in_p = in_p;
2023 r->low = low;
2024 r->high = high;
2025 r->strict_overflow_p = strict_overflow_p;
2029 /* Comparison function for qsort. Sort entries
2030 without SSA_NAME exp first, then with SSA_NAMEs sorted
2031 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2032 by increasing ->low and if ->low is the same, by increasing
2033 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2034 maximum. */
2036 static int
2037 range_entry_cmp (const void *a, const void *b)
2039 const struct range_entry *p = (const struct range_entry *) a;
2040 const struct range_entry *q = (const struct range_entry *) b;
2042 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2044 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2046 /* Group range_entries for the same SSA_NAME together. */
2047 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2048 return -1;
2049 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2050 return 1;
2051 /* If ->low is different, NULL low goes first, then by
2052 ascending low. */
2053 if (p->low != NULL_TREE)
2055 if (q->low != NULL_TREE)
2057 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2058 p->low, q->low);
2059 if (tem && integer_onep (tem))
2060 return -1;
2061 tem = fold_binary (GT_EXPR, boolean_type_node,
2062 p->low, q->low);
2063 if (tem && integer_onep (tem))
2064 return 1;
2066 else
2067 return 1;
2069 else if (q->low != NULL_TREE)
2070 return -1;
2071 /* If ->high is different, NULL high goes last, before that by
2072 ascending high. */
2073 if (p->high != NULL_TREE)
2075 if (q->high != NULL_TREE)
2077 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2078 p->high, q->high);
2079 if (tem && integer_onep (tem))
2080 return -1;
2081 tem = fold_binary (GT_EXPR, boolean_type_node,
2082 p->high, q->high);
2083 if (tem && integer_onep (tem))
2084 return 1;
2086 else
2087 return -1;
2089 else if (q->high != NULL_TREE)
2090 return 1;
2091 /* If both ranges are the same, sort below by ascending idx. */
2093 else
2094 return 1;
2096 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2097 return -1;
2099 if (p->idx < q->idx)
2100 return -1;
2101 else
2103 gcc_checking_assert (p->idx > q->idx);
2104 return 1;
2108 /* Helper routine of optimize_range_test.
2109 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2110 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2111 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2112 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2113 an array of COUNT pointers to other ranges. Return
2114 true if the range merge has been successful.
2115 If OPCODE is ERROR_MARK, this is called from within
2116 maybe_optimize_range_tests and is performing inter-bb range optimization.
2117 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2118 oe->rank. */
2120 static bool
2121 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2122 struct range_entry **otherrangep,
2123 unsigned int count, enum tree_code opcode,
2124 vec<operand_entry_t> *ops, tree exp, gimple_seq seq,
2125 bool in_p, tree low, tree high, bool strict_overflow_p)
2127 operand_entry_t oe = (*ops)[range->idx];
2128 tree op = oe->op;
2129 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2130 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2131 location_t loc = gimple_location (stmt);
2132 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2133 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2134 in_p, low, high);
2135 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2136 gimple_stmt_iterator gsi;
2137 unsigned int i;
2139 if (tem == NULL_TREE)
2140 return false;
2142 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2143 warning_at (loc, OPT_Wstrict_overflow,
2144 "assuming signed overflow does not occur "
2145 "when simplifying range test");
2147 if (dump_file && (dump_flags & TDF_DETAILS))
2149 struct range_entry *r;
2150 fprintf (dump_file, "Optimizing range tests ");
2151 print_generic_expr (dump_file, range->exp, 0);
2152 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2153 print_generic_expr (dump_file, range->low, 0);
2154 fprintf (dump_file, ", ");
2155 print_generic_expr (dump_file, range->high, 0);
2156 fprintf (dump_file, "]");
2157 for (i = 0; i < count; i++)
2159 if (otherrange)
2160 r = otherrange + i;
2161 else
2162 r = otherrangep[i];
2163 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2164 print_generic_expr (dump_file, r->low, 0);
2165 fprintf (dump_file, ", ");
2166 print_generic_expr (dump_file, r->high, 0);
2167 fprintf (dump_file, "]");
2169 fprintf (dump_file, "\n into ");
2170 print_generic_expr (dump_file, tem, 0);
2171 fprintf (dump_file, "\n");
2174 if (opcode == BIT_IOR_EXPR
2175 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2176 tem = invert_truthvalue_loc (loc, tem);
2178 tem = fold_convert_loc (loc, optype, tem);
2179 gsi = gsi_for_stmt (stmt);
2180 /* In rare cases range->exp can be equal to lhs of stmt.
2181 In that case we have to insert after the stmt rather then before
2182 it. */
2183 if (op == range->exp)
2185 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2186 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2187 GSI_CONTINUE_LINKING);
2189 else
2191 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2192 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2193 GSI_SAME_STMT);
2194 gsi_prev (&gsi);
2196 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2197 if (gimple_uid (gsi_stmt (gsi)))
2198 break;
2199 else
2200 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2202 oe->op = tem;
2203 range->exp = exp;
2204 range->low = low;
2205 range->high = high;
2206 range->in_p = in_p;
2207 range->strict_overflow_p = false;
2209 for (i = 0; i < count; i++)
2211 if (otherrange)
2212 range = otherrange + i;
2213 else
2214 range = otherrangep[i];
2215 oe = (*ops)[range->idx];
2216 /* Now change all the other range test immediate uses, so that
2217 those tests will be optimized away. */
2218 if (opcode == ERROR_MARK)
2220 if (oe->op)
2221 oe->op = build_int_cst (TREE_TYPE (oe->op),
2222 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2223 else
2224 oe->op = (oe->rank == BIT_IOR_EXPR
2225 ? boolean_false_node : boolean_true_node);
2227 else
2228 oe->op = error_mark_node;
2229 range->exp = NULL_TREE;
2231 return true;
2234 /* Optimize X == CST1 || X == CST2
2235 if popcount (CST1 ^ CST2) == 1 into
2236 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2237 Similarly for ranges. E.g.
2238 X != 2 && X != 3 && X != 10 && X != 11
2239 will be transformed by the previous optimization into
2240 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2241 and this loop can transform that into
2242 !(((X & ~8) - 2U) <= 1U). */
2244 static bool
2245 optimize_range_tests_xor (enum tree_code opcode, tree type,
2246 tree lowi, tree lowj, tree highi, tree highj,
2247 vec<operand_entry_t> *ops,
2248 struct range_entry *rangei,
2249 struct range_entry *rangej)
2251 tree lowxor, highxor, tem, exp;
2252 /* Check lowi ^ lowj == highi ^ highj and
2253 popcount (lowi ^ lowj) == 1. */
2254 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2255 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2256 return false;
2257 if (!integer_pow2p (lowxor))
2258 return false;
2259 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2260 if (!tree_int_cst_equal (lowxor, highxor))
2261 return false;
2263 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2264 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2265 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2266 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2267 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2268 NULL, rangei->in_p, lowj, highj,
2269 rangei->strict_overflow_p
2270 || rangej->strict_overflow_p))
2271 return true;
2272 return false;
2275 /* Optimize X == CST1 || X == CST2
2276 if popcount (CST2 - CST1) == 1 into
2277 ((X - CST1) & ~(CST2 - CST1)) == 0.
2278 Similarly for ranges. E.g.
2279 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2280 || X == 75 || X == 45
2281 will be transformed by the previous optimization into
2282 (X - 43U) <= 3U || (X - 75U) <= 3U
2283 and this loop can transform that into
2284 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2285 static bool
2286 optimize_range_tests_diff (enum tree_code opcode, tree type,
2287 tree lowi, tree lowj, tree highi, tree highj,
2288 vec<operand_entry_t> *ops,
2289 struct range_entry *rangei,
2290 struct range_entry *rangej)
2292 tree tem1, tem2, mask;
2293 /* Check highi - lowi == highj - lowj. */
2294 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2295 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2296 return false;
2297 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2298 if (!tree_int_cst_equal (tem1, tem2))
2299 return false;
2300 /* Check popcount (lowj - lowi) == 1. */
2301 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2302 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2303 return false;
2304 if (!integer_pow2p (tem1))
2305 return false;
2307 type = unsigned_type_for (type);
2308 tem1 = fold_convert (type, tem1);
2309 tem2 = fold_convert (type, tem2);
2310 lowi = fold_convert (type, lowi);
2311 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2312 tem1 = fold_binary (MINUS_EXPR, type,
2313 fold_convert (type, rangei->exp), lowi);
2314 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2315 lowj = build_int_cst (type, 0);
2316 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2317 NULL, rangei->in_p, lowj, tem2,
2318 rangei->strict_overflow_p
2319 || rangej->strict_overflow_p))
2320 return true;
2321 return false;
2324 /* It does some common checks for function optimize_range_tests_xor and
2325 optimize_range_tests_diff.
2326 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2327 Else it calls optimize_range_tests_diff. */
2329 static bool
2330 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2331 bool optimize_xor, vec<operand_entry_t> *ops,
2332 struct range_entry *ranges)
2334 int i, j;
2335 bool any_changes = false;
2336 for (i = first; i < length; i++)
2338 tree lowi, highi, lowj, highj, type, tem;
2340 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2341 continue;
2342 type = TREE_TYPE (ranges[i].exp);
2343 if (!INTEGRAL_TYPE_P (type))
2344 continue;
2345 lowi = ranges[i].low;
2346 if (lowi == NULL_TREE)
2347 lowi = TYPE_MIN_VALUE (type);
2348 highi = ranges[i].high;
2349 if (highi == NULL_TREE)
2350 continue;
2351 for (j = i + 1; j < length && j < i + 64; j++)
2353 bool changes;
2354 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2355 continue;
2356 lowj = ranges[j].low;
2357 if (lowj == NULL_TREE)
2358 continue;
2359 highj = ranges[j].high;
2360 if (highj == NULL_TREE)
2361 highj = TYPE_MAX_VALUE (type);
2362 /* Check lowj > highi. */
2363 tem = fold_binary (GT_EXPR, boolean_type_node,
2364 lowj, highi);
2365 if (tem == NULL_TREE || !integer_onep (tem))
2366 continue;
2367 if (optimize_xor)
2368 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2369 highi, highj, ops,
2370 ranges + i, ranges + j);
2371 else
2372 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2373 highi, highj, ops,
2374 ranges + i, ranges + j);
2375 if (changes)
2377 any_changes = true;
2378 break;
2382 return any_changes;
2385 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2386 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2387 EXP on success, NULL otherwise. */
2389 static tree
2390 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2391 wide_int *mask, tree *totallowp)
2393 tree tem = int_const_binop (MINUS_EXPR, high, low);
2394 if (tem == NULL_TREE
2395 || TREE_CODE (tem) != INTEGER_CST
2396 || TREE_OVERFLOW (tem)
2397 || tree_int_cst_sgn (tem) == -1
2398 || compare_tree_int (tem, prec) != -1)
2399 return NULL_TREE;
2401 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2402 *mask = wi::shifted_mask (0, max, false, prec);
2403 if (TREE_CODE (exp) == BIT_AND_EXPR
2404 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2406 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2407 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2408 if (wi::popcount (msk) == 1
2409 && wi::ltu_p (msk, prec - max))
2411 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2412 max += msk.to_uhwi ();
2413 exp = TREE_OPERAND (exp, 0);
2414 if (integer_zerop (low)
2415 && TREE_CODE (exp) == PLUS_EXPR
2416 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2418 widest_int bias
2419 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2420 TYPE_PRECISION (TREE_TYPE (low))));
2421 tree tbias = wide_int_to_tree (TREE_TYPE (low), bias);
2422 if (totallowp)
2424 *totallowp = tbias;
2425 exp = TREE_OPERAND (exp, 0);
2426 STRIP_NOPS (exp);
2427 return exp;
2429 else if (!tree_int_cst_lt (totallow, tbias))
2430 return NULL_TREE;
2431 bias -= wi::to_widest (totallow);
2432 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2434 *mask = wi::lshift (*mask, bias);
2435 exp = TREE_OPERAND (exp, 0);
2436 STRIP_NOPS (exp);
2437 return exp;
2442 if (totallowp)
2443 return exp;
2444 if (!tree_int_cst_lt (totallow, low))
2445 return exp;
2446 tem = int_const_binop (MINUS_EXPR, low, totallow);
2447 if (tem == NULL_TREE
2448 || TREE_CODE (tem) != INTEGER_CST
2449 || TREE_OVERFLOW (tem)
2450 || compare_tree_int (tem, prec - max) == 1)
2451 return NULL_TREE;
2453 *mask = wi::lshift (*mask, wi::to_widest (tem));
2454 return exp;
2457 /* Attempt to optimize small range tests using bit test.
2458 E.g.
2459 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2460 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2461 has been by earlier optimizations optimized into:
2462 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2463 As all the 43 through 82 range is less than 64 numbers,
2464 for 64-bit word targets optimize that into:
2465 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
2467 static bool
2468 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2469 vec<operand_entry_t> *ops,
2470 struct range_entry *ranges)
2472 int i, j;
2473 bool any_changes = false;
2474 int prec = GET_MODE_BITSIZE (word_mode);
2475 auto_vec<struct range_entry *, 64> candidates;
2477 for (i = first; i < length - 2; i++)
2479 tree lowi, highi, lowj, highj, type;
2481 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2482 continue;
2483 type = TREE_TYPE (ranges[i].exp);
2484 if (!INTEGRAL_TYPE_P (type))
2485 continue;
2486 lowi = ranges[i].low;
2487 if (lowi == NULL_TREE)
2488 lowi = TYPE_MIN_VALUE (type);
2489 highi = ranges[i].high;
2490 if (highi == NULL_TREE)
2491 continue;
2492 wide_int mask;
2493 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2494 highi, &mask, &lowi);
2495 if (exp == NULL_TREE)
2496 continue;
2497 bool strict_overflow_p = ranges[i].strict_overflow_p;
2498 candidates.truncate (0);
2499 int end = MIN (i + 64, length);
2500 for (j = i + 1; j < end; j++)
2502 tree exp2;
2503 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2504 continue;
2505 if (ranges[j].exp == exp)
2507 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2509 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2510 if (exp2 == exp)
2512 else if (TREE_CODE (exp2) == PLUS_EXPR)
2514 exp2 = TREE_OPERAND (exp2, 0);
2515 STRIP_NOPS (exp2);
2516 if (exp2 != exp)
2517 continue;
2519 else
2520 continue;
2522 else
2523 continue;
2524 lowj = ranges[j].low;
2525 if (lowj == NULL_TREE)
2526 continue;
2527 highj = ranges[j].high;
2528 if (highj == NULL_TREE)
2529 highj = TYPE_MAX_VALUE (type);
2530 wide_int mask2;
2531 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2532 highj, &mask2, NULL);
2533 if (exp2 != exp)
2534 continue;
2535 mask |= mask2;
2536 strict_overflow_p |= ranges[j].strict_overflow_p;
2537 candidates.safe_push (&ranges[j]);
2540 /* If we need otherwise 3 or more comparisons, use a bit test. */
2541 if (candidates.length () >= 2)
2543 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2544 wi::to_widest (lowi)
2545 + prec - 1 - wi::clz (mask));
2546 operand_entry_t oe = (*ops)[ranges[i].idx];
2547 tree op = oe->op;
2548 gimple stmt = op ? SSA_NAME_DEF_STMT (op)
2549 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2550 location_t loc = gimple_location (stmt);
2551 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2553 /* See if it isn't cheaper to pretend the minimum value of the
2554 range is 0, if maximum value is small enough.
2555 We can avoid then subtraction of the minimum value, but the
2556 mask constant could be perhaps more expensive. */
2557 if (compare_tree_int (lowi, 0) > 0
2558 && compare_tree_int (high, prec) < 0)
2560 int cost_diff;
2561 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2562 rtx reg = gen_raw_REG (word_mode, 10000);
2563 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2564 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2565 GEN_INT (-m)), speed_p);
2566 rtx r = immed_wide_int_const (mask, word_mode);
2567 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2568 speed_p);
2569 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2570 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2571 speed_p);
2572 if (cost_diff > 0)
2574 mask = wi::lshift (mask, m);
2575 lowi = build_zero_cst (TREE_TYPE (lowi));
2579 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2580 false, lowi, high);
2581 if (tem == NULL_TREE || is_gimple_val (tem))
2582 continue;
2583 tree etype = unsigned_type_for (TREE_TYPE (exp));
2584 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2585 fold_convert_loc (loc, etype, exp),
2586 fold_convert_loc (loc, etype, lowi));
2587 exp = fold_convert_loc (loc, integer_type_node, exp);
2588 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2589 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2590 build_int_cst (word_type, 1), exp);
2591 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2592 wide_int_to_tree (word_type, mask));
2593 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2594 build_zero_cst (word_type));
2595 if (is_gimple_val (exp))
2596 continue;
2598 /* The shift might have undefined behavior if TEM is true,
2599 but reassociate_bb isn't prepared to have basic blocks
2600 split when it is running. So, temporarily emit a code
2601 with BIT_IOR_EXPR instead of &&, and fix it up in
2602 branch_fixup. */
2603 gimple_seq seq;
2604 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2605 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2606 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2607 gimple_seq seq2;
2608 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2609 gimple_seq_add_seq_without_update (&seq, seq2);
2610 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2611 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2612 gimple g = gimple_build_assign (make_ssa_name (optype),
2613 BIT_IOR_EXPR, tem, exp);
2614 gimple_set_location (g, loc);
2615 gimple_seq_add_stmt_without_update (&seq, g);
2616 exp = gimple_assign_lhs (g);
2617 tree val = build_zero_cst (optype);
2618 if (update_range_test (&ranges[i], NULL, candidates.address (),
2619 candidates.length (), opcode, ops, exp,
2620 seq, false, val, val, strict_overflow_p))
2622 any_changes = true;
2623 reassoc_branch_fixups.safe_push (tem);
2625 else
2626 gimple_seq_discard (seq);
2629 return any_changes;
2632 /* Optimize range tests, similarly how fold_range_test optimizes
2633 it on trees. The tree code for the binary
2634 operation between all the operands is OPCODE.
2635 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2636 maybe_optimize_range_tests for inter-bb range optimization.
2637 In that case if oe->op is NULL, oe->id is bb->index whose
2638 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2639 the actual opcode. */
2641 static bool
2642 optimize_range_tests (enum tree_code opcode,
2643 vec<operand_entry_t> *ops)
2645 unsigned int length = ops->length (), i, j, first;
2646 operand_entry_t oe;
2647 struct range_entry *ranges;
2648 bool any_changes = false;
2650 if (length == 1)
2651 return false;
2653 ranges = XNEWVEC (struct range_entry, length);
2654 for (i = 0; i < length; i++)
2656 oe = (*ops)[i];
2657 ranges[i].idx = i;
2658 init_range_entry (ranges + i, oe->op,
2659 oe->op ? NULL :
2660 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2661 /* For | invert it now, we will invert it again before emitting
2662 the optimized expression. */
2663 if (opcode == BIT_IOR_EXPR
2664 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2665 ranges[i].in_p = !ranges[i].in_p;
2668 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2669 for (i = 0; i < length; i++)
2670 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2671 break;
2673 /* Try to merge ranges. */
2674 for (first = i; i < length; i++)
2676 tree low = ranges[i].low;
2677 tree high = ranges[i].high;
2678 int in_p = ranges[i].in_p;
2679 bool strict_overflow_p = ranges[i].strict_overflow_p;
2680 int update_fail_count = 0;
2682 for (j = i + 1; j < length; j++)
2684 if (ranges[i].exp != ranges[j].exp)
2685 break;
2686 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2687 ranges[j].in_p, ranges[j].low, ranges[j].high))
2688 break;
2689 strict_overflow_p |= ranges[j].strict_overflow_p;
2692 if (j == i + 1)
2693 continue;
2695 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2696 opcode, ops, ranges[i].exp, NULL, in_p,
2697 low, high, strict_overflow_p))
2699 i = j - 1;
2700 any_changes = true;
2702 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2703 while update_range_test would fail. */
2704 else if (update_fail_count == 64)
2705 i = j - 1;
2706 else
2707 ++update_fail_count;
2710 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2711 ops, ranges);
2713 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2714 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2715 ops, ranges);
2716 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2717 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2718 ops, ranges);
2720 if (any_changes && opcode != ERROR_MARK)
2722 j = 0;
2723 FOR_EACH_VEC_ELT (*ops, i, oe)
2725 if (oe->op == error_mark_node)
2726 continue;
2727 else if (i != j)
2728 (*ops)[j] = oe;
2729 j++;
2731 ops->truncate (j);
2734 XDELETEVEC (ranges);
2735 return any_changes;
2738 /* Return true if STMT is a cast like:
2739 <bb N>:
2741 _123 = (int) _234;
2743 <bb M>:
2744 # _345 = PHI <_123(N), 1(...), 1(...)>
2745 where _234 has bool type, _123 has single use and
2746 bb N has a single successor M. This is commonly used in
2747 the last block of a range test. */
2749 static bool
2750 final_range_test_p (gimple stmt)
2752 basic_block bb, rhs_bb;
2753 edge e;
2754 tree lhs, rhs;
2755 use_operand_p use_p;
2756 gimple use_stmt;
2758 if (!gimple_assign_cast_p (stmt))
2759 return false;
2760 bb = gimple_bb (stmt);
2761 if (!single_succ_p (bb))
2762 return false;
2763 e = single_succ_edge (bb);
2764 if (e->flags & EDGE_COMPLEX)
2765 return false;
2767 lhs = gimple_assign_lhs (stmt);
2768 rhs = gimple_assign_rhs1 (stmt);
2769 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2770 || TREE_CODE (rhs) != SSA_NAME
2771 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2772 return false;
2774 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2775 if (!single_imm_use (lhs, &use_p, &use_stmt))
2776 return false;
2778 if (gimple_code (use_stmt) != GIMPLE_PHI
2779 || gimple_bb (use_stmt) != e->dest)
2780 return false;
2782 /* And that the rhs is defined in the same loop. */
2783 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2784 if (rhs_bb == NULL
2785 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2786 return false;
2788 return true;
2791 /* Return true if BB is suitable basic block for inter-bb range test
2792 optimization. If BACKWARD is true, BB should be the only predecessor
2793 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2794 or compared with to find a common basic block to which all conditions
2795 branch to if true resp. false. If BACKWARD is false, TEST_BB should
2796 be the only predecessor of BB. */
2798 static bool
2799 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2800 bool backward)
2802 edge_iterator ei, ei2;
2803 edge e, e2;
2804 gimple stmt;
2805 gphi_iterator gsi;
2806 bool other_edge_seen = false;
2807 bool is_cond;
2809 if (test_bb == bb)
2810 return false;
2811 /* Check last stmt first. */
2812 stmt = last_stmt (bb);
2813 if (stmt == NULL
2814 || (gimple_code (stmt) != GIMPLE_COND
2815 && (backward || !final_range_test_p (stmt)))
2816 || gimple_visited_p (stmt)
2817 || stmt_could_throw_p (stmt)
2818 || *other_bb == bb)
2819 return false;
2820 is_cond = gimple_code (stmt) == GIMPLE_COND;
2821 if (is_cond)
2823 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2824 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2825 to *OTHER_BB (if not set yet, try to find it out). */
2826 if (EDGE_COUNT (bb->succs) != 2)
2827 return false;
2828 FOR_EACH_EDGE (e, ei, bb->succs)
2830 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2831 return false;
2832 if (e->dest == test_bb)
2834 if (backward)
2835 continue;
2836 else
2837 return false;
2839 if (e->dest == bb)
2840 return false;
2841 if (*other_bb == NULL)
2843 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2844 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2845 return false;
2846 else if (e->dest == e2->dest)
2847 *other_bb = e->dest;
2848 if (*other_bb == NULL)
2849 return false;
2851 if (e->dest == *other_bb)
2852 other_edge_seen = true;
2853 else if (backward)
2854 return false;
2856 if (*other_bb == NULL || !other_edge_seen)
2857 return false;
2859 else if (single_succ (bb) != *other_bb)
2860 return false;
2862 /* Now check all PHIs of *OTHER_BB. */
2863 e = find_edge (bb, *other_bb);
2864 e2 = find_edge (test_bb, *other_bb);
2865 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2867 gphi *phi = gsi.phi ();
2868 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2869 corresponding to BB and TEST_BB predecessor must be the same. */
2870 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2871 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2873 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2874 one of the PHIs should have the lhs of the last stmt in
2875 that block as PHI arg and that PHI should have 0 or 1
2876 corresponding to it in all other range test basic blocks
2877 considered. */
2878 if (!is_cond)
2880 if (gimple_phi_arg_def (phi, e->dest_idx)
2881 == gimple_assign_lhs (stmt)
2882 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2883 || integer_onep (gimple_phi_arg_def (phi,
2884 e2->dest_idx))))
2885 continue;
2887 else
2889 gimple test_last = last_stmt (test_bb);
2890 if (gimple_code (test_last) != GIMPLE_COND
2891 && gimple_phi_arg_def (phi, e2->dest_idx)
2892 == gimple_assign_lhs (test_last)
2893 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2894 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2895 continue;
2898 return false;
2901 return true;
2904 /* Return true if BB doesn't have side-effects that would disallow
2905 range test optimization, all SSA_NAMEs set in the bb are consumed
2906 in the bb and there are no PHIs. */
2908 static bool
2909 no_side_effect_bb (basic_block bb)
2911 gimple_stmt_iterator gsi;
2912 gimple last;
2914 if (!gimple_seq_empty_p (phi_nodes (bb)))
2915 return false;
2916 last = last_stmt (bb);
2917 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2919 gimple stmt = gsi_stmt (gsi);
2920 tree lhs;
2921 imm_use_iterator imm_iter;
2922 use_operand_p use_p;
2924 if (is_gimple_debug (stmt))
2925 continue;
2926 if (gimple_has_side_effects (stmt))
2927 return false;
2928 if (stmt == last)
2929 return true;
2930 if (!is_gimple_assign (stmt))
2931 return false;
2932 lhs = gimple_assign_lhs (stmt);
2933 if (TREE_CODE (lhs) != SSA_NAME)
2934 return false;
2935 if (gimple_assign_rhs_could_trap_p (stmt))
2936 return false;
2937 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2939 gimple use_stmt = USE_STMT (use_p);
2940 if (is_gimple_debug (use_stmt))
2941 continue;
2942 if (gimple_bb (use_stmt) != bb)
2943 return false;
2946 return false;
2949 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2950 return true and fill in *OPS recursively. */
2952 static bool
2953 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2954 struct loop *loop)
2956 gimple stmt = SSA_NAME_DEF_STMT (var);
2957 tree rhs[2];
2958 int i;
2960 if (!is_reassociable_op (stmt, code, loop))
2961 return false;
2963 rhs[0] = gimple_assign_rhs1 (stmt);
2964 rhs[1] = gimple_assign_rhs2 (stmt);
2965 gimple_set_visited (stmt, true);
2966 for (i = 0; i < 2; i++)
2967 if (TREE_CODE (rhs[i]) == SSA_NAME
2968 && !get_ops (rhs[i], code, ops, loop)
2969 && has_single_use (rhs[i]))
2971 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2973 oe->op = rhs[i];
2974 oe->rank = code;
2975 oe->id = 0;
2976 oe->count = 1;
2977 ops->safe_push (oe);
2979 return true;
2982 /* Find the ops that were added by get_ops starting from VAR, see if
2983 they were changed during update_range_test and if yes, create new
2984 stmts. */
2986 static tree
2987 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2988 unsigned int *pidx, struct loop *loop)
2990 gimple stmt = SSA_NAME_DEF_STMT (var);
2991 tree rhs[4];
2992 int i;
2994 if (!is_reassociable_op (stmt, code, loop))
2995 return NULL;
2997 rhs[0] = gimple_assign_rhs1 (stmt);
2998 rhs[1] = gimple_assign_rhs2 (stmt);
2999 rhs[2] = rhs[0];
3000 rhs[3] = rhs[1];
3001 for (i = 0; i < 2; i++)
3002 if (TREE_CODE (rhs[i]) == SSA_NAME)
3004 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3005 if (rhs[2 + i] == NULL_TREE)
3007 if (has_single_use (rhs[i]))
3008 rhs[2 + i] = ops[(*pidx)++]->op;
3009 else
3010 rhs[2 + i] = rhs[i];
3013 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3014 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3016 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3017 var = make_ssa_name (TREE_TYPE (var));
3018 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3019 rhs[2], rhs[3]);
3020 gimple_set_uid (g, gimple_uid (stmt));
3021 gimple_set_visited (g, true);
3022 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3024 return var;
3027 /* Structure to track the initial value passed to get_ops and
3028 the range in the ops vector for each basic block. */
3030 struct inter_bb_range_test_entry
3032 tree op;
3033 unsigned int first_idx, last_idx;
3036 /* Inter-bb range test optimization. */
3038 static void
3039 maybe_optimize_range_tests (gimple stmt)
3041 basic_block first_bb = gimple_bb (stmt);
3042 basic_block last_bb = first_bb;
3043 basic_block other_bb = NULL;
3044 basic_block bb;
3045 edge_iterator ei;
3046 edge e;
3047 auto_vec<operand_entry_t> ops;
3048 auto_vec<inter_bb_range_test_entry> bbinfo;
3049 bool any_changes = false;
3051 /* Consider only basic blocks that end with GIMPLE_COND or
3052 a cast statement satisfying final_range_test_p. All
3053 but the last bb in the first_bb .. last_bb range
3054 should end with GIMPLE_COND. */
3055 if (gimple_code (stmt) == GIMPLE_COND)
3057 if (EDGE_COUNT (first_bb->succs) != 2)
3058 return;
3060 else if (final_range_test_p (stmt))
3061 other_bb = single_succ (first_bb);
3062 else
3063 return;
3065 if (stmt_could_throw_p (stmt))
3066 return;
3068 /* As relative ordering of post-dominator sons isn't fixed,
3069 maybe_optimize_range_tests can be called first on any
3070 bb in the range we want to optimize. So, start searching
3071 backwards, if first_bb can be set to a predecessor. */
3072 while (single_pred_p (first_bb))
3074 basic_block pred_bb = single_pred (first_bb);
3075 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3076 break;
3077 if (!no_side_effect_bb (first_bb))
3078 break;
3079 first_bb = pred_bb;
3081 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3082 Before starting forward search in last_bb successors, find
3083 out the other_bb. */
3084 if (first_bb == last_bb)
3086 other_bb = NULL;
3087 /* As non-GIMPLE_COND last stmt always terminates the range,
3088 if forward search didn't discover anything, just give up. */
3089 if (gimple_code (stmt) != GIMPLE_COND)
3090 return;
3091 /* Look at both successors. Either it ends with a GIMPLE_COND
3092 and satisfies suitable_cond_bb, or ends with a cast and
3093 other_bb is that cast's successor. */
3094 FOR_EACH_EDGE (e, ei, first_bb->succs)
3095 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3096 || e->dest == first_bb)
3097 return;
3098 else if (single_pred_p (e->dest))
3100 stmt = last_stmt (e->dest);
3101 if (stmt
3102 && gimple_code (stmt) == GIMPLE_COND
3103 && EDGE_COUNT (e->dest->succs) == 2)
3105 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3106 break;
3107 else
3108 other_bb = NULL;
3110 else if (stmt
3111 && final_range_test_p (stmt)
3112 && find_edge (first_bb, single_succ (e->dest)))
3114 other_bb = single_succ (e->dest);
3115 if (other_bb == first_bb)
3116 other_bb = NULL;
3119 if (other_bb == NULL)
3120 return;
3122 /* Now do the forward search, moving last_bb to successor bbs
3123 that aren't other_bb. */
3124 while (EDGE_COUNT (last_bb->succs) == 2)
3126 FOR_EACH_EDGE (e, ei, last_bb->succs)
3127 if (e->dest != other_bb)
3128 break;
3129 if (e == NULL)
3130 break;
3131 if (!single_pred_p (e->dest))
3132 break;
3133 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3134 break;
3135 if (!no_side_effect_bb (e->dest))
3136 break;
3137 last_bb = e->dest;
3139 if (first_bb == last_bb)
3140 return;
3141 /* Here basic blocks first_bb through last_bb's predecessor
3142 end with GIMPLE_COND, all of them have one of the edges to
3143 other_bb and another to another block in the range,
3144 all blocks except first_bb don't have side-effects and
3145 last_bb ends with either GIMPLE_COND, or cast satisfying
3146 final_range_test_p. */
3147 for (bb = last_bb; ; bb = single_pred (bb))
3149 enum tree_code code;
3150 tree lhs, rhs;
3151 inter_bb_range_test_entry bb_ent;
3153 bb_ent.op = NULL_TREE;
3154 bb_ent.first_idx = ops.length ();
3155 bb_ent.last_idx = bb_ent.first_idx;
3156 e = find_edge (bb, other_bb);
3157 stmt = last_stmt (bb);
3158 gimple_set_visited (stmt, true);
3159 if (gimple_code (stmt) != GIMPLE_COND)
3161 use_operand_p use_p;
3162 gimple phi;
3163 edge e2;
3164 unsigned int d;
3166 lhs = gimple_assign_lhs (stmt);
3167 rhs = gimple_assign_rhs1 (stmt);
3168 gcc_assert (bb == last_bb);
3170 /* stmt is
3171 _123 = (int) _234;
3173 followed by:
3174 <bb M>:
3175 # _345 = PHI <_123(N), 1(...), 1(...)>
3177 or 0 instead of 1. If it is 0, the _234
3178 range test is anded together with all the
3179 other range tests, if it is 1, it is ored with
3180 them. */
3181 single_imm_use (lhs, &use_p, &phi);
3182 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3183 e2 = find_edge (first_bb, other_bb);
3184 d = e2->dest_idx;
3185 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3186 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3187 code = BIT_AND_EXPR;
3188 else
3190 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3191 code = BIT_IOR_EXPR;
3194 /* If _234 SSA_NAME_DEF_STMT is
3195 _234 = _567 | _789;
3196 (or &, corresponding to 1/0 in the phi arguments,
3197 push into ops the individual range test arguments
3198 of the bitwise or resp. and, recursively. */
3199 if (!get_ops (rhs, code, &ops,
3200 loop_containing_stmt (stmt))
3201 && has_single_use (rhs))
3203 /* Otherwise, push the _234 range test itself. */
3204 operand_entry_t oe
3205 = (operand_entry_t) pool_alloc (operand_entry_pool);
3207 oe->op = rhs;
3208 oe->rank = code;
3209 oe->id = 0;
3210 oe->count = 1;
3211 ops.safe_push (oe);
3212 bb_ent.last_idx++;
3214 else
3215 bb_ent.last_idx = ops.length ();
3216 bb_ent.op = rhs;
3217 bbinfo.safe_push (bb_ent);
3218 continue;
3220 /* Otherwise stmt is GIMPLE_COND. */
3221 code = gimple_cond_code (stmt);
3222 lhs = gimple_cond_lhs (stmt);
3223 rhs = gimple_cond_rhs (stmt);
3224 if (TREE_CODE (lhs) == SSA_NAME
3225 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3226 && ((code != EQ_EXPR && code != NE_EXPR)
3227 || rhs != boolean_false_node
3228 /* Either push into ops the individual bitwise
3229 or resp. and operands, depending on which
3230 edge is other_bb. */
3231 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3232 ^ (code == EQ_EXPR))
3233 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3234 loop_containing_stmt (stmt))))
3236 /* Or push the GIMPLE_COND stmt itself. */
3237 operand_entry_t oe
3238 = (operand_entry_t) pool_alloc (operand_entry_pool);
3240 oe->op = NULL;
3241 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3242 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3243 /* oe->op = NULL signs that there is no SSA_NAME
3244 for the range test, and oe->id instead is the
3245 basic block number, at which's end the GIMPLE_COND
3246 is. */
3247 oe->id = bb->index;
3248 oe->count = 1;
3249 ops.safe_push (oe);
3250 bb_ent.op = NULL;
3251 bb_ent.last_idx++;
3253 else if (ops.length () > bb_ent.first_idx)
3255 bb_ent.op = lhs;
3256 bb_ent.last_idx = ops.length ();
3258 bbinfo.safe_push (bb_ent);
3259 if (bb == first_bb)
3260 break;
3262 if (ops.length () > 1)
3263 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3264 if (any_changes)
3266 unsigned int idx;
3267 /* update_ops relies on has_single_use predicates returning the
3268 same values as it did during get_ops earlier. Additionally it
3269 never removes statements, only adds new ones and it should walk
3270 from the single imm use and check the predicate already before
3271 making those changes.
3272 On the other side, the handling of GIMPLE_COND directly can turn
3273 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3274 it needs to be done in a separate loop afterwards. */
3275 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3277 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3278 && bbinfo[idx].op != NULL_TREE)
3280 tree new_op;
3282 stmt = last_stmt (bb);
3283 new_op = update_ops (bbinfo[idx].op,
3284 (enum tree_code)
3285 ops[bbinfo[idx].first_idx]->rank,
3286 ops, &bbinfo[idx].first_idx,
3287 loop_containing_stmt (stmt));
3288 if (new_op == NULL_TREE)
3290 gcc_assert (bb == last_bb);
3291 new_op = ops[bbinfo[idx].first_idx++]->op;
3293 if (bbinfo[idx].op != new_op)
3295 imm_use_iterator iter;
3296 use_operand_p use_p;
3297 gimple use_stmt, cast_stmt = NULL;
3299 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3300 if (is_gimple_debug (use_stmt))
3301 continue;
3302 else if (gimple_code (use_stmt) == GIMPLE_COND
3303 || gimple_code (use_stmt) == GIMPLE_PHI)
3304 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3305 SET_USE (use_p, new_op);
3306 else if (gimple_assign_cast_p (use_stmt))
3307 cast_stmt = use_stmt;
3308 else
3309 gcc_unreachable ();
3310 if (cast_stmt)
3312 gcc_assert (bb == last_bb);
3313 tree lhs = gimple_assign_lhs (cast_stmt);
3314 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3315 enum tree_code rhs_code
3316 = gimple_assign_rhs_code (cast_stmt);
3317 gassign *g;
3318 if (is_gimple_min_invariant (new_op))
3320 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3321 g = gimple_build_assign (new_lhs, new_op);
3323 else
3324 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3325 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3326 gimple_set_uid (g, gimple_uid (cast_stmt));
3327 gimple_set_visited (g, true);
3328 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3329 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3330 if (is_gimple_debug (use_stmt))
3331 continue;
3332 else if (gimple_code (use_stmt) == GIMPLE_COND
3333 || gimple_code (use_stmt) == GIMPLE_PHI)
3334 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3335 SET_USE (use_p, new_lhs);
3336 else
3337 gcc_unreachable ();
3341 if (bb == first_bb)
3342 break;
3344 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3346 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3347 && bbinfo[idx].op == NULL_TREE
3348 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3350 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3351 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3352 gimple_cond_make_false (cond_stmt);
3353 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3354 gimple_cond_make_true (cond_stmt);
3355 else
3357 gimple_cond_set_code (cond_stmt, NE_EXPR);
3358 gimple_cond_set_lhs (cond_stmt,
3359 ops[bbinfo[idx].first_idx]->op);
3360 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3362 update_stmt (cond_stmt);
3364 if (bb == first_bb)
3365 break;
3370 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3371 of STMT in it's operands. This is also known as a "destructive
3372 update" operation. */
3374 static bool
3375 is_phi_for_stmt (gimple stmt, tree operand)
3377 gimple def_stmt;
3378 gphi *def_phi;
3379 tree lhs;
3380 use_operand_p arg_p;
3381 ssa_op_iter i;
3383 if (TREE_CODE (operand) != SSA_NAME)
3384 return false;
3386 lhs = gimple_assign_lhs (stmt);
3388 def_stmt = SSA_NAME_DEF_STMT (operand);
3389 def_phi = dyn_cast <gphi *> (def_stmt);
3390 if (!def_phi)
3391 return false;
3393 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3394 if (lhs == USE_FROM_PTR (arg_p))
3395 return true;
3396 return false;
3399 /* Remove def stmt of VAR if VAR has zero uses and recurse
3400 on rhs1 operand if so. */
3402 static void
3403 remove_visited_stmt_chain (tree var)
3405 gimple stmt;
3406 gimple_stmt_iterator gsi;
3408 while (1)
3410 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3411 return;
3412 stmt = SSA_NAME_DEF_STMT (var);
3413 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3415 var = gimple_assign_rhs1 (stmt);
3416 gsi = gsi_for_stmt (stmt);
3417 reassoc_remove_stmt (&gsi);
3418 release_defs (stmt);
3420 else
3421 return;
3425 /* This function checks three consequtive operands in
3426 passed operands vector OPS starting from OPINDEX and
3427 swaps two operands if it is profitable for binary operation
3428 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
3430 We pair ops with the same rank if possible.
3432 The alternative we try is to see if STMT is a destructive
3433 update style statement, which is like:
3434 b = phi (a, ...)
3435 a = c + b;
3436 In that case, we want to use the destructive update form to
3437 expose the possible vectorizer sum reduction opportunity.
3438 In that case, the third operand will be the phi node. This
3439 check is not performed if STMT is null.
3441 We could, of course, try to be better as noted above, and do a
3442 lot of work to try to find these opportunities in >3 operand
3443 cases, but it is unlikely to be worth it. */
3445 static void
3446 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3447 unsigned int opindex, gimple stmt)
3449 operand_entry_t oe1, oe2, oe3;
3451 oe1 = ops[opindex];
3452 oe2 = ops[opindex + 1];
3453 oe3 = ops[opindex + 2];
3455 if ((oe1->rank == oe2->rank
3456 && oe2->rank != oe3->rank)
3457 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3458 && !is_phi_for_stmt (stmt, oe1->op)
3459 && !is_phi_for_stmt (stmt, oe2->op)))
3461 struct operand_entry temp = *oe3;
3462 oe3->op = oe1->op;
3463 oe3->rank = oe1->rank;
3464 oe1->op = temp.op;
3465 oe1->rank= temp.rank;
3467 else if ((oe1->rank == oe3->rank
3468 && oe2->rank != oe3->rank)
3469 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3470 && !is_phi_for_stmt (stmt, oe1->op)
3471 && !is_phi_for_stmt (stmt, oe3->op)))
3473 struct operand_entry temp = *oe2;
3474 oe2->op = oe1->op;
3475 oe2->rank = oe1->rank;
3476 oe1->op = temp.op;
3477 oe1->rank = temp.rank;
3481 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3482 two definitions, otherwise return STMT. */
3484 static inline gimple
3485 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3487 if (TREE_CODE (rhs1) == SSA_NAME
3488 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3489 stmt = SSA_NAME_DEF_STMT (rhs1);
3490 if (TREE_CODE (rhs2) == SSA_NAME
3491 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3492 stmt = SSA_NAME_DEF_STMT (rhs2);
3493 return stmt;
3496 /* Recursively rewrite our linearized statements so that the operators
3497 match those in OPS[OPINDEX], putting the computation in rank
3498 order. Return new lhs. */
3500 static tree
3501 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3502 vec<operand_entry_t> ops, bool changed)
3504 tree rhs1 = gimple_assign_rhs1 (stmt);
3505 tree rhs2 = gimple_assign_rhs2 (stmt);
3506 tree lhs = gimple_assign_lhs (stmt);
3507 operand_entry_t oe;
3509 /* The final recursion case for this function is that you have
3510 exactly two operations left.
3511 If we had one exactly one op in the entire list to start with, we
3512 would have never called this function, and the tail recursion
3513 rewrites them one at a time. */
3514 if (opindex + 2 == ops.length ())
3516 operand_entry_t oe1, oe2;
3518 oe1 = ops[opindex];
3519 oe2 = ops[opindex + 1];
3521 if (rhs1 != oe1->op || rhs2 != oe2->op)
3523 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3524 unsigned int uid = gimple_uid (stmt);
3526 if (dump_file && (dump_flags & TDF_DETAILS))
3528 fprintf (dump_file, "Transforming ");
3529 print_gimple_stmt (dump_file, stmt, 0, 0);
3532 if (changed)
3534 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3535 lhs = make_ssa_name (TREE_TYPE (lhs));
3536 stmt
3537 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3538 oe1->op, oe2->op);
3539 gimple_set_uid (stmt, uid);
3540 gimple_set_visited (stmt, true);
3541 if (insert_point == gsi_stmt (gsi))
3542 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3543 else
3544 insert_stmt_after (stmt, insert_point);
3546 else
3548 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3549 == stmt);
3550 gimple_assign_set_rhs1 (stmt, oe1->op);
3551 gimple_assign_set_rhs2 (stmt, oe2->op);
3552 update_stmt (stmt);
3555 if (rhs1 != oe1->op && rhs1 != oe2->op)
3556 remove_visited_stmt_chain (rhs1);
3558 if (dump_file && (dump_flags & TDF_DETAILS))
3560 fprintf (dump_file, " into ");
3561 print_gimple_stmt (dump_file, stmt, 0, 0);
3564 return lhs;
3567 /* If we hit here, we should have 3 or more ops left. */
3568 gcc_assert (opindex + 2 < ops.length ());
3570 /* Rewrite the next operator. */
3571 oe = ops[opindex];
3573 /* Recurse on the LHS of the binary operator, which is guaranteed to
3574 be the non-leaf side. */
3575 tree new_rhs1
3576 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3577 changed || oe->op != rhs2);
3579 if (oe->op != rhs2 || new_rhs1 != rhs1)
3581 if (dump_file && (dump_flags & TDF_DETAILS))
3583 fprintf (dump_file, "Transforming ");
3584 print_gimple_stmt (dump_file, stmt, 0, 0);
3587 /* If changed is false, this is either opindex == 0
3588 or all outer rhs2's were equal to corresponding oe->op,
3589 and powi_result is NULL.
3590 That means lhs is equivalent before and after reassociation.
3591 Otherwise ensure the old lhs SSA_NAME is not reused and
3592 create a new stmt as well, so that any debug stmts will be
3593 properly adjusted. */
3594 if (changed)
3596 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3597 unsigned int uid = gimple_uid (stmt);
3598 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3600 lhs = make_ssa_name (TREE_TYPE (lhs));
3601 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3602 new_rhs1, oe->op);
3603 gimple_set_uid (stmt, uid);
3604 gimple_set_visited (stmt, true);
3605 if (insert_point == gsi_stmt (gsi))
3606 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3607 else
3608 insert_stmt_after (stmt, insert_point);
3610 else
3612 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3613 == stmt);
3614 gimple_assign_set_rhs1 (stmt, new_rhs1);
3615 gimple_assign_set_rhs2 (stmt, oe->op);
3616 update_stmt (stmt);
3619 if (dump_file && (dump_flags & TDF_DETAILS))
3621 fprintf (dump_file, " into ");
3622 print_gimple_stmt (dump_file, stmt, 0, 0);
3625 return lhs;
3628 /* Find out how many cycles we need to compute statements chain.
3629 OPS_NUM holds number os statements in a chain. CPU_WIDTH is a
3630 maximum number of independent statements we may execute per cycle. */
3632 static int
3633 get_required_cycles (int ops_num, int cpu_width)
3635 int res;
3636 int elog;
3637 unsigned int rest;
3639 /* While we have more than 2 * cpu_width operands
3640 we may reduce number of operands by cpu_width
3641 per cycle. */
3642 res = ops_num / (2 * cpu_width);
3644 /* Remained operands count may be reduced twice per cycle
3645 until we have only one operand. */
3646 rest = (unsigned)(ops_num - res * cpu_width);
3647 elog = exact_log2 (rest);
3648 if (elog >= 0)
3649 res += elog;
3650 else
3651 res += floor_log2 (rest) + 1;
3653 return res;
3656 /* Returns an optimal number of registers to use for computation of
3657 given statements. */
3659 static int
3660 get_reassociation_width (int ops_num, enum tree_code opc,
3661 machine_mode mode)
3663 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3664 int width;
3665 int width_min;
3666 int cycles_best;
3668 if (param_width > 0)
3669 width = param_width;
3670 else
3671 width = targetm.sched.reassociation_width (opc, mode);
3673 if (width == 1)
3674 return width;
3676 /* Get the minimal time required for sequence computation. */
3677 cycles_best = get_required_cycles (ops_num, width);
3679 /* Check if we may use less width and still compute sequence for
3680 the same time. It will allow us to reduce registers usage.
3681 get_required_cycles is monotonically increasing with lower width
3682 so we can perform a binary search for the minimal width that still
3683 results in the optimal cycle count. */
3684 width_min = 1;
3685 while (width > width_min)
3687 int width_mid = (width + width_min) / 2;
3689 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3690 width = width_mid;
3691 else if (width_min < width_mid)
3692 width_min = width_mid;
3693 else
3694 break;
3697 return width;
3700 /* Recursively rewrite our linearized statements so that the operators
3701 match those in OPS[OPINDEX], putting the computation in rank
3702 order and trying to allow operations to be executed in
3703 parallel. */
3705 static void
3706 rewrite_expr_tree_parallel (gassign *stmt, int width,
3707 vec<operand_entry_t> ops)
3709 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3710 int op_num = ops.length ();
3711 int stmt_num = op_num - 1;
3712 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3713 int op_index = op_num - 1;
3714 int stmt_index = 0;
3715 int ready_stmts_end = 0;
3716 int i = 0;
3717 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3719 /* We start expression rewriting from the top statements.
3720 So, in this loop we create a full list of statements
3721 we will work with. */
3722 stmts[stmt_num - 1] = stmt;
3723 for (i = stmt_num - 2; i >= 0; i--)
3724 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3726 for (i = 0; i < stmt_num; i++)
3728 tree op1, op2;
3730 /* Determine whether we should use results of
3731 already handled statements or not. */
3732 if (ready_stmts_end == 0
3733 && (i - stmt_index >= width || op_index < 1))
3734 ready_stmts_end = i;
3736 /* Now we choose operands for the next statement. Non zero
3737 value in ready_stmts_end means here that we should use
3738 the result of already generated statements as new operand. */
3739 if (ready_stmts_end > 0)
3741 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3742 if (ready_stmts_end > stmt_index)
3743 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3744 else if (op_index >= 0)
3745 op2 = ops[op_index--]->op;
3746 else
3748 gcc_assert (stmt_index < i);
3749 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3752 if (stmt_index >= ready_stmts_end)
3753 ready_stmts_end = 0;
3755 else
3757 if (op_index > 1)
3758 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3759 op2 = ops[op_index--]->op;
3760 op1 = ops[op_index--]->op;
3763 /* If we emit the last statement then we should put
3764 operands into the last statement. It will also
3765 break the loop. */
3766 if (op_index < 0 && stmt_index == i)
3767 i = stmt_num - 1;
3769 if (dump_file && (dump_flags & TDF_DETAILS))
3771 fprintf (dump_file, "Transforming ");
3772 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3775 /* We keep original statement only for the last one. All
3776 others are recreated. */
3777 if (i == stmt_num - 1)
3779 gimple_assign_set_rhs1 (stmts[i], op1);
3780 gimple_assign_set_rhs2 (stmts[i], op2);
3781 update_stmt (stmts[i]);
3783 else
3784 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3786 if (dump_file && (dump_flags & TDF_DETAILS))
3788 fprintf (dump_file, " into ");
3789 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3793 remove_visited_stmt_chain (last_rhs1);
3796 /* Transform STMT, which is really (A +B) + (C + D) into the left
3797 linear form, ((A+B)+C)+D.
3798 Recurse on D if necessary. */
3800 static void
3801 linearize_expr (gimple stmt)
3803 gimple_stmt_iterator gsi;
3804 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3805 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3806 gimple oldbinrhs = binrhs;
3807 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3808 gimple newbinrhs = NULL;
3809 struct loop *loop = loop_containing_stmt (stmt);
3810 tree lhs = gimple_assign_lhs (stmt);
3812 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3813 && is_reassociable_op (binrhs, rhscode, loop));
3815 gsi = gsi_for_stmt (stmt);
3817 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3818 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
3819 gimple_assign_rhs_code (binrhs),
3820 gimple_assign_lhs (binlhs),
3821 gimple_assign_rhs2 (binrhs));
3822 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3823 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3824 gimple_set_uid (binrhs, gimple_uid (stmt));
3826 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3827 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3829 if (dump_file && (dump_flags & TDF_DETAILS))
3831 fprintf (dump_file, "Linearized: ");
3832 print_gimple_stmt (dump_file, stmt, 0, 0);
3835 reassociate_stats.linearized++;
3836 update_stmt (stmt);
3838 gsi = gsi_for_stmt (oldbinrhs);
3839 reassoc_remove_stmt (&gsi);
3840 release_defs (oldbinrhs);
3842 gimple_set_visited (stmt, true);
3843 gimple_set_visited (binlhs, true);
3844 gimple_set_visited (binrhs, true);
3846 /* Tail recurse on the new rhs if it still needs reassociation. */
3847 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3848 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3849 want to change the algorithm while converting to tuples. */
3850 linearize_expr (stmt);
3853 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3854 it. Otherwise, return NULL. */
3856 static gimple
3857 get_single_immediate_use (tree lhs)
3859 use_operand_p immuse;
3860 gimple immusestmt;
3862 if (TREE_CODE (lhs) == SSA_NAME
3863 && single_imm_use (lhs, &immuse, &immusestmt)
3864 && is_gimple_assign (immusestmt))
3865 return immusestmt;
3867 return NULL;
3870 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3871 representing the negated value. Insertions of any necessary
3872 instructions go before GSI.
3873 This function is recursive in that, if you hand it "a_5" as the
3874 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3875 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3877 static tree
3878 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3880 gimple negatedefstmt = NULL;
3881 tree resultofnegate;
3882 gimple_stmt_iterator gsi;
3883 unsigned int uid;
3885 /* If we are trying to negate a name, defined by an add, negate the
3886 add operands instead. */
3887 if (TREE_CODE (tonegate) == SSA_NAME)
3888 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3889 if (TREE_CODE (tonegate) == SSA_NAME
3890 && is_gimple_assign (negatedefstmt)
3891 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3892 && has_single_use (gimple_assign_lhs (negatedefstmt))
3893 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3895 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3896 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3897 tree lhs = gimple_assign_lhs (negatedefstmt);
3898 gimple g;
3900 gsi = gsi_for_stmt (negatedefstmt);
3901 rhs1 = negate_value (rhs1, &gsi);
3903 gsi = gsi_for_stmt (negatedefstmt);
3904 rhs2 = negate_value (rhs2, &gsi);
3906 gsi = gsi_for_stmt (negatedefstmt);
3907 lhs = make_ssa_name (TREE_TYPE (lhs));
3908 gimple_set_visited (negatedefstmt, true);
3909 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
3910 gimple_set_uid (g, gimple_uid (negatedefstmt));
3911 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3912 return lhs;
3915 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3916 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3917 NULL_TREE, true, GSI_SAME_STMT);
3918 gsi = *gsip;
3919 uid = gimple_uid (gsi_stmt (gsi));
3920 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3922 gimple stmt = gsi_stmt (gsi);
3923 if (gimple_uid (stmt) != 0)
3924 break;
3925 gimple_set_uid (stmt, uid);
3927 return resultofnegate;
3930 /* Return true if we should break up the subtract in STMT into an add
3931 with negate. This is true when we the subtract operands are really
3932 adds, or the subtract itself is used in an add expression. In
3933 either case, breaking up the subtract into an add with negate
3934 exposes the adds to reassociation. */
3936 static bool
3937 should_break_up_subtract (gimple stmt)
3939 tree lhs = gimple_assign_lhs (stmt);
3940 tree binlhs = gimple_assign_rhs1 (stmt);
3941 tree binrhs = gimple_assign_rhs2 (stmt);
3942 gimple immusestmt;
3943 struct loop *loop = loop_containing_stmt (stmt);
3945 if (TREE_CODE (binlhs) == SSA_NAME
3946 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3947 return true;
3949 if (TREE_CODE (binrhs) == SSA_NAME
3950 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3951 return true;
3953 if (TREE_CODE (lhs) == SSA_NAME
3954 && (immusestmt = get_single_immediate_use (lhs))
3955 && is_gimple_assign (immusestmt)
3956 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3957 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3958 return true;
3959 return false;
3962 /* Transform STMT from A - B into A + -B. */
3964 static void
3965 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3967 tree rhs1 = gimple_assign_rhs1 (stmt);
3968 tree rhs2 = gimple_assign_rhs2 (stmt);
3970 if (dump_file && (dump_flags & TDF_DETAILS))
3972 fprintf (dump_file, "Breaking up subtract ");
3973 print_gimple_stmt (dump_file, stmt, 0, 0);
3976 rhs2 = negate_value (rhs2, gsip);
3977 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3978 update_stmt (stmt);
3981 /* Determine whether STMT is a builtin call that raises an SSA name
3982 to an integer power and has only one use. If so, and this is early
3983 reassociation and unsafe math optimizations are permitted, place
3984 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3985 If any of these conditions does not hold, return FALSE. */
3987 static bool
3988 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3990 tree fndecl, arg1;
3991 REAL_VALUE_TYPE c, cint;
3993 if (!first_pass_instance
3994 || !flag_unsafe_math_optimizations
3995 || !is_gimple_call (stmt)
3996 || !has_single_use (gimple_call_lhs (stmt)))
3997 return false;
3999 fndecl = gimple_call_fndecl (stmt);
4001 if (!fndecl
4002 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
4003 return false;
4005 switch (DECL_FUNCTION_CODE (fndecl))
4007 CASE_FLT_FN (BUILT_IN_POW):
4008 if (flag_errno_math)
4009 return false;
4011 *base = gimple_call_arg (stmt, 0);
4012 arg1 = gimple_call_arg (stmt, 1);
4014 if (TREE_CODE (arg1) != REAL_CST)
4015 return false;
4017 c = TREE_REAL_CST (arg1);
4019 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4020 return false;
4022 *exponent = real_to_integer (&c);
4023 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4024 if (!real_identical (&c, &cint))
4025 return false;
4027 break;
4029 CASE_FLT_FN (BUILT_IN_POWI):
4030 *base = gimple_call_arg (stmt, 0);
4031 arg1 = gimple_call_arg (stmt, 1);
4033 if (!tree_fits_shwi_p (arg1))
4034 return false;
4036 *exponent = tree_to_shwi (arg1);
4037 break;
4039 default:
4040 return false;
4043 /* Expanding negative exponents is generally unproductive, so we don't
4044 complicate matters with those. Exponents of zero and one should
4045 have been handled by expression folding. */
4046 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4047 return false;
4049 return true;
4052 /* Recursively linearize a binary expression that is the RHS of STMT.
4053 Place the operands of the expression tree in the vector named OPS. */
4055 static void
4056 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
4057 bool is_associative, bool set_visited)
4059 tree binlhs = gimple_assign_rhs1 (stmt);
4060 tree binrhs = gimple_assign_rhs2 (stmt);
4061 gimple binlhsdef = NULL, binrhsdef = NULL;
4062 bool binlhsisreassoc = false;
4063 bool binrhsisreassoc = false;
4064 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4065 struct loop *loop = loop_containing_stmt (stmt);
4066 tree base = NULL_TREE;
4067 HOST_WIDE_INT exponent = 0;
4069 if (set_visited)
4070 gimple_set_visited (stmt, true);
4072 if (TREE_CODE (binlhs) == SSA_NAME)
4074 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4075 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4076 && !stmt_could_throw_p (binlhsdef));
4079 if (TREE_CODE (binrhs) == SSA_NAME)
4081 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4082 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4083 && !stmt_could_throw_p (binrhsdef));
4086 /* If the LHS is not reassociable, but the RHS is, we need to swap
4087 them. If neither is reassociable, there is nothing we can do, so
4088 just put them in the ops vector. If the LHS is reassociable,
4089 linearize it. If both are reassociable, then linearize the RHS
4090 and the LHS. */
4092 if (!binlhsisreassoc)
4094 tree temp;
4096 /* If this is not a associative operation like division, give up. */
4097 if (!is_associative)
4099 add_to_ops_vec (ops, binrhs);
4100 return;
4103 if (!binrhsisreassoc)
4105 if (rhscode == MULT_EXPR
4106 && TREE_CODE (binrhs) == SSA_NAME
4107 && acceptable_pow_call (binrhsdef, &base, &exponent))
4109 add_repeat_to_ops_vec (ops, base, exponent);
4110 gimple_set_visited (binrhsdef, true);
4112 else
4113 add_to_ops_vec (ops, binrhs);
4115 if (rhscode == MULT_EXPR
4116 && TREE_CODE (binlhs) == SSA_NAME
4117 && acceptable_pow_call (binlhsdef, &base, &exponent))
4119 add_repeat_to_ops_vec (ops, base, exponent);
4120 gimple_set_visited (binlhsdef, true);
4122 else
4123 add_to_ops_vec (ops, binlhs);
4125 return;
4128 if (dump_file && (dump_flags & TDF_DETAILS))
4130 fprintf (dump_file, "swapping operands of ");
4131 print_gimple_stmt (dump_file, stmt, 0, 0);
4134 swap_ssa_operands (stmt,
4135 gimple_assign_rhs1_ptr (stmt),
4136 gimple_assign_rhs2_ptr (stmt));
4137 update_stmt (stmt);
4139 if (dump_file && (dump_flags & TDF_DETAILS))
4141 fprintf (dump_file, " is now ");
4142 print_gimple_stmt (dump_file, stmt, 0, 0);
4145 /* We want to make it so the lhs is always the reassociative op,
4146 so swap. */
4147 temp = binlhs;
4148 binlhs = binrhs;
4149 binrhs = temp;
4151 else if (binrhsisreassoc)
4153 linearize_expr (stmt);
4154 binlhs = gimple_assign_rhs1 (stmt);
4155 binrhs = gimple_assign_rhs2 (stmt);
4158 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4159 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4160 rhscode, loop));
4161 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4162 is_associative, set_visited);
4164 if (rhscode == MULT_EXPR
4165 && TREE_CODE (binrhs) == SSA_NAME
4166 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4168 add_repeat_to_ops_vec (ops, base, exponent);
4169 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4171 else
4172 add_to_ops_vec (ops, binrhs);
4175 /* Repropagate the negates back into subtracts, since no other pass
4176 currently does it. */
4178 static void
4179 repropagate_negates (void)
4181 unsigned int i = 0;
4182 tree negate;
4184 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4186 gimple user = get_single_immediate_use (negate);
4188 if (!user || !is_gimple_assign (user))
4189 continue;
4191 /* The negate operand can be either operand of a PLUS_EXPR
4192 (it can be the LHS if the RHS is a constant for example).
4194 Force the negate operand to the RHS of the PLUS_EXPR, then
4195 transform the PLUS_EXPR into a MINUS_EXPR. */
4196 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4198 /* If the negated operand appears on the LHS of the
4199 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4200 to force the negated operand to the RHS of the PLUS_EXPR. */
4201 if (gimple_assign_rhs1 (user) == negate)
4203 swap_ssa_operands (user,
4204 gimple_assign_rhs1_ptr (user),
4205 gimple_assign_rhs2_ptr (user));
4208 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4209 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4210 if (gimple_assign_rhs2 (user) == negate)
4212 tree rhs1 = gimple_assign_rhs1 (user);
4213 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
4214 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4215 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4216 update_stmt (user);
4219 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4221 if (gimple_assign_rhs1 (user) == negate)
4223 /* We have
4224 x = -a
4225 y = x - b
4226 which we transform into
4227 x = a + b
4228 y = -x .
4229 This pushes down the negate which we possibly can merge
4230 into some other operation, hence insert it into the
4231 plus_negates vector. */
4232 gimple feed = SSA_NAME_DEF_STMT (negate);
4233 tree a = gimple_assign_rhs1 (feed);
4234 tree b = gimple_assign_rhs2 (user);
4235 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4236 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4237 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4238 gimple g = gimple_build_assign (x, PLUS_EXPR, a, b);
4239 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4240 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4241 user = gsi_stmt (gsi2);
4242 update_stmt (user);
4243 reassoc_remove_stmt (&gsi);
4244 release_defs (feed);
4245 plus_negates.safe_push (gimple_assign_lhs (user));
4247 else
4249 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4250 rid of one operation. */
4251 gimple feed = SSA_NAME_DEF_STMT (negate);
4252 tree a = gimple_assign_rhs1 (feed);
4253 tree rhs1 = gimple_assign_rhs1 (user);
4254 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4255 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4256 update_stmt (gsi_stmt (gsi));
4262 /* Returns true if OP is of a type for which we can do reassociation.
4263 That is for integral or non-saturating fixed-point types, and for
4264 floating point type when associative-math is enabled. */
4266 static bool
4267 can_reassociate_p (tree op)
4269 tree type = TREE_TYPE (op);
4270 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4271 || NON_SAT_FIXED_POINT_TYPE_P (type)
4272 || (flag_associative_math && FLOAT_TYPE_P (type)))
4273 return true;
4274 return false;
4277 /* Break up subtract operations in block BB.
4279 We do this top down because we don't know whether the subtract is
4280 part of a possible chain of reassociation except at the top.
4282 IE given
4283 d = f + g
4284 c = a + e
4285 b = c - d
4286 q = b - r
4287 k = t - q
4289 we want to break up k = t - q, but we won't until we've transformed q
4290 = b - r, which won't be broken up until we transform b = c - d.
4292 En passant, clear the GIMPLE visited flag on every statement
4293 and set UIDs within each basic block. */
4295 static void
4296 break_up_subtract_bb (basic_block bb)
4298 gimple_stmt_iterator gsi;
4299 basic_block son;
4300 unsigned int uid = 1;
4302 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4304 gimple stmt = gsi_stmt (gsi);
4305 gimple_set_visited (stmt, false);
4306 gimple_set_uid (stmt, uid++);
4308 if (!is_gimple_assign (stmt)
4309 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4310 continue;
4312 /* Look for simple gimple subtract operations. */
4313 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4315 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4316 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4317 continue;
4319 /* Check for a subtract used only in an addition. If this
4320 is the case, transform it into add of a negate for better
4321 reassociation. IE transform C = A-B into C = A + -B if C
4322 is only used in an addition. */
4323 if (should_break_up_subtract (stmt))
4324 break_up_subtract (stmt, &gsi);
4326 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4327 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4328 plus_negates.safe_push (gimple_assign_lhs (stmt));
4330 for (son = first_dom_son (CDI_DOMINATORS, bb);
4331 son;
4332 son = next_dom_son (CDI_DOMINATORS, son))
4333 break_up_subtract_bb (son);
4336 /* Used for repeated factor analysis. */
4337 struct repeat_factor_d
4339 /* An SSA name that occurs in a multiply chain. */
4340 tree factor;
4342 /* Cached rank of the factor. */
4343 unsigned rank;
4345 /* Number of occurrences of the factor in the chain. */
4346 HOST_WIDE_INT count;
4348 /* An SSA name representing the product of this factor and
4349 all factors appearing later in the repeated factor vector. */
4350 tree repr;
4353 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4354 typedef const struct repeat_factor_d *const_repeat_factor_t;
4357 static vec<repeat_factor> repeat_factor_vec;
4359 /* Used for sorting the repeat factor vector. Sort primarily by
4360 ascending occurrence count, secondarily by descending rank. */
4362 static int
4363 compare_repeat_factors (const void *x1, const void *x2)
4365 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4366 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4368 if (rf1->count != rf2->count)
4369 return rf1->count - rf2->count;
4371 return rf2->rank - rf1->rank;
4374 /* Look for repeated operands in OPS in the multiply tree rooted at
4375 STMT. Replace them with an optimal sequence of multiplies and powi
4376 builtin calls, and remove the used operands from OPS. Return an
4377 SSA name representing the value of the replacement sequence. */
4379 static tree
4380 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4382 unsigned i, j, vec_len;
4383 int ii;
4384 operand_entry_t oe;
4385 repeat_factor_t rf1, rf2;
4386 repeat_factor rfnew;
4387 tree result = NULL_TREE;
4388 tree target_ssa, iter_result;
4389 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4390 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4391 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4392 gimple mul_stmt, pow_stmt;
4394 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4395 target. */
4396 if (!powi_fndecl)
4397 return NULL_TREE;
4399 /* Allocate the repeated factor vector. */
4400 repeat_factor_vec.create (10);
4402 /* Scan the OPS vector for all SSA names in the product and build
4403 up a vector of occurrence counts for each factor. */
4404 FOR_EACH_VEC_ELT (*ops, i, oe)
4406 if (TREE_CODE (oe->op) == SSA_NAME)
4408 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4410 if (rf1->factor == oe->op)
4412 rf1->count += oe->count;
4413 break;
4417 if (j >= repeat_factor_vec.length ())
4419 rfnew.factor = oe->op;
4420 rfnew.rank = oe->rank;
4421 rfnew.count = oe->count;
4422 rfnew.repr = NULL_TREE;
4423 repeat_factor_vec.safe_push (rfnew);
4428 /* Sort the repeated factor vector by (a) increasing occurrence count,
4429 and (b) decreasing rank. */
4430 repeat_factor_vec.qsort (compare_repeat_factors);
4432 /* It is generally best to combine as many base factors as possible
4433 into a product before applying __builtin_powi to the result.
4434 However, the sort order chosen for the repeated factor vector
4435 allows us to cache partial results for the product of the base
4436 factors for subsequent use. When we already have a cached partial
4437 result from a previous iteration, it is best to make use of it
4438 before looking for another __builtin_pow opportunity.
4440 As an example, consider x * x * y * y * y * z * z * z * z.
4441 We want to first compose the product x * y * z, raise it to the
4442 second power, then multiply this by y * z, and finally multiply
4443 by z. This can be done in 5 multiplies provided we cache y * z
4444 for use in both expressions:
4446 t1 = y * z
4447 t2 = t1 * x
4448 t3 = t2 * t2
4449 t4 = t1 * t3
4450 result = t4 * z
4452 If we instead ignored the cached y * z and first multiplied by
4453 the __builtin_pow opportunity z * z, we would get the inferior:
4455 t1 = y * z
4456 t2 = t1 * x
4457 t3 = t2 * t2
4458 t4 = z * z
4459 t5 = t3 * t4
4460 result = t5 * y */
4462 vec_len = repeat_factor_vec.length ();
4464 /* Repeatedly look for opportunities to create a builtin_powi call. */
4465 while (true)
4467 HOST_WIDE_INT power;
4469 /* First look for the largest cached product of factors from
4470 preceding iterations. If found, create a builtin_powi for
4471 it if the minimum occurrence count for its factors is at
4472 least 2, or just use this cached product as our next
4473 multiplicand if the minimum occurrence count is 1. */
4474 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4476 if (rf1->repr && rf1->count > 0)
4477 break;
4480 if (j < vec_len)
4482 power = rf1->count;
4484 if (power == 1)
4486 iter_result = rf1->repr;
4488 if (dump_file && (dump_flags & TDF_DETAILS))
4490 unsigned elt;
4491 repeat_factor_t rf;
4492 fputs ("Multiplying by cached product ", dump_file);
4493 for (elt = j; elt < vec_len; elt++)
4495 rf = &repeat_factor_vec[elt];
4496 print_generic_expr (dump_file, rf->factor, 0);
4497 if (elt < vec_len - 1)
4498 fputs (" * ", dump_file);
4500 fputs ("\n", dump_file);
4503 else
4505 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4506 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4507 build_int_cst (integer_type_node,
4508 power));
4509 gimple_call_set_lhs (pow_stmt, iter_result);
4510 gimple_set_location (pow_stmt, gimple_location (stmt));
4511 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4513 if (dump_file && (dump_flags & TDF_DETAILS))
4515 unsigned elt;
4516 repeat_factor_t rf;
4517 fputs ("Building __builtin_pow call for cached product (",
4518 dump_file);
4519 for (elt = j; elt < vec_len; elt++)
4521 rf = &repeat_factor_vec[elt];
4522 print_generic_expr (dump_file, rf->factor, 0);
4523 if (elt < vec_len - 1)
4524 fputs (" * ", dump_file);
4526 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4527 power);
4531 else
4533 /* Otherwise, find the first factor in the repeated factor
4534 vector whose occurrence count is at least 2. If no such
4535 factor exists, there are no builtin_powi opportunities
4536 remaining. */
4537 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4539 if (rf1->count >= 2)
4540 break;
4543 if (j >= vec_len)
4544 break;
4546 power = rf1->count;
4548 if (dump_file && (dump_flags & TDF_DETAILS))
4550 unsigned elt;
4551 repeat_factor_t rf;
4552 fputs ("Building __builtin_pow call for (", dump_file);
4553 for (elt = j; elt < vec_len; elt++)
4555 rf = &repeat_factor_vec[elt];
4556 print_generic_expr (dump_file, rf->factor, 0);
4557 if (elt < vec_len - 1)
4558 fputs (" * ", dump_file);
4560 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4563 reassociate_stats.pows_created++;
4565 /* Visit each element of the vector in reverse order (so that
4566 high-occurrence elements are visited first, and within the
4567 same occurrence count, lower-ranked elements are visited
4568 first). Form a linear product of all elements in this order
4569 whose occurrencce count is at least that of element J.
4570 Record the SSA name representing the product of each element
4571 with all subsequent elements in the vector. */
4572 if (j == vec_len - 1)
4573 rf1->repr = rf1->factor;
4574 else
4576 for (ii = vec_len - 2; ii >= (int)j; ii--)
4578 tree op1, op2;
4580 rf1 = &repeat_factor_vec[ii];
4581 rf2 = &repeat_factor_vec[ii + 1];
4583 /* Init the last factor's representative to be itself. */
4584 if (!rf2->repr)
4585 rf2->repr = rf2->factor;
4587 op1 = rf1->factor;
4588 op2 = rf2->repr;
4590 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4591 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4592 op1, op2);
4593 gimple_set_location (mul_stmt, gimple_location (stmt));
4594 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4595 rf1->repr = target_ssa;
4597 /* Don't reprocess the multiply we just introduced. */
4598 gimple_set_visited (mul_stmt, true);
4602 /* Form a call to __builtin_powi for the maximum product
4603 just formed, raised to the power obtained earlier. */
4604 rf1 = &repeat_factor_vec[j];
4605 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4606 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4607 build_int_cst (integer_type_node,
4608 power));
4609 gimple_call_set_lhs (pow_stmt, iter_result);
4610 gimple_set_location (pow_stmt, gimple_location (stmt));
4611 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4614 /* If we previously formed at least one other builtin_powi call,
4615 form the product of this one and those others. */
4616 if (result)
4618 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4619 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4620 result, iter_result);
4621 gimple_set_location (mul_stmt, gimple_location (stmt));
4622 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4623 gimple_set_visited (mul_stmt, true);
4624 result = new_result;
4626 else
4627 result = iter_result;
4629 /* Decrement the occurrence count of each element in the product
4630 by the count found above, and remove this many copies of each
4631 factor from OPS. */
4632 for (i = j; i < vec_len; i++)
4634 unsigned k = power;
4635 unsigned n;
4637 rf1 = &repeat_factor_vec[i];
4638 rf1->count -= power;
4640 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4642 if (oe->op == rf1->factor)
4644 if (oe->count <= k)
4646 ops->ordered_remove (n);
4647 k -= oe->count;
4649 if (k == 0)
4650 break;
4652 else
4654 oe->count -= k;
4655 break;
4662 /* At this point all elements in the repeated factor vector have a
4663 remaining occurrence count of 0 or 1, and those with a count of 1
4664 don't have cached representatives. Re-sort the ops vector and
4665 clean up. */
4666 ops->qsort (sort_by_operand_rank);
4667 repeat_factor_vec.release ();
4669 /* Return the final product computed herein. Note that there may
4670 still be some elements with single occurrence count left in OPS;
4671 those will be handled by the normal reassociation logic. */
4672 return result;
4675 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4677 static void
4678 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4680 tree rhs1;
4682 if (dump_file && (dump_flags & TDF_DETAILS))
4684 fprintf (dump_file, "Transforming ");
4685 print_gimple_stmt (dump_file, stmt, 0, 0);
4688 rhs1 = gimple_assign_rhs1 (stmt);
4689 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4690 update_stmt (stmt);
4691 remove_visited_stmt_chain (rhs1);
4693 if (dump_file && (dump_flags & TDF_DETAILS))
4695 fprintf (dump_file, " into ");
4696 print_gimple_stmt (dump_file, stmt, 0, 0);
4700 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4702 static void
4703 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4704 tree rhs1, tree rhs2)
4706 if (dump_file && (dump_flags & TDF_DETAILS))
4708 fprintf (dump_file, "Transforming ");
4709 print_gimple_stmt (dump_file, stmt, 0, 0);
4712 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4713 update_stmt (gsi_stmt (*gsi));
4714 remove_visited_stmt_chain (rhs1);
4716 if (dump_file && (dump_flags & TDF_DETAILS))
4718 fprintf (dump_file, " into ");
4719 print_gimple_stmt (dump_file, stmt, 0, 0);
4723 /* Reassociate expressions in basic block BB and its post-dominator as
4724 children. */
4726 static void
4727 reassociate_bb (basic_block bb)
4729 gimple_stmt_iterator gsi;
4730 basic_block son;
4731 gimple stmt = last_stmt (bb);
4733 if (stmt && !gimple_visited_p (stmt))
4734 maybe_optimize_range_tests (stmt);
4736 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4738 stmt = gsi_stmt (gsi);
4740 if (is_gimple_assign (stmt)
4741 && !stmt_could_throw_p (stmt))
4743 tree lhs, rhs1, rhs2;
4744 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4746 /* If this is not a gimple binary expression, there is
4747 nothing for us to do with it. */
4748 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4749 continue;
4751 /* If this was part of an already processed statement,
4752 we don't need to touch it again. */
4753 if (gimple_visited_p (stmt))
4755 /* This statement might have become dead because of previous
4756 reassociations. */
4757 if (has_zero_uses (gimple_get_lhs (stmt)))
4759 reassoc_remove_stmt (&gsi);
4760 release_defs (stmt);
4761 /* We might end up removing the last stmt above which
4762 places the iterator to the end of the sequence.
4763 Reset it to the last stmt in this case which might
4764 be the end of the sequence as well if we removed
4765 the last statement of the sequence. In which case
4766 we need to bail out. */
4767 if (gsi_end_p (gsi))
4769 gsi = gsi_last_bb (bb);
4770 if (gsi_end_p (gsi))
4771 break;
4774 continue;
4777 lhs = gimple_assign_lhs (stmt);
4778 rhs1 = gimple_assign_rhs1 (stmt);
4779 rhs2 = gimple_assign_rhs2 (stmt);
4781 /* For non-bit or min/max operations we can't associate
4782 all types. Verify that here. */
4783 if (rhs_code != BIT_IOR_EXPR
4784 && rhs_code != BIT_AND_EXPR
4785 && rhs_code != BIT_XOR_EXPR
4786 && rhs_code != MIN_EXPR
4787 && rhs_code != MAX_EXPR
4788 && (!can_reassociate_p (lhs)
4789 || !can_reassociate_p (rhs1)
4790 || !can_reassociate_p (rhs2)))
4791 continue;
4793 if (associative_tree_code (rhs_code))
4795 auto_vec<operand_entry_t> ops;
4796 tree powi_result = NULL_TREE;
4798 /* There may be no immediate uses left by the time we
4799 get here because we may have eliminated them all. */
4800 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4801 continue;
4803 gimple_set_visited (stmt, true);
4804 linearize_expr_tree (&ops, stmt, true, true);
4805 ops.qsort (sort_by_operand_rank);
4806 optimize_ops_list (rhs_code, &ops);
4807 if (undistribute_ops_list (rhs_code, &ops,
4808 loop_containing_stmt (stmt)))
4810 ops.qsort (sort_by_operand_rank);
4811 optimize_ops_list (rhs_code, &ops);
4814 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4815 optimize_range_tests (rhs_code, &ops);
4817 if (first_pass_instance
4818 && rhs_code == MULT_EXPR
4819 && flag_unsafe_math_optimizations)
4820 powi_result = attempt_builtin_powi (stmt, &ops);
4822 /* If the operand vector is now empty, all operands were
4823 consumed by the __builtin_powi optimization. */
4824 if (ops.length () == 0)
4825 transform_stmt_to_copy (&gsi, stmt, powi_result);
4826 else if (ops.length () == 1)
4828 tree last_op = ops.last ()->op;
4830 if (powi_result)
4831 transform_stmt_to_multiply (&gsi, stmt, last_op,
4832 powi_result);
4833 else
4834 transform_stmt_to_copy (&gsi, stmt, last_op);
4836 else
4838 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4839 int ops_num = ops.length ();
4840 int width = get_reassociation_width (ops_num, rhs_code, mode);
4841 tree new_lhs = lhs;
4843 if (dump_file && (dump_flags & TDF_DETAILS))
4844 fprintf (dump_file,
4845 "Width = %d was chosen for reassociation\n", width);
4847 if (width > 1
4848 && ops.length () > 3)
4849 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
4850 width, ops);
4851 else
4853 /* When there are three operands left, we want
4854 to make sure the ones that get the double
4855 binary op are chosen wisely. */
4856 int len = ops.length ();
4857 if (len >= 3)
4858 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4860 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4861 powi_result != NULL);
4864 /* If we combined some repeated factors into a
4865 __builtin_powi call, multiply that result by the
4866 reassociated operands. */
4867 if (powi_result)
4869 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4870 tree type = TREE_TYPE (lhs);
4871 tree target_ssa = make_temp_ssa_name (type, NULL,
4872 "reassocpow");
4873 gimple_set_lhs (lhs_stmt, target_ssa);
4874 update_stmt (lhs_stmt);
4875 if (lhs != new_lhs)
4876 target_ssa = new_lhs;
4877 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
4878 powi_result, target_ssa);
4879 gimple_set_location (mul_stmt, gimple_location (stmt));
4880 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4886 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4887 son;
4888 son = next_dom_son (CDI_POST_DOMINATORS, son))
4889 reassociate_bb (son);
4892 /* Add jumps around shifts for range tests turned into bit tests.
4893 For each SSA_NAME VAR we have code like:
4894 VAR = ...; // final stmt of range comparison
4895 // bit test here...;
4896 OTHERVAR = ...; // final stmt of the bit test sequence
4897 RES = VAR | OTHERVAR;
4898 Turn the above into:
4899 VAR = ...;
4900 if (VAR != 0)
4901 goto <l3>;
4902 else
4903 goto <l2>;
4904 <l2>:
4905 // bit test here...;
4906 OTHERVAR = ...;
4907 <l3>:
4908 # RES = PHI<1(l1), OTHERVAR(l2)>; */
4910 static void
4911 branch_fixup (void)
4913 tree var;
4914 unsigned int i;
4916 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
4918 gimple def_stmt = SSA_NAME_DEF_STMT (var);
4919 gimple use_stmt;
4920 use_operand_p use;
4921 bool ok = single_imm_use (var, &use, &use_stmt);
4922 gcc_assert (ok
4923 && is_gimple_assign (use_stmt)
4924 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
4925 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
4927 basic_block cond_bb = gimple_bb (def_stmt);
4928 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
4929 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
4931 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
4932 gimple g = gimple_build_cond (NE_EXPR, var,
4933 build_zero_cst (TREE_TYPE (var)),
4934 NULL_TREE, NULL_TREE);
4935 location_t loc = gimple_location (use_stmt);
4936 gimple_set_location (g, loc);
4937 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
4939 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
4940 etrue->probability = REG_BR_PROB_BASE / 2;
4941 etrue->count = cond_bb->count / 2;
4942 edge efalse = find_edge (cond_bb, then_bb);
4943 efalse->flags = EDGE_FALSE_VALUE;
4944 efalse->probability -= etrue->probability;
4945 efalse->count -= etrue->count;
4946 then_bb->count -= etrue->count;
4948 tree othervar = NULL_TREE;
4949 if (gimple_assign_rhs1 (use_stmt) == var)
4950 othervar = gimple_assign_rhs2 (use_stmt);
4951 else if (gimple_assign_rhs2 (use_stmt) == var)
4952 othervar = gimple_assign_rhs1 (use_stmt);
4953 else
4954 gcc_unreachable ();
4955 tree lhs = gimple_assign_lhs (use_stmt);
4956 gphi *phi = create_phi_node (lhs, merge_bb);
4957 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
4958 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
4959 gsi = gsi_for_stmt (use_stmt);
4960 gsi_remove (&gsi, true);
4962 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
4963 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
4965 reassoc_branch_fixups.release ();
4968 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4969 void debug_ops_vector (vec<operand_entry_t> ops);
4971 /* Dump the operand entry vector OPS to FILE. */
4973 void
4974 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4976 operand_entry_t oe;
4977 unsigned int i;
4979 FOR_EACH_VEC_ELT (ops, i, oe)
4981 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4982 print_generic_expr (file, oe->op, 0);
4986 /* Dump the operand entry vector OPS to STDERR. */
4988 DEBUG_FUNCTION void
4989 debug_ops_vector (vec<operand_entry_t> ops)
4991 dump_ops_vector (stderr, ops);
4994 static void
4995 do_reassoc (void)
4997 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4998 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
5001 /* Initialize the reassociation pass. */
5003 static void
5004 init_reassoc (void)
5006 int i;
5007 long rank = 2;
5008 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
5010 /* Find the loops, so that we can prevent moving calculations in
5011 them. */
5012 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5014 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
5016 operand_entry_pool = create_alloc_pool ("operand entry pool",
5017 sizeof (struct operand_entry), 30);
5018 next_operand_entry_id = 0;
5020 /* Reverse RPO (Reverse Post Order) will give us something where
5021 deeper loops come later. */
5022 pre_and_rev_post_order_compute (NULL, bbs, false);
5023 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5024 operand_rank = new hash_map<tree, long>;
5026 /* Give each default definition a distinct rank. This includes
5027 parameters and the static chain. Walk backwards over all
5028 SSA names so that we get proper rank ordering according
5029 to tree_swap_operands_p. */
5030 for (i = num_ssa_names - 1; i > 0; --i)
5032 tree name = ssa_name (i);
5033 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5034 insert_operand_rank (name, ++rank);
5037 /* Set up rank for each BB */
5038 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5039 bb_rank[bbs[i]] = ++rank << 16;
5041 free (bbs);
5042 calculate_dominance_info (CDI_POST_DOMINATORS);
5043 plus_negates = vNULL;
5046 /* Cleanup after the reassociation pass, and print stats if
5047 requested. */
5049 static void
5050 fini_reassoc (void)
5052 statistics_counter_event (cfun, "Linearized",
5053 reassociate_stats.linearized);
5054 statistics_counter_event (cfun, "Constants eliminated",
5055 reassociate_stats.constants_eliminated);
5056 statistics_counter_event (cfun, "Ops eliminated",
5057 reassociate_stats.ops_eliminated);
5058 statistics_counter_event (cfun, "Statements rewritten",
5059 reassociate_stats.rewritten);
5060 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5061 reassociate_stats.pows_encountered);
5062 statistics_counter_event (cfun, "Built-in powi calls created",
5063 reassociate_stats.pows_created);
5065 delete operand_rank;
5066 free_alloc_pool (operand_entry_pool);
5067 free (bb_rank);
5068 plus_negates.release ();
5069 free_dominance_info (CDI_POST_DOMINATORS);
5070 loop_optimizer_finalize ();
5073 /* Gate and execute functions for Reassociation. */
5075 static unsigned int
5076 execute_reassoc (void)
5078 init_reassoc ();
5080 do_reassoc ();
5081 repropagate_negates ();
5082 branch_fixup ();
5084 fini_reassoc ();
5085 return 0;
5088 namespace {
5090 const pass_data pass_data_reassoc =
5092 GIMPLE_PASS, /* type */
5093 "reassoc", /* name */
5094 OPTGROUP_NONE, /* optinfo_flags */
5095 TV_TREE_REASSOC, /* tv_id */
5096 ( PROP_cfg | PROP_ssa ), /* properties_required */
5097 0, /* properties_provided */
5098 0, /* properties_destroyed */
5099 0, /* todo_flags_start */
5100 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5103 class pass_reassoc : public gimple_opt_pass
5105 public:
5106 pass_reassoc (gcc::context *ctxt)
5107 : gimple_opt_pass (pass_data_reassoc, ctxt)
5110 /* opt_pass methods: */
5111 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5112 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5113 virtual unsigned int execute (function *) { return execute_reassoc (); }
5115 }; // class pass_reassoc
5117 } // anon namespace
5119 gimple_opt_pass *
5120 make_pass_reassoc (gcc::context *ctxt)
5122 return new pass_reassoc (ctxt);