[official-gcc.git] / gcc / tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "hash-set.h"
29 #include "machmode.h"
30 #include "vec.h"
31 #include "double-int.h"
32 #include "input.h"
33 #include "alias.h"
34 #include "symtab.h"
35 #include "wide-int.h"
36 #include "inchash.h"
37 #include "tree.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "predict.h"
41 #include "hard-reg-set.h"
42 #include "function.h"
43 #include "dominance.h"
44 #include "cfg.h"
45 #include "cfganal.h"
46 #include "basic-block.h"
47 #include "gimple-pretty-print.h"
48 #include "tree-inline.h"
49 #include "hash-map.h"
50 #include "tree-ssa-alias.h"
51 #include "internal-fn.h"
52 #include "gimple-fold.h"
53 #include "tree-eh.h"
54 #include "gimple-expr.h"
55 #include "is-a.h"
56 #include "gimple.h"
57 #include "gimple-iterator.h"
58 #include "gimplify-me.h"
59 #include "gimple-ssa.h"
60 #include "tree-cfg.h"
61 #include "tree-phinodes.h"
62 #include "ssa-iterators.h"
63 #include "stringpool.h"
64 #include "tree-ssanames.h"
65 #include "tree-ssa-loop-niter.h"
66 #include "tree-ssa-loop.h"
67 #include "hashtab.h"
68 #include "flags.h"
69 #include "statistics.h"
70 #include "real.h"
71 #include "fixed-value.h"
72 #include "insn-config.h"
73 #include "expmed.h"
74 #include "dojump.h"
75 #include "explow.h"
76 #include "calls.h"
77 #include "emit-rtl.h"
78 #include "varasm.h"
79 #include "stmt.h"
80 #include "expr.h"
81 #include "tree-dfa.h"
82 #include "tree-ssa.h"
83 #include "tree-iterator.h"
84 #include "tree-pass.h"
85 #include "alloc-pool.h"
86 #include "langhooks.h"
87 #include "cfgloop.h"
88 #include "target.h"
89 #include "params.h"
90 #include "diagnostic-core.h"
91 #include "builtins.h"
92 #include "gimplify.h"
93 #include "insn-codes.h"
94 #include "optabs.h"
96 /* This is a simple global reassociation pass. It is, in part, based
97    on the LLVM pass of the same name (they do some things more or less
98    than we do, in different orders, etc.).
100 It consists of five steps:
102 1. Breaking up subtract operations into addition + negate, where
103 it would promote the reassociation of adds.
105 2. Left linearization of the expression trees, so that (A+B)+(C+D)
106 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
107 During linearization, we place the operands of the binary
108 expressions into a vector of operand_entry_t
110 3. Optimization of the operand lists, eliminating things like a +
111 -a, a & a, etc.
113 3a. Combine repeated factors with the same occurrence counts
114 into a __builtin_powi call that will later be optimized into
115 an optimal number of multiplies.
117 4. Rewrite the expression trees we linearized and optimized so
118 they are in proper rank order.
120    5. Repropagate negates, as nothing else will clean them up ATM (a rough
       end-to-end sketch of these steps is given below).
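   As a rough end-to-end sketch of those steps (with hypothetical
   operands a, b, c, d, not taken from any particular testcase), a
   statement computing a - b + c - d would be handled roughly as:

     step 1:  a + (-b) + c + (-d)        (subtracts broken up)
     step 2:  (((a + (-b)) + c) + (-d))  (left-linearized; operands are
                                          collected into the vector)
     step 3:  the operand list is optimized (nothing cancels here)
     step 4:  the operands are re-emitted in rank order as a chain of
              binary PLUS_EXPRs
     step 5:  leftover negates are turned back into subtracts where that
              is profitable.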
122 A bit of theory on #4, since nobody seems to write anything down
123 about why it makes sense to do it the way they do it:
125    We could do this much more nicely in theory, but don't (for reasons
126    explained after the description of the theoretically nice way :P).
128 In order to promote the most redundancy elimination, you want
129 binary expressions whose operands are the same rank (or
130 preferably, the same value) exposed to the redundancy eliminator,
131 for possible elimination.
133 So the way to do this if we really cared, is to build the new op
134 tree from the leaves to the roots, merging as you go, and putting the
135 new op on the end of the worklist, until you are left with one
136 thing on the worklist.
138    I.e. if you have to rewrite the following set of operands (listed with
139 rank in parentheses), with opcode PLUS_EXPR:
141 a (1), b (1), c (1), d (2), e (2)
144 We start with our merge worklist empty, and the ops list with all of
145 those on it.
147 You want to first merge all leaves of the same rank, as much as
148 possible.
150 So first build a binary op of
152 mergetmp = a + b, and put "mergetmp" on the merge worklist.
154 Because there is no three operand form of PLUS_EXPR, c is not going to
155 be exposed to redundancy elimination as a rank 1 operand.
157 So you might as well throw it on the merge worklist (you could also
158 consider it to now be a rank two operand, and merge it with d and e,
159 but in this case, you then have evicted e from a binary op. So at
160 least in this situation, you can't win.)
162 Then build a binary op of d + e
163 mergetmp2 = d + e
165 and put mergetmp2 on the merge worklist.
167 so merge worklist = {mergetmp, c, mergetmp2}
169 Continue building binary ops of these operations until you have only
170 one operation left on the worklist.
172 So we have
174 build binary op
175 mergetmp3 = mergetmp + c
177 worklist = {mergetmp2, mergetmp3}
179 mergetmp4 = mergetmp2 + mergetmp3
181 worklist = {mergetmp4}
183 because we have one operation left, we can now just set the original
184 statement equal to the result of that operation.
186 This will at least expose a + b and d + e to redundancy elimination
187 as binary operations.
189 For extra points, you can reuse the old statements to build the
190 mergetmps, since you shouldn't run out.
192 So why don't we do this?
194 Because it's expensive, and rarely will help. Most trees we are
195    reassociating have 3 or fewer ops.  If they have 2 ops, they already
196 will be written into a nice single binary op. If you have 3 ops, a
197 single simple check suffices to tell you whether the first two are of the
198 same rank. If so, you know to order it
200 mergetmp = op1 + op2
201 newstmt = mergetmp + op3
203 instead of
204 mergetmp = op2 + op3
205 newstmt = mergetmp + op1
207 If all three are of the same rank, you can't expose them all in a
208 single binary operator anyway, so the above is *still* the best you
209 can do.
211 Thus, this is what we do. When we have three ops left, we check to see
212 what order to put them in, and call it a day. As a nod to vector sum
213 reduction, we check if any of the ops are really a phi node that is a
214 destructive update for the associating op, and keep the destructive
215 update together for vector sum reduction recognition. */
218 /* Statistics */
219 static struct
221 int linearized;
222 int constants_eliminated;
223 int ops_eliminated;
224 int rewritten;
225 int pows_encountered;
226 int pows_created;
227 } reassociate_stats;
229 /* Operator, rank pair. */
230 typedef struct operand_entry
232 unsigned int rank;
233 int id;
234 tree op;
235 unsigned int count;
236 } *operand_entry_t;
238 static alloc_pool operand_entry_pool;
240 /* This is used to assign a unique ID to each struct operand_entry
241 so that qsort results are identical on different hosts. */
242 static int next_operand_entry_id;
244 /* Starting rank number for a given basic block, so that we can rank
245 operations using unmovable instructions in that BB based on the bb
246 depth. */
247 static long *bb_rank;
249 /* Operand->rank hashtable. */
250 static hash_map<tree, long> *operand_rank;
252 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
253    all basic blocks, the CFG should be adjusted - the basic block is
254    split right after that SSA_NAME's definition statement and before
255    its only use, which must be a bit ior.  */
256 static vec<tree> reassoc_branch_fixups;
258 /* Forward decls. */
259 static long get_rank (tree);
260 static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
262 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
263 possibly added by gsi_remove. */
265 bool
266 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
268 gimple stmt = gsi_stmt (*gsi);
270 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
271 return gsi_remove (gsi, true);
273 gimple_stmt_iterator prev = *gsi;
274 gsi_prev (&prev);
275 unsigned uid = gimple_uid (stmt);
276 basic_block bb = gimple_bb (stmt);
277 bool ret = gsi_remove (gsi, true);
278 if (!gsi_end_p (prev))
279 gsi_next (&prev);
280 else
281 prev = gsi_start_bb (bb);
282 gimple end_stmt = gsi_stmt (*gsi);
283 while ((stmt = gsi_stmt (prev)) != end_stmt)
285 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
286 gimple_set_uid (stmt, uid);
287 gsi_next (&prev);
289 return ret;
292 /* Bias amount for loop-carried phis. We want this to be larger than
293 the depth of any reassociation tree we can see, but not larger than
294 the rank difference between two blocks. */
295 #define PHI_LOOP_BIAS (1 << 15)
297 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
298 an innermost loop, and the phi has only a single use which is inside
299 the loop, then the rank is the block rank of the loop latch plus an
300 extra bias for the loop-carried dependence. This causes expressions
301 calculated into an accumulator variable to be independent for each
302 iteration of the loop. If STMT is some other phi, the rank is the
303 block rank of its containing block. */
304 static long
305 phi_rank (gimple stmt)
307 basic_block bb = gimple_bb (stmt);
308 struct loop *father = bb->loop_father;
309 tree res;
310 unsigned i;
311 use_operand_p use;
312 gimple use_stmt;
314 /* We only care about real loops (those with a latch). */
315 if (!father->latch)
316 return bb_rank[bb->index];
318 /* Interesting phis must be in headers of innermost loops. */
319 if (bb != father->header
320 || father->inner)
321 return bb_rank[bb->index];
323 /* Ignore virtual SSA_NAMEs. */
324 res = gimple_phi_result (stmt);
325 if (virtual_operand_p (res))
326 return bb_rank[bb->index];
328 /* The phi definition must have a single use, and that use must be
329 within the loop. Otherwise this isn't an accumulator pattern. */
330 if (!single_imm_use (res, &use, &use_stmt)
331 || gimple_bb (use_stmt)->loop_father != father)
332 return bb_rank[bb->index];
334 /* Look for phi arguments from within the loop. If found, bias this phi. */
335 for (i = 0; i < gimple_phi_num_args (stmt); i++)
337 tree arg = gimple_phi_arg_def (stmt, i);
338 if (TREE_CODE (arg) == SSA_NAME
339 && !SSA_NAME_IS_DEFAULT_DEF (arg))
341 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
342 if (gimple_bb (def_stmt)->loop_father == father)
343 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
347 /* Must be an uninteresting phi. */
348 return bb_rank[bb->index];
351 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
352 loop-carried dependence of an innermost loop, return TRUE; else
353 return FALSE. */
354 static bool
355 loop_carried_phi (tree exp)
357 gimple phi_stmt;
358 long block_rank;
360 if (TREE_CODE (exp) != SSA_NAME
361 || SSA_NAME_IS_DEFAULT_DEF (exp))
362 return false;
364 phi_stmt = SSA_NAME_DEF_STMT (exp);
366 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
367 return false;
369 /* Non-loop-carried phis have block rank. Loop-carried phis have
370 an additional bias added in. If this phi doesn't have block rank,
371 it's biased and should not be propagated. */
372 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
374 if (phi_rank (phi_stmt) != block_rank)
375 return true;
377 return false;
380 /* Return the maximum of RANK and the rank that should be propagated
381 from expression OP. For most operands, this is just the rank of OP.
382 For loop-carried phis, the value is zero to avoid undoing the bias
383 in favor of the phi. */
384 static long
385 propagate_rank (long rank, tree op)
387 long op_rank;
389 if (loop_carried_phi (op))
390 return rank;
392 op_rank = get_rank (op);
394 return MAX (rank, op_rank);
397 /* Look up the operand rank structure for expression E. */
399 static inline long
400 find_operand_rank (tree e)
402 long *slot = operand_rank->get (e);
403 return slot ? *slot : -1;
406 /* Insert {E,RANK} into the operand rank hashtable. */
408 static inline void
409 insert_operand_rank (tree e, long rank)
411 gcc_assert (rank > 0);
412 gcc_assert (!operand_rank->put (e, rank));
415 /* Given an expression E, return the rank of the expression. */
417 static long
418 get_rank (tree e)
420 /* Constants have rank 0. */
421 if (is_gimple_min_invariant (e))
422 return 0;
424   /* SSA_NAMEs have the rank of the expression they are the result of.
426 For globals and uninitialized values, the rank is 0.
427 For function arguments, use the pre-setup rank.
428 For PHI nodes, stores, asm statements, etc, we use the rank of
429 the BB.
430 For simple operations, the rank is the maximum rank of any of
431 its operands, or the bb_rank, whichever is less.
432      I make no claims that this is optimal; however, it gives good
433 results. */
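  /* For instance (hypothetical SSA names), if a_1 and b_2 already have
     ranks 1 and 2, then t_3 = a_1 + b_2 is assigned rank
     MAX (1, 2) + 1 == 3, while phi results, stores, asm statements and
     the like simply take the rank of their basic block, as described
     above (apart from the loop-carried phis handled next).  */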
435 /* We make an exception to the normal ranking system to break
436 dependences of accumulator variables in loops. Suppose we
437 have a simple one-block loop containing:
439 x_1 = phi(x_0, x_2)
440 b = a + x_1
441 c = b + d
442 x_2 = c + e
444 As shown, each iteration of the calculation into x is fully
445 dependent upon the iteration before it. We would prefer to
446 see this in the form:
448 x_1 = phi(x_0, x_2)
449 b = a + d
450 c = b + e
451 x_2 = c + x_1
453 If the loop is unrolled, the calculations of b and c from
454 different iterations can be interleaved.
456 To obtain this result during reassociation, we bias the rank
457 of the phi definition x_1 upward, when it is recognized as an
458 accumulator pattern. The artificial rank causes it to be
459 added last, providing the desired independence. */
461 if (TREE_CODE (e) == SSA_NAME)
463 gimple stmt;
464 long rank;
465 int i, n;
466 tree op;
468 if (SSA_NAME_IS_DEFAULT_DEF (e))
469 return find_operand_rank (e);
471 stmt = SSA_NAME_DEF_STMT (e);
472 if (gimple_code (stmt) == GIMPLE_PHI)
473 return phi_rank (stmt);
475 if (!is_gimple_assign (stmt)
476 || gimple_vdef (stmt))
477 return bb_rank[gimple_bb (stmt)->index];
479 /* If we already have a rank for this expression, use that. */
480 rank = find_operand_rank (e);
481 if (rank != -1)
482 return rank;
484 /* Otherwise, find the maximum rank for the operands. As an
485 exception, remove the bias from loop-carried phis when propagating
486 the rank so that dependent operations are not also biased. */
487 rank = 0;
488 if (gimple_assign_single_p (stmt))
490 tree rhs = gimple_assign_rhs1 (stmt);
491 n = TREE_OPERAND_LENGTH (rhs);
492 if (n == 0)
493 rank = propagate_rank (rank, rhs);
494 else
496 for (i = 0; i < n; i++)
498 op = TREE_OPERAND (rhs, i);
500 if (op != NULL_TREE)
501 rank = propagate_rank (rank, op);
505 else
507 n = gimple_num_ops (stmt);
508 for (i = 1; i < n; i++)
510 op = gimple_op (stmt, i);
511 gcc_assert (op);
512 rank = propagate_rank (rank, op);
516 if (dump_file && (dump_flags & TDF_DETAILS))
518 fprintf (dump_file, "Rank for ");
519 print_generic_expr (dump_file, e, 0);
520 fprintf (dump_file, " is %ld\n", (rank + 1));
523 /* Note the rank in the hashtable so we don't recompute it. */
524 insert_operand_rank (e, (rank + 1));
525 return (rank + 1);
528 /* Globals, etc, are rank 0 */
529 return 0;
533 /* We want the integer constants to end up last no matter what, since they are
534 the ones we can do the most with. */
535 #define INTEGER_CONST_TYPE 1 << 3
536 #define FLOAT_CONST_TYPE 1 << 2
537 #define OTHER_CONST_TYPE 1 << 1
539 /* Classify an invariant tree into integer, float, or other, so that
540 we can sort them to be near other constants of the same type. */
541 static inline int
542 constant_type (tree t)
544 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
545 return INTEGER_CONST_TYPE;
546 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
547 return FLOAT_CONST_TYPE;
548 else
549 return OTHER_CONST_TYPE;
552 /* qsort comparison function to sort operand entries PA and PB by rank
553 so that the sorted array is ordered by rank in decreasing order. */
554 static int
555 sort_by_operand_rank (const void *pa, const void *pb)
557 const operand_entry_t oea = *(const operand_entry_t *)pa;
558 const operand_entry_t oeb = *(const operand_entry_t *)pb;
560 /* It's nicer for optimize_expression if constants that are likely
561      to fold when added/multiplied/whatever are put next to each
562 other. Since all constants have rank 0, order them by type. */
563 if (oeb->rank == 0 && oea->rank == 0)
565 if (constant_type (oeb->op) != constant_type (oea->op))
566 return constant_type (oeb->op) - constant_type (oea->op);
567 else
568 /* To make sorting result stable, we use unique IDs to determine
569 order. */
570 return oeb->id - oea->id;
573 /* Lastly, make sure the versions that are the same go next to each
574 other. */
575 if ((oeb->rank - oea->rank == 0)
576 && TREE_CODE (oea->op) == SSA_NAME
577 && TREE_CODE (oeb->op) == SSA_NAME)
579       /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
580 versions of removed SSA_NAMEs, so if possible, prefer to sort
581 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
582 See PR60418. */
583 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
584 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
585 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
587 gimple stmta = SSA_NAME_DEF_STMT (oea->op);
588 gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
589 basic_block bba = gimple_bb (stmta);
590 basic_block bbb = gimple_bb (stmtb);
591 if (bbb != bba)
593 if (bb_rank[bbb->index] != bb_rank[bba->index])
594 return bb_rank[bbb->index] - bb_rank[bba->index];
596 else
598 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
599 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
600 if (da != db)
601 return da ? 1 : -1;
605 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
606 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
607 else
608 return oeb->id - oea->id;
611 if (oeb->rank != oea->rank)
612 return oeb->rank - oea->rank;
613 else
614 return oeb->id - oea->id;
617 /* Add an operand entry to *OPS for the tree operand OP. */
619 static void
620 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
622 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
624 oe->op = op;
625 oe->rank = get_rank (op);
626 oe->id = next_operand_entry_id++;
627 oe->count = 1;
628 ops->safe_push (oe);
631 /* Add an operand entry to *OPS for the tree operand OP with repeat
632 count REPEAT. */
634 static void
635 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
636 HOST_WIDE_INT repeat)
638 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
640 oe->op = op;
641 oe->rank = get_rank (op);
642 oe->id = next_operand_entry_id++;
643 oe->count = repeat;
644 ops->safe_push (oe);
646 reassociate_stats.pows_encountered++;
649 /* Return true if STMT is a reassociable operation containing a binary
650 operation with tree code CODE, and is inside LOOP. */
652 static bool
653 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
655 basic_block bb = gimple_bb (stmt);
657 if (gimple_bb (stmt) == NULL)
658 return false;
660 if (!flow_bb_inside_loop_p (loop, bb))
661 return false;
663 if (is_gimple_assign (stmt)
664 && gimple_assign_rhs_code (stmt) == code
665 && has_single_use (gimple_assign_lhs (stmt)))
666 return true;
668 return false;
672 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
673    operand of that operation.  Otherwise, return NULL_TREE.  */
675 static tree
676 get_unary_op (tree name, enum tree_code opcode)
678 gimple stmt = SSA_NAME_DEF_STMT (name);
680 if (!is_gimple_assign (stmt))
681 return NULL_TREE;
683 if (gimple_assign_rhs_code (stmt) == opcode)
684 return gimple_assign_rhs1 (stmt);
685 return NULL_TREE;
688 /* If CURR and LAST are a pair of ops that OPCODE allows us to
689 eliminate through equivalences, do so, remove them from OPS, and
690 return true. Otherwise, return false. */
692 static bool
693 eliminate_duplicate_pair (enum tree_code opcode,
694 vec<operand_entry_t> *ops,
695 bool *all_done,
696 unsigned int i,
697 operand_entry_t curr,
698 operand_entry_t last)
701   /* If we have two of the same op, and the opcode is &, |, min, or max,
702 we can eliminate one of them.
703 If we have two of the same op, and the opcode is ^, we can
704 eliminate both of them. */
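  /* A minimal sketch (hypothetical SSA names): with OPCODE BIT_AND_EXPR
     the operand list {x_1, x_1, y_2} becomes {x_1, y_2}, while with
     BIT_XOR_EXPR the pair cancels completely and {x_1, x_1} is replaced
     by a single zero constant.  */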
706 if (last && last->op == curr->op)
708 switch (opcode)
710 case MAX_EXPR:
711 case MIN_EXPR:
712 case BIT_IOR_EXPR:
713 case BIT_AND_EXPR:
714 if (dump_file && (dump_flags & TDF_DETAILS))
716 fprintf (dump_file, "Equivalence: ");
717 print_generic_expr (dump_file, curr->op, 0);
718 fprintf (dump_file, " [&|minmax] ");
719 print_generic_expr (dump_file, last->op, 0);
720 fprintf (dump_file, " -> ");
721 print_generic_stmt (dump_file, last->op, 0);
724 ops->ordered_remove (i);
725 reassociate_stats.ops_eliminated ++;
727 return true;
729 case BIT_XOR_EXPR:
730 if (dump_file && (dump_flags & TDF_DETAILS))
732 fprintf (dump_file, "Equivalence: ");
733 print_generic_expr (dump_file, curr->op, 0);
734 fprintf (dump_file, " ^ ");
735 print_generic_expr (dump_file, last->op, 0);
736 fprintf (dump_file, " -> nothing\n");
739 reassociate_stats.ops_eliminated += 2;
741 if (ops->length () == 2)
743 ops->create (0);
744 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
745 *all_done = true;
747 else
749 ops->ordered_remove (i-1);
750 ops->ordered_remove (i-1);
753 return true;
755 default:
756 break;
759 return false;
762 static vec<tree> plus_negates;
764 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
765 expression, look in OPS for a corresponding positive operation to cancel
766 it out. If we find one, remove the other from OPS, replace
767 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
768 return false. */
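/* A minimal sketch for the function below (hypothetical SSA names): if
   t_3 = -x_1 appears in a PLUS_EXPR operand list together with x_1, the
   pair is replaced by the constant 0; if t_3 = ~x_1 appears together with
   x_1, the pair is replaced by -1, since x + ~x == -1 in two's
   complement.  */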
770 static bool
771 eliminate_plus_minus_pair (enum tree_code opcode,
772 vec<operand_entry_t> *ops,
773 unsigned int currindex,
774 operand_entry_t curr)
776 tree negateop;
777 tree notop;
778 unsigned int i;
779 operand_entry_t oe;
781 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
782 return false;
784 negateop = get_unary_op (curr->op, NEGATE_EXPR);
785 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
786 if (negateop == NULL_TREE && notop == NULL_TREE)
787 return false;
789 /* Any non-negated version will have a rank that is one less than
790 the current rank. So once we hit those ranks, if we don't find
791 one, we can stop. */
793 for (i = currindex + 1;
794 ops->iterate (i, &oe)
795 && oe->rank >= curr->rank - 1 ;
796 i++)
798 if (oe->op == negateop)
801 if (dump_file && (dump_flags & TDF_DETAILS))
803 fprintf (dump_file, "Equivalence: ");
804 print_generic_expr (dump_file, negateop, 0);
805 fprintf (dump_file, " + -");
806 print_generic_expr (dump_file, oe->op, 0);
807 fprintf (dump_file, " -> 0\n");
810 ops->ordered_remove (i);
811 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
812 ops->ordered_remove (currindex);
813 reassociate_stats.ops_eliminated ++;
815 return true;
817 else if (oe->op == notop)
819 tree op_type = TREE_TYPE (oe->op);
821 if (dump_file && (dump_flags & TDF_DETAILS))
823 fprintf (dump_file, "Equivalence: ");
824 print_generic_expr (dump_file, notop, 0);
825 fprintf (dump_file, " + ~");
826 print_generic_expr (dump_file, oe->op, 0);
827 fprintf (dump_file, " -> -1\n");
830 ops->ordered_remove (i);
831 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
832 ops->ordered_remove (currindex);
833 reassociate_stats.ops_eliminated ++;
835 return true;
839 /* CURR->OP is a negate expr in a plus expr: save it for later
840 inspection in repropagate_negates(). */
841 if (negateop != NULL_TREE)
842 plus_negates.safe_push (curr->op);
844 return false;
847 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
848    bitwise not expression, look in OPS for a corresponding operand to
849    cancel it out.  If we find one, collapse the whole operand list to a
850    single constant (0 for BIT_AND_EXPR, -1 for BIT_IOR_EXPR) and return
851    true.  Otherwise, return false.  */
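/* A minimal sketch (hypothetical SSA names): if t_4 = ~x_1 and x_1 both
   appear in a BIT_AND_EXPR operand list, the result is 0 no matter what
   the remaining operands are; for BIT_IOR_EXPR it is all-ones (-1).  */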
853 static bool
854 eliminate_not_pairs (enum tree_code opcode,
855 vec<operand_entry_t> *ops,
856 unsigned int currindex,
857 operand_entry_t curr)
859 tree notop;
860 unsigned int i;
861 operand_entry_t oe;
863 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
864 || TREE_CODE (curr->op) != SSA_NAME)
865 return false;
867 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
868 if (notop == NULL_TREE)
869 return false;
871 /* Any non-not version will have a rank that is one less than
872 the current rank. So once we hit those ranks, if we don't find
873 one, we can stop. */
875 for (i = currindex + 1;
876 ops->iterate (i, &oe)
877 && oe->rank >= curr->rank - 1;
878 i++)
880 if (oe->op == notop)
882 if (dump_file && (dump_flags & TDF_DETAILS))
884 fprintf (dump_file, "Equivalence: ");
885 print_generic_expr (dump_file, notop, 0);
886 if (opcode == BIT_AND_EXPR)
887 fprintf (dump_file, " & ~");
888 else if (opcode == BIT_IOR_EXPR)
889 fprintf (dump_file, " | ~");
890 print_generic_expr (dump_file, oe->op, 0);
891 if (opcode == BIT_AND_EXPR)
892 fprintf (dump_file, " -> 0\n");
893 else if (opcode == BIT_IOR_EXPR)
894 fprintf (dump_file, " -> -1\n");
897 if (opcode == BIT_AND_EXPR)
898 oe->op = build_zero_cst (TREE_TYPE (oe->op));
899 else if (opcode == BIT_IOR_EXPR)
900 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
902 reassociate_stats.ops_eliminated += ops->length () - 1;
903 ops->truncate (0);
904 ops->quick_push (oe);
905 return true;
909 return false;
912 /* Use a constant value that may be present in OPS to try to eliminate
913 operands. Note that this function is only really used when we've
914 eliminated ops for other reasons, or merged constants. Across
915 single statements, fold already does all of this, plus more. There
916 is little point in duplicating logic, so I've only included the
917 identities that I could ever construct testcases to trigger. */
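/* The identities handled below are, roughly (with a hypothetical operand
   x): x & 0 -> 0, x & -1 -> x, x | -1 -> -1, x | 0 -> x, x * 0 -> 0,
   x * 1 -> x, and x ^ 0, x + 0, x - 0 -> x, with the floating-point cases
   guarded by the usual NaN/signed-zero checks.  */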
919 static void
920 eliminate_using_constants (enum tree_code opcode,
921 vec<operand_entry_t> *ops)
923 operand_entry_t oelast = ops->last ();
924 tree type = TREE_TYPE (oelast->op);
926 if (oelast->rank == 0
927 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
929 switch (opcode)
931 case BIT_AND_EXPR:
932 if (integer_zerop (oelast->op))
934 if (ops->length () != 1)
936 if (dump_file && (dump_flags & TDF_DETAILS))
937 fprintf (dump_file, "Found & 0, removing all other ops\n");
939 reassociate_stats.ops_eliminated += ops->length () - 1;
941 ops->truncate (0);
942 ops->quick_push (oelast);
943 return;
946 else if (integer_all_onesp (oelast->op))
948 if (ops->length () != 1)
950 if (dump_file && (dump_flags & TDF_DETAILS))
951 fprintf (dump_file, "Found & -1, removing\n");
952 ops->pop ();
953 reassociate_stats.ops_eliminated++;
956 break;
957 case BIT_IOR_EXPR:
958 if (integer_all_onesp (oelast->op))
960 if (ops->length () != 1)
962 if (dump_file && (dump_flags & TDF_DETAILS))
963 fprintf (dump_file, "Found | -1, removing all other ops\n");
965 reassociate_stats.ops_eliminated += ops->length () - 1;
967 ops->truncate (0);
968 ops->quick_push (oelast);
969 return;
972 else if (integer_zerop (oelast->op))
974 if (ops->length () != 1)
976 if (dump_file && (dump_flags & TDF_DETAILS))
977 fprintf (dump_file, "Found | 0, removing\n");
978 ops->pop ();
979 reassociate_stats.ops_eliminated++;
982 break;
983 case MULT_EXPR:
984 if (integer_zerop (oelast->op)
985 || (FLOAT_TYPE_P (type)
986 && !HONOR_NANS (type)
987 && !HONOR_SIGNED_ZEROS (type)
988 && real_zerop (oelast->op)))
990 if (ops->length () != 1)
992 if (dump_file && (dump_flags & TDF_DETAILS))
993 fprintf (dump_file, "Found * 0, removing all other ops\n");
995 reassociate_stats.ops_eliminated += ops->length () - 1;
996 ops->truncate (1);
997 ops->quick_push (oelast);
998 return;
1001 else if (integer_onep (oelast->op)
1002 || (FLOAT_TYPE_P (type)
1003 && !HONOR_SNANS (type)
1004 && real_onep (oelast->op)))
1006 if (ops->length () != 1)
1008 if (dump_file && (dump_flags & TDF_DETAILS))
1009 fprintf (dump_file, "Found * 1, removing\n");
1010 ops->pop ();
1011 reassociate_stats.ops_eliminated++;
1012 return;
1015 break;
1016 case BIT_XOR_EXPR:
1017 case PLUS_EXPR:
1018 case MINUS_EXPR:
1019 if (integer_zerop (oelast->op)
1020 || (FLOAT_TYPE_P (type)
1021 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1022 && fold_real_zero_addition_p (type, oelast->op,
1023 opcode == MINUS_EXPR)))
1025 if (ops->length () != 1)
1027 if (dump_file && (dump_flags & TDF_DETAILS))
1028 fprintf (dump_file, "Found [|^+] 0, removing\n");
1029 ops->pop ();
1030 reassociate_stats.ops_eliminated++;
1031 return;
1034 break;
1035 default:
1036 break;
1042 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
1043 bool, bool);
1045 /* Structure for tracking and counting operands. */
1046 typedef struct oecount_s {
1047 int cnt;
1048 int id;
1049 enum tree_code oecode;
1050 tree op;
1051 } oecount;
1054 /* The heap for the oecount hashtable and the sorted list of operands. */
1055 static vec<oecount> cvec;
1058 /* Oecount hashtable helpers. */
1060 struct oecount_hasher
1062 typedef int value_type;
1063 typedef int compare_type;
1064 static inline hashval_t hash (const value_type &);
1065 static inline bool equal (const value_type &, const compare_type &);
1066 static bool is_deleted (int &v) { return v == 1; }
1067 static void mark_deleted (int &e) { e = 1; }
1068 static bool is_empty (int &v) { return v == 0; }
1069 static void mark_empty (int &e) { e = 0; }
1070 static void remove (int &) {}
1073 /* Hash function for oecount. */
1075 inline hashval_t
1076 oecount_hasher::hash (const value_type &p)
1078 const oecount *c = &cvec[p - 42];
1079 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1082 /* Comparison function for oecount. */
1084 inline bool
1085 oecount_hasher::equal (const value_type &p1, const compare_type &p2)
1087 const oecount *c1 = &cvec[p1 - 42];
1088 const oecount *c2 = &cvec[p2 - 42];
1089 return (c1->oecode == c2->oecode
1090 && c1->op == c2->op);
1093 /* Comparison function for qsort sorting oecount elements by count. */
1095 static int
1096 oecount_cmp (const void *p1, const void *p2)
1098 const oecount *c1 = (const oecount *)p1;
1099 const oecount *c2 = (const oecount *)p2;
1100 if (c1->cnt != c2->cnt)
1101 return c1->cnt - c2->cnt;
1102 else
1103 /* If counts are identical, use unique IDs to stabilize qsort. */
1104 return c1->id - c2->id;
1107 /* Return TRUE iff STMT represents a builtin call that raises OP
1108 to some exponent. */
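/* For example (hypothetical arguments), __builtin_pow (op, 8.0) and
   __builtin_powi (op, 8) both qualify, as do their float and long double
   variants.  */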
1110 static bool
1111 stmt_is_power_of_op (gimple stmt, tree op)
1113 tree fndecl;
1115 if (!is_gimple_call (stmt))
1116 return false;
1118 fndecl = gimple_call_fndecl (stmt);
1120 if (!fndecl
1121 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1122 return false;
1124 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1126 CASE_FLT_FN (BUILT_IN_POW):
1127 CASE_FLT_FN (BUILT_IN_POWI):
1128 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1130 default:
1131 return false;
1135 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1136 in place and return the result. Assumes that stmt_is_power_of_op
1137 was previously called for STMT and returned TRUE. */
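/* For example (a hypothetical call), __builtin_pow (x, 5.0) is rewritten
   in place to __builtin_pow (x, 4.0) and 4 is returned; likewise
   __builtin_powi (x, 3) becomes __builtin_powi (x, 2) and 2 is
   returned.  */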
1139 static HOST_WIDE_INT
1140 decrement_power (gimple stmt)
1142 REAL_VALUE_TYPE c, cint;
1143 HOST_WIDE_INT power;
1144 tree arg1;
1146 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1148 CASE_FLT_FN (BUILT_IN_POW):
1149 arg1 = gimple_call_arg (stmt, 1);
1150 c = TREE_REAL_CST (arg1);
1151 power = real_to_integer (&c) - 1;
1152 real_from_integer (&cint, VOIDmode, power, SIGNED);
1153 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1154 return power;
1156 CASE_FLT_FN (BUILT_IN_POWI):
1157 arg1 = gimple_call_arg (stmt, 1);
1158 power = TREE_INT_CST_LOW (arg1) - 1;
1159 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1160 return power;
1162 default:
1163 gcc_unreachable ();
1167 /* Find the single immediate use of STMT's LHS, and replace it
1168 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1169 replace *DEF with OP as well. */
1171 static void
1172 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1174 tree lhs;
1175 gimple use_stmt;
1176 use_operand_p use;
1177 gimple_stmt_iterator gsi;
1179 if (is_gimple_call (stmt))
1180 lhs = gimple_call_lhs (stmt);
1181 else
1182 lhs = gimple_assign_lhs (stmt);
1184 gcc_assert (has_single_use (lhs));
1185 single_imm_use (lhs, &use, &use_stmt);
1186 if (lhs == *def)
1187 *def = op;
1188 SET_USE (use, op);
1189 if (TREE_CODE (op) != SSA_NAME)
1190 update_stmt (use_stmt);
1191 gsi = gsi_for_stmt (stmt);
1192 unlink_stmt_vdef (stmt);
1193 reassoc_remove_stmt (&gsi);
1194 release_defs (stmt);
1197 /* Walks the linear chain with result *DEF searching for an operation
1198    with operand OP and code OPCODE, removing that operation from the chain.  *DEF
1199 is updated if there is only one operand but no operation left. */
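/* A minimal sketch for the function below (hypothetical SSA names): given
   the chain
     t_1 = a_2 * b_3;
     t_4 = t_1 * c_5;
   with *DEF == t_4, a call with OPCODE MULT_EXPR and OP == b_3 propagates
   a_2 into the single use of t_1, so the chain now computes a_2 * c_5,
   and the statement that multiplied by b_3 is removed.  */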
1201 static void
1202 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1204 gimple stmt = SSA_NAME_DEF_STMT (*def);
1208 tree name;
1210 if (opcode == MULT_EXPR
1211 && stmt_is_power_of_op (stmt, op))
1213 if (decrement_power (stmt) == 1)
1214 propagate_op_to_single_use (op, stmt, def);
1215 return;
1218 name = gimple_assign_rhs1 (stmt);
1220 /* If this is the operation we look for and one of the operands
1221 is ours simply propagate the other operand into the stmts
1222 single use. */
1223 if (gimple_assign_rhs_code (stmt) == opcode
1224 && (name == op
1225 || gimple_assign_rhs2 (stmt) == op))
1227 if (name == op)
1228 name = gimple_assign_rhs2 (stmt);
1229 propagate_op_to_single_use (name, stmt, def);
1230 return;
1233 /* We might have a multiply of two __builtin_pow* calls, and
1234 the operand might be hiding in the rightmost one. */
1235 if (opcode == MULT_EXPR
1236 && gimple_assign_rhs_code (stmt) == opcode
1237 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1238 && has_single_use (gimple_assign_rhs2 (stmt)))
1240 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1241 if (stmt_is_power_of_op (stmt2, op))
1243 if (decrement_power (stmt2) == 1)
1244 propagate_op_to_single_use (op, stmt2, def);
1245 return;
1249 /* Continue walking the chain. */
1250 gcc_assert (name != op
1251 && TREE_CODE (name) == SSA_NAME);
1252 stmt = SSA_NAME_DEF_STMT (name);
1254 while (1);
1257 /* Returns true if statement S1 dominates statement S2. Like
1258 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1260 static bool
1261 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1263 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1265 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1266 SSA_NAME. Assume it lives at the beginning of function and
1267 thus dominates everything. */
1268 if (!bb1 || s1 == s2)
1269 return true;
1271 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1272 if (!bb2)
1273 return false;
1275 if (bb1 == bb2)
1277 /* PHIs in the same basic block are assumed to be
1278 executed all in parallel, if only one stmt is a PHI,
1279 it dominates the other stmt in the same basic block. */
1280 if (gimple_code (s1) == GIMPLE_PHI)
1281 return true;
1283 if (gimple_code (s2) == GIMPLE_PHI)
1284 return false;
1286 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1288 if (gimple_uid (s1) < gimple_uid (s2))
1289 return true;
1291 if (gimple_uid (s1) > gimple_uid (s2))
1292 return false;
1294 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1295 unsigned int uid = gimple_uid (s1);
1296 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1298 gimple s = gsi_stmt (gsi);
1299 if (gimple_uid (s) != uid)
1300 break;
1301 if (s == s2)
1302 return true;
1305 return false;
1308 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1311 /* Insert STMT after INSERT_POINT. */
1313 static void
1314 insert_stmt_after (gimple stmt, gimple insert_point)
1316 gimple_stmt_iterator gsi;
1317 basic_block bb;
1319 if (gimple_code (insert_point) == GIMPLE_PHI)
1320 bb = gimple_bb (insert_point);
1321 else if (!stmt_ends_bb_p (insert_point))
1323 gsi = gsi_for_stmt (insert_point);
1324 gimple_set_uid (stmt, gimple_uid (insert_point));
1325 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1326 return;
1328 else
1329 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1330 thus if it must end a basic block, it should be a call that can
1331 throw, or some assignment that can throw. If it throws, the LHS
1332 of it will not be initialized though, so only valid places using
1333 the SSA_NAME should be dominated by the fallthru edge. */
1334 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1335 gsi = gsi_after_labels (bb);
1336 if (gsi_end_p (gsi))
1338 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1339 gimple_set_uid (stmt,
1340 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1342 else
1343 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1344 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1347 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1348 the result. Places the statement after the definition of either
1349 OP1 or OP2. Returns the new statement. */
1351 static gimple
1352 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1354 gimple op1def = NULL, op2def = NULL;
1355 gimple_stmt_iterator gsi;
1356 tree op;
1357 gassign *sum;
1359 /* Create the addition statement. */
1360 op = make_ssa_name (type);
1361 sum = gimple_build_assign (op, opcode, op1, op2);
1363 /* Find an insertion place and insert. */
1364 if (TREE_CODE (op1) == SSA_NAME)
1365 op1def = SSA_NAME_DEF_STMT (op1);
1366 if (TREE_CODE (op2) == SSA_NAME)
1367 op2def = SSA_NAME_DEF_STMT (op2);
1368 if ((!op1def || gimple_nop_p (op1def))
1369 && (!op2def || gimple_nop_p (op2def)))
1371 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1372 if (gsi_end_p (gsi))
1374 gimple_stmt_iterator gsi2
1375 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1376 gimple_set_uid (sum,
1377 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1379 else
1380 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1381 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1383 else
1385 gimple insert_point;
1386 if ((!op1def || gimple_nop_p (op1def))
1387 || (op2def && !gimple_nop_p (op2def)
1388 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1389 insert_point = op2def;
1390 else
1391 insert_point = op1def;
1392 insert_stmt_after (sum, insert_point);
1394 update_stmt (sum);
1396 return sum;
1399 /* Perform un-distribution of divisions and multiplications.
1400 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1401 to (A + B) / X for real X.
1403 The algorithm is organized as follows.
1405 - First we walk the addition chain *OPS looking for summands that
1406 are defined by a multiplication or a real division. This results
1407 in the candidates bitmap with relevant indices into *OPS.
1409 - Second we build the chains of multiplications or divisions for
1410 these candidates, counting the number of occurrences of (operand, code)
1411 pairs in all of the candidates chains.
1413 - Third we sort the (operand, code) pairs by number of occurrence and
1414 process them starting with the pair with the most uses.
1416 * For each such pair we walk the candidates again to build a
1417 second candidate bitmap noting all multiplication/division chains
1418 that have at least one occurrence of (operand, code).
1420 * We build an alternate addition chain only covering these
1421 candidates with one (operand, code) operation removed from their
1422 multiplication/division chain.
1424 * The first candidate gets replaced by the alternate addition chain
1425 multiplied/divided by the operand.
1427 * All candidate chains get disabled for further processing and
1428 processing of (operand, code) pairs continues.
1430 The alternate addition chains built are re-processed by the main
1431    reassociation algorithm, which allows optimizing a * x * y + b * y * x
1432    to (a + b) * x * y in one invocation of the reassociation pass.  */
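/* As a small worked sketch (hypothetical operands), for the addition
   chain a*x + b*x + c*y the first two summands become candidates, the
   counting table records the pair (x, MULT_EXPR) with count two, so it is
   processed first: x is stripped from both candidate chains, an alternate
   chain a + b is built, the first candidate is replaced by (a + b) * x
   and the second by 0, leaving (a + b) * x + 0 + c*y for the main
   reassociation pass to clean up.  */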
1434 static bool
1435 undistribute_ops_list (enum tree_code opcode,
1436 vec<operand_entry_t> *ops, struct loop *loop)
1438 unsigned int length = ops->length ();
1439 operand_entry_t oe1;
1440 unsigned i, j;
1441 sbitmap candidates, candidates2;
1442 unsigned nr_candidates, nr_candidates2;
1443 sbitmap_iterator sbi0;
1444 vec<operand_entry_t> *subops;
1445 bool changed = false;
1446 int next_oecount_id = 0;
1448 if (length <= 1
1449 || opcode != PLUS_EXPR)
1450 return false;
1452 /* Build a list of candidates to process. */
1453 candidates = sbitmap_alloc (length);
1454 bitmap_clear (candidates);
1455 nr_candidates = 0;
1456 FOR_EACH_VEC_ELT (*ops, i, oe1)
1458 enum tree_code dcode;
1459 gimple oe1def;
1461 if (TREE_CODE (oe1->op) != SSA_NAME)
1462 continue;
1463 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1464 if (!is_gimple_assign (oe1def))
1465 continue;
1466 dcode = gimple_assign_rhs_code (oe1def);
1467 if ((dcode != MULT_EXPR
1468 && dcode != RDIV_EXPR)
1469 || !is_reassociable_op (oe1def, dcode, loop))
1470 continue;
1472 bitmap_set_bit (candidates, i);
1473 nr_candidates++;
1476 if (nr_candidates < 2)
1478 sbitmap_free (candidates);
1479 return false;
1482 if (dump_file && (dump_flags & TDF_DETAILS))
1484 fprintf (dump_file, "searching for un-distribute opportunities ");
1485 print_generic_expr (dump_file,
1486 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1487 fprintf (dump_file, " %d\n", nr_candidates);
1490 /* Build linearized sub-operand lists and the counting table. */
1491 cvec.create (0);
1493 hash_table<oecount_hasher> ctable (15);
1495 /* ??? Macro arguments cannot have multi-argument template types in
1496      them.  This typedef is needed to work around that limitation.  */
1497 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1498 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1499 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1501 gimple oedef;
1502 enum tree_code oecode;
1503 unsigned j;
1505 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1506 oecode = gimple_assign_rhs_code (oedef);
1507 linearize_expr_tree (&subops[i], oedef,
1508 associative_tree_code (oecode), false);
1510 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1512 oecount c;
1513 int *slot;
1514 int idx;
1515 c.oecode = oecode;
1516 c.cnt = 1;
1517 c.id = next_oecount_id++;
1518 c.op = oe1->op;
1519 cvec.safe_push (c);
1520 idx = cvec.length () + 41;
1521 slot = ctable.find_slot (idx, INSERT);
1522 if (!*slot)
1524 *slot = idx;
1526 else
1528 cvec.pop ();
1529 cvec[*slot - 42].cnt++;
1534 /* Sort the counting table. */
1535 cvec.qsort (oecount_cmp);
1537 if (dump_file && (dump_flags & TDF_DETAILS))
1539 oecount *c;
1540 fprintf (dump_file, "Candidates:\n");
1541 FOR_EACH_VEC_ELT (cvec, j, c)
1543 fprintf (dump_file, " %u %s: ", c->cnt,
1544 c->oecode == MULT_EXPR
1545 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1546 print_generic_expr (dump_file, c->op, 0);
1547 fprintf (dump_file, "\n");
1551 /* Process the (operand, code) pairs in order of most occurrence. */
1552 candidates2 = sbitmap_alloc (length);
1553 while (!cvec.is_empty ())
1555 oecount *c = &cvec.last ();
1556 if (c->cnt < 2)
1557 break;
1559 /* Now collect the operands in the outer chain that contain
1560 the common operand in their inner chain. */
1561 bitmap_clear (candidates2);
1562 nr_candidates2 = 0;
1563 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1565 gimple oedef;
1566 enum tree_code oecode;
1567 unsigned j;
1568 tree op = (*ops)[i]->op;
1570 /* If we undistributed in this chain already this may be
1571 a constant. */
1572 if (TREE_CODE (op) != SSA_NAME)
1573 continue;
1575 oedef = SSA_NAME_DEF_STMT (op);
1576 oecode = gimple_assign_rhs_code (oedef);
1577 if (oecode != c->oecode)
1578 continue;
1580 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1582 if (oe1->op == c->op)
1584 bitmap_set_bit (candidates2, i);
1585 ++nr_candidates2;
1586 break;
1591 if (nr_candidates2 >= 2)
1593 operand_entry_t oe1, oe2;
1594 gimple prod;
1595 int first = bitmap_first_set_bit (candidates2);
1597 /* Build the new addition chain. */
1598 oe1 = (*ops)[first];
1599 if (dump_file && (dump_flags & TDF_DETAILS))
1601 fprintf (dump_file, "Building (");
1602 print_generic_expr (dump_file, oe1->op, 0);
1604 zero_one_operation (&oe1->op, c->oecode, c->op);
1605 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1607 gimple sum;
1608 oe2 = (*ops)[i];
1609 if (dump_file && (dump_flags & TDF_DETAILS))
1611 fprintf (dump_file, " + ");
1612 print_generic_expr (dump_file, oe2->op, 0);
1614 zero_one_operation (&oe2->op, c->oecode, c->op);
1615 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1616 oe1->op, oe2->op, opcode);
1617 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1618 oe2->rank = 0;
1619 oe1->op = gimple_get_lhs (sum);
1622 /* Apply the multiplication/division. */
1623 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1624 oe1->op, c->op, c->oecode);
1625 if (dump_file && (dump_flags & TDF_DETAILS))
1627 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1628 print_generic_expr (dump_file, c->op, 0);
1629 fprintf (dump_file, "\n");
1632 /* Record it in the addition chain and disable further
1633 undistribution with this op. */
1634 oe1->op = gimple_assign_lhs (prod);
1635 oe1->rank = get_rank (oe1->op);
1636 subops[first].release ();
1638 changed = true;
1641 cvec.pop ();
1644 for (i = 0; i < ops->length (); ++i)
1645 subops[i].release ();
1646 free (subops);
1647 cvec.release ();
1648 sbitmap_free (candidates);
1649 sbitmap_free (candidates2);
1651 return changed;
1654 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1655 expression, examine the other OPS to see if any of them are comparisons
1656 of the same values, which we may be able to combine or eliminate.
1657 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
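/* Similarly (hypothetical operands), with BIT_AND_EXPR two contradictory
   tests such as (a < b) & (a == b) fold to the constant 0, which is then
   added to the end of the operand list in place of the two
   comparisons.  */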
1659 static bool
1660 eliminate_redundant_comparison (enum tree_code opcode,
1661 vec<operand_entry_t> *ops,
1662 unsigned int currindex,
1663 operand_entry_t curr)
1665 tree op1, op2;
1666 enum tree_code lcode, rcode;
1667 gimple def1, def2;
1668 int i;
1669 operand_entry_t oe;
1671 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1672 return false;
1674 /* Check that CURR is a comparison. */
1675 if (TREE_CODE (curr->op) != SSA_NAME)
1676 return false;
1677 def1 = SSA_NAME_DEF_STMT (curr->op);
1678 if (!is_gimple_assign (def1))
1679 return false;
1680 lcode = gimple_assign_rhs_code (def1);
1681 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1682 return false;
1683 op1 = gimple_assign_rhs1 (def1);
1684 op2 = gimple_assign_rhs2 (def1);
1686 /* Now look for a similar comparison in the remaining OPS. */
1687 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1689 tree t;
1691 if (TREE_CODE (oe->op) != SSA_NAME)
1692 continue;
1693 def2 = SSA_NAME_DEF_STMT (oe->op);
1694 if (!is_gimple_assign (def2))
1695 continue;
1696 rcode = gimple_assign_rhs_code (def2);
1697 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1698 continue;
1700 /* If we got here, we have a match. See if we can combine the
1701 two comparisons. */
1702 if (opcode == BIT_IOR_EXPR)
1703 t = maybe_fold_or_comparisons (lcode, op1, op2,
1704 rcode, gimple_assign_rhs1 (def2),
1705 gimple_assign_rhs2 (def2));
1706 else
1707 t = maybe_fold_and_comparisons (lcode, op1, op2,
1708 rcode, gimple_assign_rhs1 (def2),
1709 gimple_assign_rhs2 (def2));
1710 if (!t)
1711 continue;
1713 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1714 always give us a boolean_type_node value back. If the original
1715 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1716 we need to convert. */
1717 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1718 t = fold_convert (TREE_TYPE (curr->op), t);
1720 if (TREE_CODE (t) != INTEGER_CST
1721 && !operand_equal_p (t, curr->op, 0))
1723 enum tree_code subcode;
1724 tree newop1, newop2;
1725 if (!COMPARISON_CLASS_P (t))
1726 continue;
1727 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1728 STRIP_USELESS_TYPE_CONVERSION (newop1);
1729 STRIP_USELESS_TYPE_CONVERSION (newop2);
1730 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1731 continue;
1734 if (dump_file && (dump_flags & TDF_DETAILS))
1736 fprintf (dump_file, "Equivalence: ");
1737 print_generic_expr (dump_file, curr->op, 0);
1738 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1739 print_generic_expr (dump_file, oe->op, 0);
1740 fprintf (dump_file, " -> ");
1741 print_generic_expr (dump_file, t, 0);
1742 fprintf (dump_file, "\n");
1745 /* Now we can delete oe, as it has been subsumed by the new combined
1746 expression t. */
1747 ops->ordered_remove (i);
1748 reassociate_stats.ops_eliminated ++;
1750 /* If t is the same as curr->op, we're done. Otherwise we must
1751 	 replace curr->op with t.  A special case is if we got a constant
1752 back, in which case we add it to the end instead of in place of
1753 the current entry. */
1754 if (TREE_CODE (t) == INTEGER_CST)
1756 ops->ordered_remove (currindex);
1757 add_to_ops_vec (ops, t);
1759 else if (!operand_equal_p (t, curr->op, 0))
1761 gimple sum;
1762 enum tree_code subcode;
1763 tree newop1;
1764 tree newop2;
1765 gcc_assert (COMPARISON_CLASS_P (t));
1766 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1767 STRIP_USELESS_TYPE_CONVERSION (newop1);
1768 STRIP_USELESS_TYPE_CONVERSION (newop2);
1769 gcc_checking_assert (is_gimple_val (newop1)
1770 && is_gimple_val (newop2));
1771 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1772 curr->op = gimple_get_lhs (sum);
1774 return true;
1777 return false;
1780 /* Perform various identities and other optimizations on the list of
1781 operand entries, stored in OPS. The tree code for the binary
1782 operation between all the operands is OPCODE. */
1784 static void
1785 optimize_ops_list (enum tree_code opcode,
1786 vec<operand_entry_t> *ops)
1788 unsigned int length = ops->length ();
1789 unsigned int i;
1790 operand_entry_t oe;
1791 operand_entry_t oelast = NULL;
1792 bool iterate = false;
1794 if (length == 1)
1795 return;
1797 oelast = ops->last ();
1799 /* If the last two are constants, pop the constants off, merge them
1800 and try the next two. */
1801 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1803 operand_entry_t oelm1 = (*ops)[length - 2];
1805 if (oelm1->rank == 0
1806 && is_gimple_min_invariant (oelm1->op)
1807 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1808 TREE_TYPE (oelast->op)))
1810 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1811 oelm1->op, oelast->op);
1813 if (folded && is_gimple_min_invariant (folded))
1815 if (dump_file && (dump_flags & TDF_DETAILS))
1816 fprintf (dump_file, "Merging constants\n");
1818 ops->pop ();
1819 ops->pop ();
1821 add_to_ops_vec (ops, folded);
1822 reassociate_stats.constants_eliminated++;
1824 optimize_ops_list (opcode, ops);
1825 return;
1830 eliminate_using_constants (opcode, ops);
1831 oelast = NULL;
1833 for (i = 0; ops->iterate (i, &oe);)
1835 bool done = false;
1837 if (eliminate_not_pairs (opcode, ops, i, oe))
1838 return;
1839 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1840 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1841 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1843 if (done)
1844 return;
1845 iterate = true;
1846 oelast = NULL;
1847 continue;
1849 oelast = oe;
1850 i++;
1853 length = ops->length ();
1854 oelast = ops->last ();
1856 if (iterate)
1857 optimize_ops_list (opcode, ops);
1860 /* The following functions are subroutines to optimize_range_tests and allow
1861 it to try to change a logical combination of comparisons into a range
1862 test.
1864 For example, both
1865      X == 2 || X == 5 || X == 3 || X == 4
1866    and
1867      X >= 2 && X <= 5
1868 are converted to
1869 (unsigned) (X - 2) <= 3
1871    For more information see the comments above fold_test_range in fold-const.c;
1872 this implementation is for GIMPLE. */
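/* The key fact used is that membership of X in a contiguous range
   [LOW, HIGH] can be tested with a single unsigned comparison
   (unsigned) (X - LOW) <= (unsigned) (HIGH - LOW), which for the example
   above is (unsigned) (X - 2) <= 3.  */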
1874 struct range_entry
1876 tree exp;
1877 tree low;
1878 tree high;
1879 bool in_p;
1880 bool strict_overflow_p;
1881 unsigned int idx, next;
1884 /* This is similar to make_range in fold-const.c, but on top of
1885 GIMPLE instead of trees. If EXP is non-NULL, it should be
1886 an SSA_NAME and STMT argument is ignored, otherwise STMT
1887 argument should be a GIMPLE_COND. */
1889 static void
1890 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1892 int in_p;
1893 tree low, high;
1894 bool is_bool, strict_overflow_p;
1896 r->exp = NULL_TREE;
1897 r->in_p = false;
1898 r->strict_overflow_p = false;
1899 r->low = NULL_TREE;
1900 r->high = NULL_TREE;
1901 if (exp != NULL_TREE
1902 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1903 return;
1905 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1906 and see if we can refine the range. Some of the cases below may not
1907 happen, but it doesn't seem worth worrying about this. We "continue"
1908 the outer loop when we've changed something; otherwise we "break"
1909 the switch, which will "break" the while. */
1910 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1911 high = low;
1912 in_p = 0;
1913 strict_overflow_p = false;
1914 is_bool = false;
1915 if (exp == NULL_TREE)
1916 is_bool = true;
1917 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1919 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1920 is_bool = true;
1921 else
1922 return;
1924 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1925 is_bool = true;
1927 while (1)
1929 enum tree_code code;
1930 tree arg0, arg1, exp_type;
1931 tree nexp;
1932 location_t loc;
1934 if (exp != NULL_TREE)
1936 if (TREE_CODE (exp) != SSA_NAME
1937 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1938 break;
1940 stmt = SSA_NAME_DEF_STMT (exp);
1941 if (!is_gimple_assign (stmt))
1942 break;
1944 code = gimple_assign_rhs_code (stmt);
1945 arg0 = gimple_assign_rhs1 (stmt);
1946 arg1 = gimple_assign_rhs2 (stmt);
1947 exp_type = TREE_TYPE (exp);
1949 else
1951 code = gimple_cond_code (stmt);
1952 arg0 = gimple_cond_lhs (stmt);
1953 arg1 = gimple_cond_rhs (stmt);
1954 exp_type = boolean_type_node;
1957 if (TREE_CODE (arg0) != SSA_NAME)
1958 break;
1959 loc = gimple_location (stmt);
1960 switch (code)
1962 case BIT_NOT_EXPR:
1963 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1964 /* Ensure the range is either +[-,0], +[0,0],
1965 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1966 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1967 or similar expression of unconditional true or
1968 false, it should not be negated. */
1969 && ((high && integer_zerop (high))
1970 || (low && integer_onep (low))))
1972 in_p = !in_p;
1973 exp = arg0;
1974 continue;
1976 break;
1977 case SSA_NAME:
1978 exp = arg0;
1979 continue;
1980 CASE_CONVERT:
1981 if (is_bool)
1982 goto do_default;
1983 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1985 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1986 is_bool = true;
1987 else
1988 return;
1990 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1991 is_bool = true;
1992 goto do_default;
1993 case EQ_EXPR:
1994 case NE_EXPR:
1995 case LT_EXPR:
1996 case LE_EXPR:
1997 case GE_EXPR:
1998 case GT_EXPR:
1999 is_bool = true;
2000 /* FALLTHRU */
2001 default:
2002 if (!is_bool)
2003 return;
2004 do_default:
2005 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2006 &low, &high, &in_p,
2007 &strict_overflow_p);
2008 if (nexp != NULL_TREE)
2010 exp = nexp;
2011 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2012 continue;
2014 break;
2016 break;
2018 if (is_bool)
2020 r->exp = exp;
2021 r->in_p = in_p;
2022 r->low = low;
2023 r->high = high;
2024 r->strict_overflow_p = strict_overflow_p;
2028 /* Comparison function for qsort. Sort entries
2029 without SSA_NAME exp first, then with SSA_NAMEs sorted
2030 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2031 by increasing ->low and if ->low is the same, by increasing
2032 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2033 maximum. */
2035 static int
2036 range_entry_cmp (const void *a, const void *b)
2038 const struct range_entry *p = (const struct range_entry *) a;
2039 const struct range_entry *q = (const struct range_entry *) b;
2041 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2043 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2045 /* Group range_entries for the same SSA_NAME together. */
2046 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2047 return -1;
2048 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2049 return 1;
2050 /* If ->low is different, NULL low goes first, then by
2051 ascending low. */
2052 if (p->low != NULL_TREE)
2054 if (q->low != NULL_TREE)
2056 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2057 p->low, q->low);
2058 if (tem && integer_onep (tem))
2059 return -1;
2060 tem = fold_binary (GT_EXPR, boolean_type_node,
2061 p->low, q->low);
2062 if (tem && integer_onep (tem))
2063 return 1;
2065 else
2066 return 1;
2068 else if (q->low != NULL_TREE)
2069 return -1;
2070 /* If ->high is different, NULL high goes last, before that by
2071 ascending high. */
2072 if (p->high != NULL_TREE)
2074 if (q->high != NULL_TREE)
2076 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2077 p->high, q->high);
2078 if (tem && integer_onep (tem))
2079 return -1;
2080 tem = fold_binary (GT_EXPR, boolean_type_node,
2081 p->high, q->high);
2082 if (tem && integer_onep (tem))
2083 return 1;
2085 else
2086 return -1;
2088 else if (q->high != NULL_TREE)
2089 return 1;
2090 /* If both ranges are the same, sort below by ascending idx. */
2092 else
2093 return 1;
2095 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2096 return -1;
2098 if (p->idx < q->idx)
2099 return -1;
2100 else
2102 gcc_checking_assert (p->idx > q->idx);
2103 return 1;
2107 /* Helper routine of optimize_range_test.
2108 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2109 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2110 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2111 is NULL, OTHERRANGEP should not be NULL, and then OTHERRANGEP points to
2112 an array of COUNT pointers to other ranges. Return
2113 true if the range merge has been successful.
2114 If OPCODE is ERROR_MARK, this is called from within
2115 maybe_optimize_range_tests and is performing inter-bb range optimization.
2116 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2117 oe->rank. */
2119 static bool
2120 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2121 struct range_entry **otherrangep,
2122 unsigned int count, enum tree_code opcode,
2123 vec<operand_entry_t> *ops, tree exp, gimple_seq seq,
2124 bool in_p, tree low, tree high, bool strict_overflow_p)
2126 operand_entry_t oe = (*ops)[range->idx];
2127 tree op = oe->op;
2128 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2129 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2130 location_t loc = gimple_location (stmt);
2131 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2132 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2133 in_p, low, high);
2134 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2135 gimple_stmt_iterator gsi;
2136 unsigned int i;
2138 if (tem == NULL_TREE)
2139 return false;
2141 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2142 warning_at (loc, OPT_Wstrict_overflow,
2143 "assuming signed overflow does not occur "
2144 "when simplifying range test");
2146 if (dump_file && (dump_flags & TDF_DETAILS))
2148 struct range_entry *r;
2149 fprintf (dump_file, "Optimizing range tests ");
2150 print_generic_expr (dump_file, range->exp, 0);
2151 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2152 print_generic_expr (dump_file, range->low, 0);
2153 fprintf (dump_file, ", ");
2154 print_generic_expr (dump_file, range->high, 0);
2155 fprintf (dump_file, "]");
2156 for (i = 0; i < count; i++)
2158 if (otherrange)
2159 r = otherrange + i;
2160 else
2161 r = otherrangep[i];
2162 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2163 print_generic_expr (dump_file, r->low, 0);
2164 fprintf (dump_file, ", ");
2165 print_generic_expr (dump_file, r->high, 0);
2166 fprintf (dump_file, "]");
2168 fprintf (dump_file, "\n into ");
2169 print_generic_expr (dump_file, tem, 0);
2170 fprintf (dump_file, "\n");
2173 if (opcode == BIT_IOR_EXPR
2174 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2175 tem = invert_truthvalue_loc (loc, tem);
2177 tem = fold_convert_loc (loc, optype, tem);
2178 gsi = gsi_for_stmt (stmt);
2179 unsigned int uid = gimple_uid (stmt);
2180 /* In rare cases range->exp can be equal to lhs of stmt.
2181 In that case we have to insert after the stmt rather than before
2182 it. If stmt is a PHI, insert it at the start of the basic block. */
2183 if (op != range->exp)
2185 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2186 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2187 GSI_SAME_STMT);
2188 gsi_prev (&gsi);
2190 else if (gimple_code (stmt) != GIMPLE_PHI)
2192 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2193 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2194 GSI_CONTINUE_LINKING);
2196 else
2198 gsi = gsi_after_labels (gimple_bb (stmt));
2199 if (!gsi_end_p (gsi))
2200 uid = gimple_uid (gsi_stmt (gsi));
2201 else
2203 gsi = gsi_start_bb (gimple_bb (stmt));
2204 uid = 1;
2205 while (!gsi_end_p (gsi))
2207 uid = gimple_uid (gsi_stmt (gsi));
2208 gsi_next (&gsi);
2211 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2212 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2213 GSI_SAME_STMT);
2214 if (gsi_end_p (gsi))
2215 gsi = gsi_last_bb (gimple_bb (stmt));
2216 else
2217 gsi_prev (&gsi);
2219 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2220 if (gimple_uid (gsi_stmt (gsi)))
2221 break;
2222 else
2223 gimple_set_uid (gsi_stmt (gsi), uid);
2225 oe->op = tem;
2226 range->exp = exp;
2227 range->low = low;
2228 range->high = high;
2229 range->in_p = in_p;
2230 range->strict_overflow_p = false;
2232 for (i = 0; i < count; i++)
2234 if (otherrange)
2235 range = otherrange + i;
2236 else
2237 range = otherrangep[i];
2238 oe = (*ops)[range->idx];
2239 /* Now change all the other range test immediate uses, so that
2240 those tests will be optimized away. */
2241 if (opcode == ERROR_MARK)
2243 if (oe->op)
2244 oe->op = build_int_cst (TREE_TYPE (oe->op),
2245 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2246 else
2247 oe->op = (oe->rank == BIT_IOR_EXPR
2248 ? boolean_false_node : boolean_true_node);
2250 else
2251 oe->op = error_mark_node;
2252 range->exp = NULL_TREE;
2254 return true;
2257 /* Optimize X == CST1 || X == CST2
2258 if popcount (CST1 ^ CST2) == 1 into
2259 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2260 Similarly for ranges. E.g.
2261 X != 2 && X != 3 && X != 10 && X != 11
2262 will be transformed by the previous optimization into
2263 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2264 and this loop can transform that into
2265 !(((X & ~8) - 2U) <= 1U). */
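/* As a small worked instance of the first form: for X == 4 || X == 6
   we have CST1 ^ CST2 == 2, a single bit, so the pair collapses to
   (X & ~2) == 4.  */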
2267 static bool
2268 optimize_range_tests_xor (enum tree_code opcode, tree type,
2269 tree lowi, tree lowj, tree highi, tree highj,
2270 vec<operand_entry_t> *ops,
2271 struct range_entry *rangei,
2272 struct range_entry *rangej)
2274 tree lowxor, highxor, tem, exp;
2275 /* Check lowi ^ lowj == highi ^ highj and
2276 popcount (lowi ^ lowj) == 1. */
2277 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2278 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2279 return false;
2280 if (!integer_pow2p (lowxor))
2281 return false;
2282 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2283 if (!tree_int_cst_equal (lowxor, highxor))
2284 return false;
2286 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2287 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2288 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2289 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2290 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2291 NULL, rangei->in_p, lowj, highj,
2292 rangei->strict_overflow_p
2293 || rangej->strict_overflow_p))
2294 return true;
2295 return false;
2298 /* Optimize X == CST1 || X == CST2
2299 if popcount (CST2 - CST1) == 1 into
2300 ((X - CST1) & ~(CST2 - CST1)) == 0.
2301 Similarly for ranges. E.g.
2302 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2303 || X == 75 || X == 45
2304 will be transformed by the previous optimization into
2305 (X - 43U) <= 3U || (X - 75U) <= 3U
2306 and this loop can transform that into
2307 ((X - 43U) & ~(75U - 43U)) <= 3U. */
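/* A minimal instance of the first form: for X == 5 || X == 13 we have
   CST2 - CST1 == 8, a single bit, so the pair becomes
   ((X - 5) & ~8) == 0.  */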
2308 static bool
2309 optimize_range_tests_diff (enum tree_code opcode, tree type,
2310 tree lowi, tree lowj, tree highi, tree highj,
2311 vec<operand_entry_t> *ops,
2312 struct range_entry *rangei,
2313 struct range_entry *rangej)
2315 tree tem1, tem2, mask;
2316 /* Check highi - lowi == highj - lowj. */
2317 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2318 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2319 return false;
2320 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2321 if (!tree_int_cst_equal (tem1, tem2))
2322 return false;
2323 /* Check popcount (lowj - lowi) == 1. */
2324 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2325 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2326 return false;
2327 if (!integer_pow2p (tem1))
2328 return false;
2330 type = unsigned_type_for (type);
2331 tem1 = fold_convert (type, tem1);
2332 tem2 = fold_convert (type, tem2);
2333 lowi = fold_convert (type, lowi);
2334 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2335 tem1 = fold_binary (MINUS_EXPR, type,
2336 fold_convert (type, rangei->exp), lowi);
2337 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2338 lowj = build_int_cst (type, 0);
2339 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2340 NULL, rangei->in_p, lowj, tem2,
2341 rangei->strict_overflow_p
2342 || rangej->strict_overflow_p))
2343 return true;
2344 return false;
2347 /* Helper for optimize_range_tests.  It performs the checks common to
2348 optimize_range_tests_xor and optimize_range_tests_diff.
2349 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2350 Else it calls optimize_range_tests_diff. */
2352 static bool
2353 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2354 bool optimize_xor, vec<operand_entry_t> *ops,
2355 struct range_entry *ranges)
2357 int i, j;
2358 bool any_changes = false;
2359 for (i = first; i < length; i++)
2361 tree lowi, highi, lowj, highj, type, tem;
2363 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2364 continue;
2365 type = TREE_TYPE (ranges[i].exp);
2366 if (!INTEGRAL_TYPE_P (type))
2367 continue;
2368 lowi = ranges[i].low;
2369 if (lowi == NULL_TREE)
2370 lowi = TYPE_MIN_VALUE (type);
2371 highi = ranges[i].high;
2372 if (highi == NULL_TREE)
2373 continue;
2374 for (j = i + 1; j < length && j < i + 64; j++)
2376 bool changes;
2377 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2378 continue;
2379 lowj = ranges[j].low;
2380 if (lowj == NULL_TREE)
2381 continue;
2382 highj = ranges[j].high;
2383 if (highj == NULL_TREE)
2384 highj = TYPE_MAX_VALUE (type);
2385 /* Check lowj > highi. */
2386 tem = fold_binary (GT_EXPR, boolean_type_node,
2387 lowj, highi);
2388 if (tem == NULL_TREE || !integer_onep (tem))
2389 continue;
2390 if (optimize_xor)
2391 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2392 highi, highj, ops,
2393 ranges + i, ranges + j);
2394 else
2395 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2396 highi, highj, ops,
2397 ranges + i, ranges + j);
2398 if (changes)
2400 any_changes = true;
2401 break;
2405 return any_changes;
2408 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2409 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2410 EXP on success, NULL otherwise. */
2412 static tree
2413 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2414 wide_int *mask, tree *totallowp)
2416 tree tem = int_const_binop (MINUS_EXPR, high, low);
2417 if (tem == NULL_TREE
2418 || TREE_CODE (tem) != INTEGER_CST
2419 || TREE_OVERFLOW (tem)
2420 || tree_int_cst_sgn (tem) == -1
2421 || compare_tree_int (tem, prec) != -1)
2422 return NULL_TREE;
2424 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2425 *mask = wi::shifted_mask (0, max, false, prec);
2426 if (TREE_CODE (exp) == BIT_AND_EXPR
2427 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2429 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2430 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2431 if (wi::popcount (msk) == 1
2432 && wi::ltu_p (msk, prec - max))
2434 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2435 max += msk.to_uhwi ();
2436 exp = TREE_OPERAND (exp, 0);
2437 if (integer_zerop (low)
2438 && TREE_CODE (exp) == PLUS_EXPR
2439 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2441 tree ret = TREE_OPERAND (exp, 0);
2442 STRIP_NOPS (ret);
2443 widest_int bias
2444 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2445 TYPE_PRECISION (TREE_TYPE (low))));
2446 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2447 if (totallowp)
2449 *totallowp = tbias;
2450 return ret;
2452 else if (!tree_int_cst_lt (totallow, tbias))
2453 return NULL_TREE;
2454 bias = wi::to_widest (tbias);
2455 bias -= wi::to_widest (totallow);
2456 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2458 *mask = wi::lshift (*mask, bias);
2459 return ret;
2464 if (totallowp)
2465 return exp;
2466 if (!tree_int_cst_lt (totallow, low))
2467 return exp;
2468 tem = int_const_binop (MINUS_EXPR, low, totallow);
2469 if (tem == NULL_TREE
2470 || TREE_CODE (tem) != INTEGER_CST
2471 || TREE_OVERFLOW (tem)
2472 || compare_tree_int (tem, prec - max) == 1)
2473 return NULL_TREE;
2475 *mask = wi::lshift (*mask, wi::to_widest (tem));
2476 return exp;
2479 /* Attempt to optimize small range tests using bit test.
2480 E.g.
2481 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2482 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2483 has been optimized by the earlier optimizations into:
2484 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2485 As the whole 43 through 82 range covers fewer than 64 values,
2486 for 64-bit word targets optimize that into:
2487 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
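/* In the mask above, bit N is set iff 43 + N is one of the tested
   values: bits 0-3 and 6 give the low 0x4F for 43..46 and 49, and
   bits 32-35 and 39 give 0x8F00000000 for 75..78 and 82.  */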
2489 static bool
2490 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2491 vec<operand_entry_t> *ops,
2492 struct range_entry *ranges)
2494 int i, j;
2495 bool any_changes = false;
2496 int prec = GET_MODE_BITSIZE (word_mode);
2497 auto_vec<struct range_entry *, 64> candidates;
2499 for (i = first; i < length - 2; i++)
2501 tree lowi, highi, lowj, highj, type;
2503 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2504 continue;
2505 type = TREE_TYPE (ranges[i].exp);
2506 if (!INTEGRAL_TYPE_P (type))
2507 continue;
2508 lowi = ranges[i].low;
2509 if (lowi == NULL_TREE)
2510 lowi = TYPE_MIN_VALUE (type);
2511 highi = ranges[i].high;
2512 if (highi == NULL_TREE)
2513 continue;
2514 wide_int mask;
2515 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2516 highi, &mask, &lowi);
2517 if (exp == NULL_TREE)
2518 continue;
2519 bool strict_overflow_p = ranges[i].strict_overflow_p;
2520 candidates.truncate (0);
2521 int end = MIN (i + 64, length);
2522 for (j = i + 1; j < end; j++)
2524 tree exp2;
2525 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2526 continue;
2527 if (ranges[j].exp == exp)
2529 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2531 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2532 if (exp2 == exp)
2534 else if (TREE_CODE (exp2) == PLUS_EXPR)
2536 exp2 = TREE_OPERAND (exp2, 0);
2537 STRIP_NOPS (exp2);
2538 if (exp2 != exp)
2539 continue;
2541 else
2542 continue;
2544 else
2545 continue;
2546 lowj = ranges[j].low;
2547 if (lowj == NULL_TREE)
2548 continue;
2549 highj = ranges[j].high;
2550 if (highj == NULL_TREE)
2551 highj = TYPE_MAX_VALUE (type);
2552 wide_int mask2;
2553 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2554 highj, &mask2, NULL);
2555 if (exp2 != exp)
2556 continue;
2557 mask |= mask2;
2558 strict_overflow_p |= ranges[j].strict_overflow_p;
2559 candidates.safe_push (&ranges[j]);
2562 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2563 if (candidates.length () >= 2)
2565 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2566 wi::to_widest (lowi)
2567 + prec - 1 - wi::clz (mask));
2568 operand_entry_t oe = (*ops)[ranges[i].idx];
2569 tree op = oe->op;
2570 gimple stmt = op ? SSA_NAME_DEF_STMT (op)
2571 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2572 location_t loc = gimple_location (stmt);
2573 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2575 /* See if it isn't cheaper to pretend the minimum value of the
2576 range is 0, if the maximum value is small enough.
2577 We can then avoid the subtraction of the minimum value, but the
2578 mask constant might be more expensive. */
2579 if (compare_tree_int (lowi, 0) > 0
2580 && compare_tree_int (high, prec) < 0)
2582 int cost_diff;
2583 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2584 rtx reg = gen_raw_REG (word_mode, 10000);
2585 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2586 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2587 GEN_INT (-m)), speed_p);
2588 rtx r = immed_wide_int_const (mask, word_mode);
2589 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2590 speed_p);
2591 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2592 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2593 speed_p);
2594 if (cost_diff > 0)
2596 mask = wi::lshift (mask, m);
2597 lowi = build_zero_cst (TREE_TYPE (lowi));
2601 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2602 false, lowi, high);
2603 if (tem == NULL_TREE || is_gimple_val (tem))
2604 continue;
2605 tree etype = unsigned_type_for (TREE_TYPE (exp));
2606 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2607 fold_convert_loc (loc, etype, exp),
2608 fold_convert_loc (loc, etype, lowi));
2609 exp = fold_convert_loc (loc, integer_type_node, exp);
2610 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2611 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2612 build_int_cst (word_type, 1), exp);
2613 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2614 wide_int_to_tree (word_type, mask));
2615 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2616 build_zero_cst (word_type));
2617 if (is_gimple_val (exp))
2618 continue;
2620 /* The shift might have undefined behavior if TEM is true,
2621 but reassociate_bb isn't prepared to have basic blocks
2622 split when it is running. So, temporarily emit code
2623 with BIT_IOR_EXPR instead of &&, and fix it up in
2624 branch_fixup. */
2625 gimple_seq seq;
2626 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2627 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2628 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2629 gimple_seq seq2;
2630 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2631 gimple_seq_add_seq_without_update (&seq, seq2);
2632 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2633 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2634 gimple g = gimple_build_assign (make_ssa_name (optype),
2635 BIT_IOR_EXPR, tem, exp);
2636 gimple_set_location (g, loc);
2637 gimple_seq_add_stmt_without_update (&seq, g);
2638 exp = gimple_assign_lhs (g);
2639 tree val = build_zero_cst (optype);
2640 if (update_range_test (&ranges[i], NULL, candidates.address (),
2641 candidates.length (), opcode, ops, exp,
2642 seq, false, val, val, strict_overflow_p))
2644 any_changes = true;
2645 reassoc_branch_fixups.safe_push (tem);
2647 else
2648 gimple_seq_discard (seq);
2651 return any_changes;
2654 /* Optimize range tests, similarly how fold_range_test optimizes
2655 it on trees. The tree code for the binary
2656 operation between all the operands is OPCODE.
2657 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2658 maybe_optimize_range_tests for inter-bb range optimization.
2659 In that case, if oe->op is NULL, oe->id is the index of the bb whose
2660 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2661 the actual opcode. */
2663 static bool
2664 optimize_range_tests (enum tree_code opcode,
2665 vec<operand_entry_t> *ops)
2667 unsigned int length = ops->length (), i, j, first;
2668 operand_entry_t oe;
2669 struct range_entry *ranges;
2670 bool any_changes = false;
2672 if (length == 1)
2673 return false;
2675 ranges = XNEWVEC (struct range_entry, length);
2676 for (i = 0; i < length; i++)
2678 oe = (*ops)[i];
2679 ranges[i].idx = i;
2680 init_range_entry (ranges + i, oe->op,
2681 oe->op ? NULL :
2682 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2683 /* For | invert it now, we will invert it again before emitting
2684 the optimized expression. */
2685 if (opcode == BIT_IOR_EXPR
2686 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2687 ranges[i].in_p = !ranges[i].in_p;
2690 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2691 for (i = 0; i < length; i++)
2692 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2693 break;
2695 /* Try to merge ranges. */
2696 for (first = i; i < length; i++)
2698 tree low = ranges[i].low;
2699 tree high = ranges[i].high;
2700 int in_p = ranges[i].in_p;
2701 bool strict_overflow_p = ranges[i].strict_overflow_p;
2702 int update_fail_count = 0;
2704 for (j = i + 1; j < length; j++)
2706 if (ranges[i].exp != ranges[j].exp)
2707 break;
2708 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2709 ranges[j].in_p, ranges[j].low, ranges[j].high))
2710 break;
2711 strict_overflow_p |= ranges[j].strict_overflow_p;
2714 if (j == i + 1)
2715 continue;
2717 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2718 opcode, ops, ranges[i].exp, NULL, in_p,
2719 low, high, strict_overflow_p))
2721 i = j - 1;
2722 any_changes = true;
2724 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2725 while update_range_test would fail. */
2726 else if (update_fail_count == 64)
2727 i = j - 1;
2728 else
2729 ++update_fail_count;
2732 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2733 ops, ranges);
2735 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2736 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2737 ops, ranges);
2738 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2739 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2740 ops, ranges);
2742 if (any_changes && opcode != ERROR_MARK)
2744 j = 0;
2745 FOR_EACH_VEC_ELT (*ops, i, oe)
2747 if (oe->op == error_mark_node)
2748 continue;
2749 else if (i != j)
2750 (*ops)[j] = oe;
2751 j++;
2753 ops->truncate (j);
2756 XDELETEVEC (ranges);
2757 return any_changes;
2760 /* Return true if STMT is a cast like:
2761 <bb N>:
2763 _123 = (int) _234;
2765 <bb M>:
2766 # _345 = PHI <_123(N), 1(...), 1(...)>
2767 where _234 has bool type, _123 has single use and
2768 bb N has a single successor M. This is commonly used in
2769 the last block of a range test. */
2771 static bool
2772 final_range_test_p (gimple stmt)
2774 basic_block bb, rhs_bb;
2775 edge e;
2776 tree lhs, rhs;
2777 use_operand_p use_p;
2778 gimple use_stmt;
2780 if (!gimple_assign_cast_p (stmt))
2781 return false;
2782 bb = gimple_bb (stmt);
2783 if (!single_succ_p (bb))
2784 return false;
2785 e = single_succ_edge (bb);
2786 if (e->flags & EDGE_COMPLEX)
2787 return false;
2789 lhs = gimple_assign_lhs (stmt);
2790 rhs = gimple_assign_rhs1 (stmt);
2791 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2792 || TREE_CODE (rhs) != SSA_NAME
2793 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2794 return false;
2796 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2797 if (!single_imm_use (lhs, &use_p, &use_stmt))
2798 return false;
2800 if (gimple_code (use_stmt) != GIMPLE_PHI
2801 || gimple_bb (use_stmt) != e->dest)
2802 return false;
2804 /* And that the rhs is defined in the same loop. */
2805 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2806 if (rhs_bb == NULL
2807 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2808 return false;
2810 return true;
2813 /* Return true if BB is a suitable basic block for inter-bb range test
2814 optimization. If BACKWARD is true, BB should be the only predecessor
2815 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
2816 or compared against, to find a common basic block to which all conditions
2817 branch if true resp. false. If BACKWARD is false, TEST_BB should
2818 be the only predecessor of BB. */
2820 static bool
2821 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2822 bool backward)
2824 edge_iterator ei, ei2;
2825 edge e, e2;
2826 gimple stmt;
2827 gphi_iterator gsi;
2828 bool other_edge_seen = false;
2829 bool is_cond;
2831 if (test_bb == bb)
2832 return false;
2833 /* Check last stmt first. */
2834 stmt = last_stmt (bb);
2835 if (stmt == NULL
2836 || (gimple_code (stmt) != GIMPLE_COND
2837 && (backward || !final_range_test_p (stmt)))
2838 || gimple_visited_p (stmt)
2839 || stmt_could_throw_p (stmt)
2840 || *other_bb == bb)
2841 return false;
2842 is_cond = gimple_code (stmt) == GIMPLE_COND;
2843 if (is_cond)
2845 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2846 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2847 to *OTHER_BB (if not set yet, try to find it out). */
2848 if (EDGE_COUNT (bb->succs) != 2)
2849 return false;
2850 FOR_EACH_EDGE (e, ei, bb->succs)
2852 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2853 return false;
2854 if (e->dest == test_bb)
2856 if (backward)
2857 continue;
2858 else
2859 return false;
2861 if (e->dest == bb)
2862 return false;
2863 if (*other_bb == NULL)
2865 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2866 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2867 return false;
2868 else if (e->dest == e2->dest)
2869 *other_bb = e->dest;
2870 if (*other_bb == NULL)
2871 return false;
2873 if (e->dest == *other_bb)
2874 other_edge_seen = true;
2875 else if (backward)
2876 return false;
2878 if (*other_bb == NULL || !other_edge_seen)
2879 return false;
2881 else if (single_succ (bb) != *other_bb)
2882 return false;
2884 /* Now check all PHIs of *OTHER_BB. */
2885 e = find_edge (bb, *other_bb);
2886 e2 = find_edge (test_bb, *other_bb);
2887 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2889 gphi *phi = gsi.phi ();
2890 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2891 corresponding to BB and TEST_BB predecessor must be the same. */
2892 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2893 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2895 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2896 one of the PHIs should have the lhs of the last stmt in
2897 that block as PHI arg and that PHI should have 0 or 1
2898 corresponding to it in all other range test basic blocks
2899 considered. */
2900 if (!is_cond)
2902 if (gimple_phi_arg_def (phi, e->dest_idx)
2903 == gimple_assign_lhs (stmt)
2904 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2905 || integer_onep (gimple_phi_arg_def (phi,
2906 e2->dest_idx))))
2907 continue;
2909 else
2911 gimple test_last = last_stmt (test_bb);
2912 if (gimple_code (test_last) != GIMPLE_COND
2913 && gimple_phi_arg_def (phi, e2->dest_idx)
2914 == gimple_assign_lhs (test_last)
2915 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2916 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2917 continue;
2920 return false;
2923 return true;
2926 /* Return true if BB doesn't have side-effects that would disallow
2927 range test optimization, all SSA_NAMEs set in the bb are consumed
2928 in the bb and there are no PHIs. */
2930 static bool
2931 no_side_effect_bb (basic_block bb)
2933 gimple_stmt_iterator gsi;
2934 gimple last;
2936 if (!gimple_seq_empty_p (phi_nodes (bb)))
2937 return false;
2938 last = last_stmt (bb);
2939 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2941 gimple stmt = gsi_stmt (gsi);
2942 tree lhs;
2943 imm_use_iterator imm_iter;
2944 use_operand_p use_p;
2946 if (is_gimple_debug (stmt))
2947 continue;
2948 if (gimple_has_side_effects (stmt))
2949 return false;
2950 if (stmt == last)
2951 return true;
2952 if (!is_gimple_assign (stmt))
2953 return false;
2954 lhs = gimple_assign_lhs (stmt);
2955 if (TREE_CODE (lhs) != SSA_NAME)
2956 return false;
2957 if (gimple_assign_rhs_could_trap_p (stmt))
2958 return false;
2959 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2961 gimple use_stmt = USE_STMT (use_p);
2962 if (is_gimple_debug (use_stmt))
2963 continue;
2964 if (gimple_bb (use_stmt) != bb)
2965 return false;
2968 return false;
2971 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2972 return true and fill in *OPS recursively. */
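/* For instance, assuming _7, _1, _2 and _3 each have a single use and
   _1, _2, _3 are not themselves defined by reassociable | statements:
     _7 = _1 | _2;
     _8 = _7 | _3;
   then get_ops (_8, BIT_IOR_EXPR, &ops, loop) pushes _1, _2 and _3.  */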
2974 static bool
2975 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2976 struct loop *loop)
2978 gimple stmt = SSA_NAME_DEF_STMT (var);
2979 tree rhs[2];
2980 int i;
2982 if (!is_reassociable_op (stmt, code, loop))
2983 return false;
2985 rhs[0] = gimple_assign_rhs1 (stmt);
2986 rhs[1] = gimple_assign_rhs2 (stmt);
2987 gimple_set_visited (stmt, true);
2988 for (i = 0; i < 2; i++)
2989 if (TREE_CODE (rhs[i]) == SSA_NAME
2990 && !get_ops (rhs[i], code, ops, loop)
2991 && has_single_use (rhs[i]))
2993 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2995 oe->op = rhs[i];
2996 oe->rank = code;
2997 oe->id = 0;
2998 oe->count = 1;
2999 ops->safe_push (oe);
3001 return true;
3004 /* Find the ops that were added by get_ops starting from VAR, see if
3005 they were changed during update_range_test and if yes, create new
3006 stmts. */
3008 static tree
3009 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
3010 unsigned int *pidx, struct loop *loop)
3012 gimple stmt = SSA_NAME_DEF_STMT (var);
3013 tree rhs[4];
3014 int i;
3016 if (!is_reassociable_op (stmt, code, loop))
3017 return NULL;
3019 rhs[0] = gimple_assign_rhs1 (stmt);
3020 rhs[1] = gimple_assign_rhs2 (stmt);
3021 rhs[2] = rhs[0];
3022 rhs[3] = rhs[1];
3023 for (i = 0; i < 2; i++)
3024 if (TREE_CODE (rhs[i]) == SSA_NAME)
3026 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3027 if (rhs[2 + i] == NULL_TREE)
3029 if (has_single_use (rhs[i]))
3030 rhs[2 + i] = ops[(*pidx)++]->op;
3031 else
3032 rhs[2 + i] = rhs[i];
3035 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3036 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3038 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3039 var = make_ssa_name (TREE_TYPE (var));
3040 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3041 rhs[2], rhs[3]);
3042 gimple_set_uid (g, gimple_uid (stmt));
3043 gimple_set_visited (g, true);
3044 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3046 return var;
3049 /* Structure to track the initial value passed to get_ops and
3050 the range in the ops vector for each basic block. */
3052 struct inter_bb_range_test_entry
3054 tree op;
3055 unsigned int first_idx, last_idx;
3058 /* Inter-bb range test optimization. */
3060 static void
3061 maybe_optimize_range_tests (gimple stmt)
3063 basic_block first_bb = gimple_bb (stmt);
3064 basic_block last_bb = first_bb;
3065 basic_block other_bb = NULL;
3066 basic_block bb;
3067 edge_iterator ei;
3068 edge e;
3069 auto_vec<operand_entry_t> ops;
3070 auto_vec<inter_bb_range_test_entry> bbinfo;
3071 bool any_changes = false;
3073 /* Consider only basic blocks that end with GIMPLE_COND or
3074 a cast statement satisfying final_range_test_p. All
3075 but the last bb in the first_bb .. last_bb range
3076 should end with GIMPLE_COND. */
3077 if (gimple_code (stmt) == GIMPLE_COND)
3079 if (EDGE_COUNT (first_bb->succs) != 2)
3080 return;
3082 else if (final_range_test_p (stmt))
3083 other_bb = single_succ (first_bb);
3084 else
3085 return;
3087 if (stmt_could_throw_p (stmt))
3088 return;
3090 /* As relative ordering of post-dominator sons isn't fixed,
3091 maybe_optimize_range_tests can be called first on any
3092 bb in the range we want to optimize. So, start searching
3093 backwards, to see if first_bb can be set to a predecessor.
3094 while (single_pred_p (first_bb))
3096 basic_block pred_bb = single_pred (first_bb);
3097 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3098 break;
3099 if (!no_side_effect_bb (first_bb))
3100 break;
3101 first_bb = pred_bb;
3103 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3104 Before starting forward search in last_bb successors, find
3105 out the other_bb. */
3106 if (first_bb == last_bb)
3108 other_bb = NULL;
3109 /* As non-GIMPLE_COND last stmt always terminates the range,
3110 if forward search didn't discover anything, just give up. */
3111 if (gimple_code (stmt) != GIMPLE_COND)
3112 return;
3113 /* Look at both successors. Either it ends with a GIMPLE_COND
3114 and satisfies suitable_cond_bb, or ends with a cast and
3115 other_bb is that cast's successor. */
3116 FOR_EACH_EDGE (e, ei, first_bb->succs)
3117 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3118 || e->dest == first_bb)
3119 return;
3120 else if (single_pred_p (e->dest))
3122 stmt = last_stmt (e->dest);
3123 if (stmt
3124 && gimple_code (stmt) == GIMPLE_COND
3125 && EDGE_COUNT (e->dest->succs) == 2)
3127 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3128 break;
3129 else
3130 other_bb = NULL;
3132 else if (stmt
3133 && final_range_test_p (stmt)
3134 && find_edge (first_bb, single_succ (e->dest)))
3136 other_bb = single_succ (e->dest);
3137 if (other_bb == first_bb)
3138 other_bb = NULL;
3141 if (other_bb == NULL)
3142 return;
3144 /* Now do the forward search, moving last_bb to successor bbs
3145 that aren't other_bb. */
3146 while (EDGE_COUNT (last_bb->succs) == 2)
3148 FOR_EACH_EDGE (e, ei, last_bb->succs)
3149 if (e->dest != other_bb)
3150 break;
3151 if (e == NULL)
3152 break;
3153 if (!single_pred_p (e->dest))
3154 break;
3155 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3156 break;
3157 if (!no_side_effect_bb (e->dest))
3158 break;
3159 last_bb = e->dest;
3161 if (first_bb == last_bb)
3162 return;
3163 /* Here basic blocks first_bb through last_bb's predecessor
3164 end with GIMPLE_COND, all of them have one of the edges to
3165 other_bb and another to another block in the range,
3166 all blocks except first_bb don't have side-effects and
3167 last_bb ends with either GIMPLE_COND, or cast satisfying
3168 final_range_test_p. */
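/* Illustrative sketch of the shape described above (simplified):

     first_bb -> bb2 -> ... -> last_bb
         \        \               \
          +--------+---------------+-> other_bb

   where each GIMPLE_COND has one edge to the next block in the chain
   and one edge to other_bb; if last_bb ends with the final cast,
   other_bb is simply its single successor.  */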
3169 for (bb = last_bb; ; bb = single_pred (bb))
3171 enum tree_code code;
3172 tree lhs, rhs;
3173 inter_bb_range_test_entry bb_ent;
3175 bb_ent.op = NULL_TREE;
3176 bb_ent.first_idx = ops.length ();
3177 bb_ent.last_idx = bb_ent.first_idx;
3178 e = find_edge (bb, other_bb);
3179 stmt = last_stmt (bb);
3180 gimple_set_visited (stmt, true);
3181 if (gimple_code (stmt) != GIMPLE_COND)
3183 use_operand_p use_p;
3184 gimple phi;
3185 edge e2;
3186 unsigned int d;
3188 lhs = gimple_assign_lhs (stmt);
3189 rhs = gimple_assign_rhs1 (stmt);
3190 gcc_assert (bb == last_bb);
3192 /* stmt is
3193 _123 = (int) _234;
3195 followed by:
3196 <bb M>:
3197 # _345 = PHI <_123(N), 1(...), 1(...)>
3199 or 0 instead of 1. If it is 0, the _234
3200 range test is anded together with all the
3201 other range tests, if it is 1, it is ored with
3202 them. */
3203 single_imm_use (lhs, &use_p, &phi);
3204 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3205 e2 = find_edge (first_bb, other_bb);
3206 d = e2->dest_idx;
3207 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3208 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3209 code = BIT_AND_EXPR;
3210 else
3212 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3213 code = BIT_IOR_EXPR;
3216 /* If _234 SSA_NAME_DEF_STMT is
3217 _234 = _567 | _789;
3218 (or &, corresponding to 1/0 in the phi arguments),
3219 push into ops the individual range test arguments
3220 of the bitwise or resp. and, recursively. */
3221 if (!get_ops (rhs, code, &ops,
3222 loop_containing_stmt (stmt))
3223 && has_single_use (rhs))
3225 /* Otherwise, push the _234 range test itself. */
3226 operand_entry_t oe
3227 = (operand_entry_t) pool_alloc (operand_entry_pool);
3229 oe->op = rhs;
3230 oe->rank = code;
3231 oe->id = 0;
3232 oe->count = 1;
3233 ops.safe_push (oe);
3234 bb_ent.last_idx++;
3236 else
3237 bb_ent.last_idx = ops.length ();
3238 bb_ent.op = rhs;
3239 bbinfo.safe_push (bb_ent);
3240 continue;
3242 /* Otherwise stmt is GIMPLE_COND. */
3243 code = gimple_cond_code (stmt);
3244 lhs = gimple_cond_lhs (stmt);
3245 rhs = gimple_cond_rhs (stmt);
3246 if (TREE_CODE (lhs) == SSA_NAME
3247 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3248 && ((code != EQ_EXPR && code != NE_EXPR)
3249 || rhs != boolean_false_node
3250 /* Either push into ops the individual bitwise
3251 or resp. and operands, depending on which
3252 edge is other_bb. */
3253 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3254 ^ (code == EQ_EXPR))
3255 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3256 loop_containing_stmt (stmt))))
3258 /* Or push the GIMPLE_COND stmt itself. */
3259 operand_entry_t oe
3260 = (operand_entry_t) pool_alloc (operand_entry_pool);
3262 oe->op = NULL;
3263 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3264 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3265 /* oe->op = NULL signals that there is no SSA_NAME
3266 for the range test, and oe->id instead is the number
3267 of the basic block at whose end the GIMPLE_COND
3268 is. */
3269 oe->id = bb->index;
3270 oe->count = 1;
3271 ops.safe_push (oe);
3272 bb_ent.op = NULL;
3273 bb_ent.last_idx++;
3275 else if (ops.length () > bb_ent.first_idx)
3277 bb_ent.op = lhs;
3278 bb_ent.last_idx = ops.length ();
3280 bbinfo.safe_push (bb_ent);
3281 if (bb == first_bb)
3282 break;
3284 if (ops.length () > 1)
3285 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3286 if (any_changes)
3288 unsigned int idx;
3289 /* update_ops relies on the has_single_use predicates returning the
3290 same values as they did during get_ops earlier. Additionally it
3291 never removes statements, only adds new ones, and it should walk
3292 from the single imm use and check the predicate already before
3293 making those changes.
3294 On the other hand, the direct handling of GIMPLE_COND can turn
3295 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3296 it needs to be done in a separate loop afterwards. */
3297 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3299 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3300 && bbinfo[idx].op != NULL_TREE)
3302 tree new_op;
3304 stmt = last_stmt (bb);
3305 new_op = update_ops (bbinfo[idx].op,
3306 (enum tree_code)
3307 ops[bbinfo[idx].first_idx]->rank,
3308 ops, &bbinfo[idx].first_idx,
3309 loop_containing_stmt (stmt));
3310 if (new_op == NULL_TREE)
3312 gcc_assert (bb == last_bb);
3313 new_op = ops[bbinfo[idx].first_idx++]->op;
3315 if (bbinfo[idx].op != new_op)
3317 imm_use_iterator iter;
3318 use_operand_p use_p;
3319 gimple use_stmt, cast_stmt = NULL;
3321 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3322 if (is_gimple_debug (use_stmt))
3323 continue;
3324 else if (gimple_code (use_stmt) == GIMPLE_COND
3325 || gimple_code (use_stmt) == GIMPLE_PHI)
3326 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3327 SET_USE (use_p, new_op);
3328 else if (gimple_assign_cast_p (use_stmt))
3329 cast_stmt = use_stmt;
3330 else
3331 gcc_unreachable ();
3332 if (cast_stmt)
3334 gcc_assert (bb == last_bb);
3335 tree lhs = gimple_assign_lhs (cast_stmt);
3336 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3337 enum tree_code rhs_code
3338 = gimple_assign_rhs_code (cast_stmt);
3339 gassign *g;
3340 if (is_gimple_min_invariant (new_op))
3342 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3343 g = gimple_build_assign (new_lhs, new_op);
3345 else
3346 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3347 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3348 gimple_set_uid (g, gimple_uid (cast_stmt));
3349 gimple_set_visited (g, true);
3350 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3351 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3352 if (is_gimple_debug (use_stmt))
3353 continue;
3354 else if (gimple_code (use_stmt) == GIMPLE_COND
3355 || gimple_code (use_stmt) == GIMPLE_PHI)
3356 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3357 SET_USE (use_p, new_lhs);
3358 else
3359 gcc_unreachable ();
3363 if (bb == first_bb)
3364 break;
3366 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3368 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3369 && bbinfo[idx].op == NULL_TREE
3370 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3372 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3373 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3374 gimple_cond_make_false (cond_stmt);
3375 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3376 gimple_cond_make_true (cond_stmt);
3377 else
3379 gimple_cond_set_code (cond_stmt, NE_EXPR);
3380 gimple_cond_set_lhs (cond_stmt,
3381 ops[bbinfo[idx].first_idx]->op);
3382 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3384 update_stmt (cond_stmt);
3386 if (bb == first_bb)
3387 break;
3392 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3393 of STMT in its operands. This is also known as a "destructive
3394 update" operation. */
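/* For example, in a reduction loop with
     sum_1 = PHI <sum_2(latch), 0(preheader)>
     sum_2 = sum_1 + x_3;
   the operand sum_1 of the add is defined by a PHI that uses the
   add's lhs sum_2, so this function returns true for it.  */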
3396 static bool
3397 is_phi_for_stmt (gimple stmt, tree operand)
3399 gimple def_stmt;
3400 gphi *def_phi;
3401 tree lhs;
3402 use_operand_p arg_p;
3403 ssa_op_iter i;
3405 if (TREE_CODE (operand) != SSA_NAME)
3406 return false;
3408 lhs = gimple_assign_lhs (stmt);
3410 def_stmt = SSA_NAME_DEF_STMT (operand);
3411 def_phi = dyn_cast <gphi *> (def_stmt);
3412 if (!def_phi)
3413 return false;
3415 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3416 if (lhs == USE_FROM_PTR (arg_p))
3417 return true;
3418 return false;
3421 /* Remove def stmt of VAR if VAR has zero uses and recurse
3422 on rhs1 operand if so. */
3424 static void
3425 remove_visited_stmt_chain (tree var)
3427 gimple stmt;
3428 gimple_stmt_iterator gsi;
3430 while (1)
3432 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3433 return;
3434 stmt = SSA_NAME_DEF_STMT (var);
3435 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3437 var = gimple_assign_rhs1 (stmt);
3438 gsi = gsi_for_stmt (stmt);
3439 reassoc_remove_stmt (&gsi);
3440 release_defs (stmt);
3442 else
3443 return;
3447 /* This function checks three consecutive operands in
3448 the passed operands vector OPS starting from OPINDEX and
3449 swaps two operands if it is profitable for the binary operation
3450 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3452 We pair ops with the same rank if possible.
3454 The alternative we try is to see if STMT is a destructive
3455 update style statement, which is like:
3456 b = phi (a, ...)
3457 a = c + b;
3458 In that case, we want to use the destructive update form to
3459 expose the possible vectorizer sum reduction opportunity.
3460 In that case, the third operand will be the phi node. This
3461 check is not performed if STMT is null.
3463 We could, of course, try to be better as noted above, and do a
3464 lot of work to try to find these opportunities in >3 operand
3465 cases, but it is unlikely to be worth it. */
3467 static void
3468 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3469 unsigned int opindex, gimple stmt)
3471 operand_entry_t oe1, oe2, oe3;
3473 oe1 = ops[opindex];
3474 oe2 = ops[opindex + 1];
3475 oe3 = ops[opindex + 2];
3477 if ((oe1->rank == oe2->rank
3478 && oe2->rank != oe3->rank)
3479 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3480 && !is_phi_for_stmt (stmt, oe1->op)
3481 && !is_phi_for_stmt (stmt, oe2->op)))
3483 struct operand_entry temp = *oe3;
3484 oe3->op = oe1->op;
3485 oe3->rank = oe1->rank;
3486 oe1->op = temp.op;
3487 oe1->rank= temp.rank;
3489 else if ((oe1->rank == oe3->rank
3490 && oe2->rank != oe3->rank)
3491 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3492 && !is_phi_for_stmt (stmt, oe1->op)
3493 && !is_phi_for_stmt (stmt, oe3->op)))
3495 struct operand_entry temp = *oe2;
3496 oe2->op = oe1->op;
3497 oe2->rank = oe1->rank;
3498 oe1->op = temp.op;
3499 oe1->rank = temp.rank;
3503 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3504 two definitions, otherwise return STMT. */
3506 static inline gimple
3507 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3509 if (TREE_CODE (rhs1) == SSA_NAME
3510 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3511 stmt = SSA_NAME_DEF_STMT (rhs1);
3512 if (TREE_CODE (rhs2) == SSA_NAME
3513 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3514 stmt = SSA_NAME_DEF_STMT (rhs2);
3515 return stmt;
3518 /* Recursively rewrite our linearized statements so that the operators
3519 match those in OPS[OPINDEX], putting the computation in rank
3520 order. Return new lhs. */
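/* As a sketch of the recursion: for the linearized chain
     t1 = x1 + x2;  t2 = t1 + x3;  lhs = t2 + x4;
   and a four-element OPS vector, the innermost statement ends up
   computing OPS[2] + OPS[3], the next one adds OPS[1] and the
   outermost adds OPS[0], reusing statements whose operands already
   match.  */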
3522 static tree
3523 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3524 vec<operand_entry_t> ops, bool changed)
3526 tree rhs1 = gimple_assign_rhs1 (stmt);
3527 tree rhs2 = gimple_assign_rhs2 (stmt);
3528 tree lhs = gimple_assign_lhs (stmt);
3529 operand_entry_t oe;
3531 /* The final recursion case for this function is that you have
3532 exactly two operations left.
3533 If we had exactly one op in the entire list to start with, we
3534 would have never called this function, and the tail recursion
3535 rewrites them one at a time. */
3536 if (opindex + 2 == ops.length ())
3538 operand_entry_t oe1, oe2;
3540 oe1 = ops[opindex];
3541 oe2 = ops[opindex + 1];
3543 if (rhs1 != oe1->op || rhs2 != oe2->op)
3545 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3546 unsigned int uid = gimple_uid (stmt);
3548 if (dump_file && (dump_flags & TDF_DETAILS))
3550 fprintf (dump_file, "Transforming ");
3551 print_gimple_stmt (dump_file, stmt, 0, 0);
3554 /* Even when changed is false, reassociation could have e.g. removed
3555 some redundant operations, so unless we are just swapping the
3556 arguments or unless there is no change at all (then we just
3557 return lhs), force creation of a new SSA_NAME. */
3558 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3560 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3561 lhs = make_ssa_name (TREE_TYPE (lhs));
3562 stmt
3563 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3564 oe1->op, oe2->op);
3565 gimple_set_uid (stmt, uid);
3566 gimple_set_visited (stmt, true);
3567 if (insert_point == gsi_stmt (gsi))
3568 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3569 else
3570 insert_stmt_after (stmt, insert_point);
3572 else
3574 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3575 == stmt);
3576 gimple_assign_set_rhs1 (stmt, oe1->op);
3577 gimple_assign_set_rhs2 (stmt, oe2->op);
3578 update_stmt (stmt);
3581 if (rhs1 != oe1->op && rhs1 != oe2->op)
3582 remove_visited_stmt_chain (rhs1);
3584 if (dump_file && (dump_flags & TDF_DETAILS))
3586 fprintf (dump_file, " into ");
3587 print_gimple_stmt (dump_file, stmt, 0, 0);
3590 return lhs;
3593 /* If we hit here, we should have 3 or more ops left. */
3594 gcc_assert (opindex + 2 < ops.length ());
3596 /* Rewrite the next operator. */
3597 oe = ops[opindex];
3599 /* Recurse on the LHS of the binary operator, which is guaranteed to
3600 be the non-leaf side. */
3601 tree new_rhs1
3602 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3603 changed || oe->op != rhs2);
3605 if (oe->op != rhs2 || new_rhs1 != rhs1)
3607 if (dump_file && (dump_flags & TDF_DETAILS))
3609 fprintf (dump_file, "Transforming ");
3610 print_gimple_stmt (dump_file, stmt, 0, 0);
3613 /* If changed is false, this is either opindex == 0
3614 or all outer rhs2's were equal to corresponding oe->op,
3615 and powi_result is NULL.
3616 That means lhs is equivalent before and after reassociation.
3617 Otherwise ensure the old lhs SSA_NAME is not reused and
3618 create a new stmt as well, so that any debug stmts will be
3619 properly adjusted. */
3620 if (changed)
3622 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3623 unsigned int uid = gimple_uid (stmt);
3624 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3626 lhs = make_ssa_name (TREE_TYPE (lhs));
3627 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3628 new_rhs1, oe->op);
3629 gimple_set_uid (stmt, uid);
3630 gimple_set_visited (stmt, true);
3631 if (insert_point == gsi_stmt (gsi))
3632 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3633 else
3634 insert_stmt_after (stmt, insert_point);
3636 else
3638 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3639 == stmt);
3640 gimple_assign_set_rhs1 (stmt, new_rhs1);
3641 gimple_assign_set_rhs2 (stmt, oe->op);
3642 update_stmt (stmt);
3645 if (dump_file && (dump_flags & TDF_DETAILS))
3647 fprintf (dump_file, " into ");
3648 print_gimple_stmt (dump_file, stmt, 0, 0);
3651 return lhs;
3654 /* Find out how many cycles we need to compute the statement chain.
3655 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3656 maximum number of independent statements we may execute per cycle. */
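/* E.g. for OPS_NUM == 12 and CPU_WIDTH == 2 this yields
   12 / 4 == 3 full-width cycles plus floor_log2 (12 - 3 * 2) + 1 == 3
   further cycles, i.e. 6 cycles in total.  */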
3658 static int
3659 get_required_cycles (int ops_num, int cpu_width)
3661 int res;
3662 int elog;
3663 unsigned int rest;
3665 /* While we have more than 2 * cpu_width operands
3666 we may reduce the number of operands by cpu_width
3667 per cycle. */
3668 res = ops_num / (2 * cpu_width);
3670 /* The remaining operand count may be halved each cycle
3671 until we have only one operand. */
3672 rest = (unsigned)(ops_num - res * cpu_width);
3673 elog = exact_log2 (rest);
3674 if (elog >= 0)
3675 res += elog;
3676 else
3677 res += floor_log2 (rest) + 1;
3679 return res;
3682 /* Returns an optimal number of registers to use for computation of
3683 given statements. */
3685 static int
3686 get_reassociation_width (int ops_num, enum tree_code opc,
3687 machine_mode mode)
3689 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3690 int width;
3691 int width_min;
3692 int cycles_best;
3694 if (param_width > 0)
3695 width = param_width;
3696 else
3697 width = targetm.sched.reassociation_width (opc, mode);
3699 if (width == 1)
3700 return width;
3702 /* Get the minimal time required for sequence computation. */
3703 cycles_best = get_required_cycles (ops_num, width);
3705 /* Check if we may use less width and still compute sequence for
3706 the same time. It will allow us to reduce registers usage.
3707 get_required_cycles is monotonically increasing with lower width
3708 so we can perform a binary search for the minimal width that still
3709 results in the optimal cycle count. */
3710 width_min = 1;
3711 while (width > width_min)
3713 int width_mid = (width + width_min) / 2;
3715 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3716 width = width_mid;
3717 else if (width_min < width_mid)
3718 width_min = width_mid;
3719 else
3720 break;
3723 return width;
3726 /* Recursively rewrite our linearized statements so that the operators
3727 match those in OPS[OPINDEX], putting the computation in rank
3728 order and trying to allow operations to be executed in
3729 parallel. */
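/* For instance, with four operands a, b, c, d and width 2, instead of
   the serial chain ((a + b) + c) + d the sum can be emitted as
   (a + b) + (c + d), so the two inner additions are independent and
   may execute in parallel.  */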
3731 static void
3732 rewrite_expr_tree_parallel (gassign *stmt, int width,
3733 vec<operand_entry_t> ops)
3735 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3736 int op_num = ops.length ();
3737 int stmt_num = op_num - 1;
3738 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3739 int op_index = op_num - 1;
3740 int stmt_index = 0;
3741 int ready_stmts_end = 0;
3742 int i = 0;
3743 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3745 /* We start expression rewriting from the top statements.
3746 So, in this loop we create a full list of statements
3747 we will work with. */
3748 stmts[stmt_num - 1] = stmt;
3749 for (i = stmt_num - 2; i >= 0; i--)
3750 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3752 for (i = 0; i < stmt_num; i++)
3754 tree op1, op2;
3756 /* Determine whether we should use results of
3757 already handled statements or not. */
3758 if (ready_stmts_end == 0
3759 && (i - stmt_index >= width || op_index < 1))
3760 ready_stmts_end = i;
3762 /* Now we choose operands for the next statement. A nonzero
3763 value in ready_stmts_end means that we should use
3764 the result of already generated statements as a new operand. */
3765 if (ready_stmts_end > 0)
3767 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3768 if (ready_stmts_end > stmt_index)
3769 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3770 else if (op_index >= 0)
3771 op2 = ops[op_index--]->op;
3772 else
3774 gcc_assert (stmt_index < i);
3775 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3778 if (stmt_index >= ready_stmts_end)
3779 ready_stmts_end = 0;
3781 else
3783 if (op_index > 1)
3784 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3785 op2 = ops[op_index--]->op;
3786 op1 = ops[op_index--]->op;
3789 /* If we emit the last statement then we should put
3790 operands into the last statement. It will also
3791 break the loop. */
3792 if (op_index < 0 && stmt_index == i)
3793 i = stmt_num - 1;
3795 if (dump_file && (dump_flags & TDF_DETAILS))
3797 fprintf (dump_file, "Transforming ");
3798 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3801 /* We keep original statement only for the last one. All
3802 others are recreated. */
3803 if (i == stmt_num - 1)
3805 gimple_assign_set_rhs1 (stmts[i], op1);
3806 gimple_assign_set_rhs2 (stmts[i], op2);
3807 update_stmt (stmts[i]);
3809 else
3810 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3812 if (dump_file && (dump_flags & TDF_DETAILS))
3814 fprintf (dump_file, " into ");
3815 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3819 remove_visited_stmt_chain (last_rhs1);
3822 /* Transform STMT, which is really (A + B) + (C + D), into the left
3823 linear form, ((A+B)+C)+D.
3824 Recurse on D if necessary. */
3826 static void
3827 linearize_expr (gimple stmt)
3829 gimple_stmt_iterator gsi;
3830 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3831 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3832 gimple oldbinrhs = binrhs;
3833 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3834 gimple newbinrhs = NULL;
3835 struct loop *loop = loop_containing_stmt (stmt);
3836 tree lhs = gimple_assign_lhs (stmt);
3838 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3839 && is_reassociable_op (binrhs, rhscode, loop));
3841 gsi = gsi_for_stmt (stmt);
3843 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3844 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
3845 gimple_assign_rhs_code (binrhs),
3846 gimple_assign_lhs (binlhs),
3847 gimple_assign_rhs2 (binrhs));
3848 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3849 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3850 gimple_set_uid (binrhs, gimple_uid (stmt));
3852 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3853 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3855 if (dump_file && (dump_flags & TDF_DETAILS))
3857 fprintf (dump_file, "Linearized: ");
3858 print_gimple_stmt (dump_file, stmt, 0, 0);
3861 reassociate_stats.linearized++;
3862 update_stmt (stmt);
3864 gsi = gsi_for_stmt (oldbinrhs);
3865 reassoc_remove_stmt (&gsi);
3866 release_defs (oldbinrhs);
3868 gimple_set_visited (stmt, true);
3869 gimple_set_visited (binlhs, true);
3870 gimple_set_visited (binrhs, true);
3872 /* Tail recurse on the new rhs if it still needs reassociation. */
3873 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3874 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3875 want to change the algorithm while converting to tuples. */
3876 linearize_expr (stmt);
3879 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3880 it. Otherwise, return NULL. */
3882 static gimple
3883 get_single_immediate_use (tree lhs)
3885 use_operand_p immuse;
3886 gimple immusestmt;
3888 if (TREE_CODE (lhs) == SSA_NAME
3889 && single_imm_use (lhs, &immuse, &immusestmt)
3890 && is_gimple_assign (immusestmt))
3891 return immusestmt;
3893 return NULL;
3896 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3897 representing the negated value. Insertions of any necessary
3898 instructions go before GSI.
3899 This function is recursive in that, if you hand it "a_5" as the
3900 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3901 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3903 static tree
3904 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3906 gimple negatedefstmt = NULL;
3907 tree resultofnegate;
3908 gimple_stmt_iterator gsi;
3909 unsigned int uid;
3911 /* If we are trying to negate a name, defined by an add, negate the
3912 add operands instead. */
3913 if (TREE_CODE (tonegate) == SSA_NAME)
3914 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3915 if (TREE_CODE (tonegate) == SSA_NAME
3916 && is_gimple_assign (negatedefstmt)
3917 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3918 && has_single_use (gimple_assign_lhs (negatedefstmt))
3919 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3921 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3922 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3923 tree lhs = gimple_assign_lhs (negatedefstmt);
3924 gimple g;
3926 gsi = gsi_for_stmt (negatedefstmt);
3927 rhs1 = negate_value (rhs1, &gsi);
3929 gsi = gsi_for_stmt (negatedefstmt);
3930 rhs2 = negate_value (rhs2, &gsi);
3932 gsi = gsi_for_stmt (negatedefstmt);
3933 lhs = make_ssa_name (TREE_TYPE (lhs));
3934 gimple_set_visited (negatedefstmt, true);
3935 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
3936 gimple_set_uid (g, gimple_uid (negatedefstmt));
3937 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3938 return lhs;
3941 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3942 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3943 NULL_TREE, true, GSI_SAME_STMT);
3944 gsi = *gsip;
3945 uid = gimple_uid (gsi_stmt (gsi));
3946 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3948 gimple stmt = gsi_stmt (gsi);
3949 if (gimple_uid (stmt) != 0)
3950 break;
3951 gimple_set_uid (stmt, uid);
3953 return resultofnegate;
3956 /* Return true if we should break up the subtract in STMT into an add
3957 with negate. This is true when the subtract operands are really
3958 adds, or the subtract itself is used in an add expression. In
3959 either case, breaking up the subtract into an add with negate
3960 exposes the adds to reassociation. */
3962 static bool
3963 should_break_up_subtract (gimple stmt)
3965 tree lhs = gimple_assign_lhs (stmt);
3966 tree binlhs = gimple_assign_rhs1 (stmt);
3967 tree binrhs = gimple_assign_rhs2 (stmt);
3968 gimple immusestmt;
3969 struct loop *loop = loop_containing_stmt (stmt);
3971 if (TREE_CODE (binlhs) == SSA_NAME
3972 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3973 return true;
3975 if (TREE_CODE (binrhs) == SSA_NAME
3976 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3977 return true;
3979 if (TREE_CODE (lhs) == SSA_NAME
3980 && (immusestmt = get_single_immediate_use (lhs))
3981 && is_gimple_assign (immusestmt)
3982 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3983 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3984 return true;
3985 return false;
3988 /* Transform STMT from A - B into A + -B. */
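/* For instance (an illustrative sketch with invented names), when the
   result of the subtract feeds an addition,

       c_3 = a_1 - b_2;
       d_5 = c_3 + e_4;

   becomes

       t_6 = -b_2;
       c_3 = a_1 + t_6;
       d_5 = c_3 + e_4;

   so that the whole addition chain feeding d_5 is exposed to
   reassociation.  */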
3990 static void
3991 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3993 tree rhs1 = gimple_assign_rhs1 (stmt);
3994 tree rhs2 = gimple_assign_rhs2 (stmt);
3996 if (dump_file && (dump_flags & TDF_DETAILS))
3998 fprintf (dump_file, "Breaking up subtract ");
3999 print_gimple_stmt (dump_file, stmt, 0, 0);
4002 rhs2 = negate_value (rhs2, gsip);
4003 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4004 update_stmt (stmt);
4007 /* Determine whether STMT is a builtin call that raises an SSA name
4008 to an integer power and has only one use. If so, and this is early
4009 reassociation and unsafe math optimizations are permitted, place
4010 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4011 If any of these conditions does not hold, return FALSE. */
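/* For example (illustrative only): during the first reassociation pass
   with -funsafe-math-optimizations and -fno-math-errno, a single-use
   call "t_1 = __builtin_pow (x_2, 3.0)" yields *BASE = x_2 and
   *EXPONENT = 3, whereas "__builtin_pow (x_2, 2.5)" is rejected because
   the exponent is not integral, and exponents below 2 are rejected as
   uninteresting.  */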
4013 static bool
4014 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
4016 tree fndecl, arg1;
4017 REAL_VALUE_TYPE c, cint;
4019 if (!first_pass_instance
4020 || !flag_unsafe_math_optimizations
4021 || !is_gimple_call (stmt)
4022 || !has_single_use (gimple_call_lhs (stmt)))
4023 return false;
4025 fndecl = gimple_call_fndecl (stmt);
4027 if (!fndecl
4028 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
4029 return false;
4031 switch (DECL_FUNCTION_CODE (fndecl))
4033 CASE_FLT_FN (BUILT_IN_POW):
4034 if (flag_errno_math)
4035 return false;
4037 *base = gimple_call_arg (stmt, 0);
4038 arg1 = gimple_call_arg (stmt, 1);
4040 if (TREE_CODE (arg1) != REAL_CST)
4041 return false;
4043 c = TREE_REAL_CST (arg1);
4045 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4046 return false;
4048 *exponent = real_to_integer (&c);
4049 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4050 if (!real_identical (&c, &cint))
4051 return false;
4053 break;
4055 CASE_FLT_FN (BUILT_IN_POWI):
4056 *base = gimple_call_arg (stmt, 0);
4057 arg1 = gimple_call_arg (stmt, 1);
4059 if (!tree_fits_shwi_p (arg1))
4060 return false;
4062 *exponent = tree_to_shwi (arg1);
4063 break;
4065 default:
4066 return false;
4069 /* Expanding negative exponents is generally unproductive, so we don't
4070 complicate matters with those. Exponents of zero and one should
4071 have been handled by expression folding. */
4072 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4073 return false;
4075 return true;
4078 /* Recursively linearize a binary expression that is the RHS of STMT.
4079 Place the operands of the expression tree in the vector named OPS. */
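/* An illustrative sketch (invented names): for the left-linearized chain

       t_1 = a_2 + b_3;
       t_4 = t_1 + c_5;
       r_6 = t_4 + d_7;     <-- STMT

   this routine gathers a_2, b_3, c_5 and d_7 (in some order) into OPS
   and marks the interior statements visited so they are not processed
   again.  */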
4081 static void
4082 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
4083 bool is_associative, bool set_visited)
4085 tree binlhs = gimple_assign_rhs1 (stmt);
4086 tree binrhs = gimple_assign_rhs2 (stmt);
4087 gimple binlhsdef = NULL, binrhsdef = NULL;
4088 bool binlhsisreassoc = false;
4089 bool binrhsisreassoc = false;
4090 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4091 struct loop *loop = loop_containing_stmt (stmt);
4092 tree base = NULL_TREE;
4093 HOST_WIDE_INT exponent = 0;
4095 if (set_visited)
4096 gimple_set_visited (stmt, true);
4098 if (TREE_CODE (binlhs) == SSA_NAME)
4100 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4101 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4102 && !stmt_could_throw_p (binlhsdef));
4105 if (TREE_CODE (binrhs) == SSA_NAME)
4107 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4108 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4109 && !stmt_could_throw_p (binrhsdef));
4112 /* If the LHS is not reassociable, but the RHS is, we need to swap
4113 them. If neither is reassociable, there is nothing we can do, so
4114 just put them in the ops vector. If the LHS is reassociable,
4115 linearize it. If both are reassociable, then linearize the RHS
4116 and the LHS. */
4118 if (!binlhsisreassoc)
4120 tree temp;
4122 /* If this is not an associative operation (like division), give up. */
4123 if (!is_associative)
4125 add_to_ops_vec (ops, binrhs);
4126 return;
4129 if (!binrhsisreassoc)
4131 if (rhscode == MULT_EXPR
4132 && TREE_CODE (binrhs) == SSA_NAME
4133 && acceptable_pow_call (binrhsdef, &base, &exponent))
4135 add_repeat_to_ops_vec (ops, base, exponent);
4136 gimple_set_visited (binrhsdef, true);
4138 else
4139 add_to_ops_vec (ops, binrhs);
4141 if (rhscode == MULT_EXPR
4142 && TREE_CODE (binlhs) == SSA_NAME
4143 && acceptable_pow_call (binlhsdef, &base, &exponent))
4145 add_repeat_to_ops_vec (ops, base, exponent);
4146 gimple_set_visited (binlhsdef, true);
4148 else
4149 add_to_ops_vec (ops, binlhs);
4151 return;
4154 if (dump_file && (dump_flags & TDF_DETAILS))
4156 fprintf (dump_file, "swapping operands of ");
4157 print_gimple_stmt (dump_file, stmt, 0, 0);
4160 swap_ssa_operands (stmt,
4161 gimple_assign_rhs1_ptr (stmt),
4162 gimple_assign_rhs2_ptr (stmt));
4163 update_stmt (stmt);
4165 if (dump_file && (dump_flags & TDF_DETAILS))
4167 fprintf (dump_file, " is now ");
4168 print_gimple_stmt (dump_file, stmt, 0, 0);
4171 /* We want to make it so the lhs is always the reassociative op,
4172 so swap. */
4173 temp = binlhs;
4174 binlhs = binrhs;
4175 binrhs = temp;
4177 else if (binrhsisreassoc)
4179 linearize_expr (stmt);
4180 binlhs = gimple_assign_rhs1 (stmt);
4181 binrhs = gimple_assign_rhs2 (stmt);
4184 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4185 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4186 rhscode, loop));
4187 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4188 is_associative, set_visited);
4190 if (rhscode == MULT_EXPR
4191 && TREE_CODE (binrhs) == SSA_NAME
4192 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4194 add_repeat_to_ops_vec (ops, base, exponent);
4195 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4197 else
4198 add_to_ops_vec (ops, binrhs);
4201 /* Repropagate the negates back into subtracts, since no other pass
4202 currently does it. */
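/* A small illustrative example (invented names): for

       x_1 = -a_2;
       y_3 = b_4 + x_1;

   the PLUS_EXPR is rewritten into "y_3 = b_4 - a_2"; analogous rewrites
   are applied below when the negate feeds a MINUS_EXPR instead.  */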
4204 static void
4205 repropagate_negates (void)
4207 unsigned int i = 0;
4208 tree negate;
4210 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4212 gimple user = get_single_immediate_use (negate);
4214 if (!user || !is_gimple_assign (user))
4215 continue;
4217 /* The negate operand can be either operand of a PLUS_EXPR
4218 (it can be the LHS if the RHS is a constant for example).
4220 Force the negate operand to the RHS of the PLUS_EXPR, then
4221 transform the PLUS_EXPR into a MINUS_EXPR. */
4222 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4224 /* If the negated operand appears on the LHS of the
4225 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4226 to force the negated operand to the RHS of the PLUS_EXPR. */
4227 if (gimple_assign_rhs1 (user) == negate)
4229 swap_ssa_operands (user,
4230 gimple_assign_rhs1_ptr (user),
4231 gimple_assign_rhs2_ptr (user));
4234 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4235 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4236 if (gimple_assign_rhs2 (user) == negate)
4238 tree rhs1 = gimple_assign_rhs1 (user);
4239 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
4240 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4241 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4242 update_stmt (user);
4245 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4247 if (gimple_assign_rhs1 (user) == negate)
4249 /* We have
4250 x = -a
4251 y = x - b
4252 which we transform into
4253 x = a + b
4254 y = -x .
4255 This pushes down the negate which we possibly can merge
4256 into some other operation, hence insert it into the
4257 plus_negates vector. */
4258 gimple feed = SSA_NAME_DEF_STMT (negate);
4259 tree a = gimple_assign_rhs1 (feed);
4260 tree b = gimple_assign_rhs2 (user);
4261 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4262 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4263 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4264 gimple g = gimple_build_assign (x, PLUS_EXPR, a, b);
4265 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4266 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4267 user = gsi_stmt (gsi2);
4268 update_stmt (user);
4269 reassoc_remove_stmt (&gsi);
4270 release_defs (feed);
4271 plus_negates.safe_push (gimple_assign_lhs (user));
4273 else
4275 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4276 rid of one operation. */
4277 gimple feed = SSA_NAME_DEF_STMT (negate);
4278 tree a = gimple_assign_rhs1 (feed);
4279 tree rhs1 = gimple_assign_rhs1 (user);
4280 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4281 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4282 update_stmt (gsi_stmt (gsi));
4288 /* Returns true if OP is of a type for which we can do reassociation.
4289 That is, for integral types with wrapping overflow, for non-saturating
4290 fixed-point types, and for floating-point types when -fassociative-math is enabled. */
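/* For example (illustrative): unsigned integer types qualify because
   their arithmetic wraps, plain signed int qualifies only under -fwrapv,
   and float or double qualify only when -fassociative-math is in
   effect.  */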
4292 static bool
4293 can_reassociate_p (tree op)
4295 tree type = TREE_TYPE (op);
4296 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4297 || NON_SAT_FIXED_POINT_TYPE_P (type)
4298 || (flag_associative_math && FLOAT_TYPE_P (type)))
4299 return true;
4300 return false;
4303 /* Break up subtract operations in block BB.
4305 We do this top down because we don't know whether the subtract is
4306 part of a possible chain of reassociation except at the top.
4308 IE given
4309 d = f + g
4310 c = a + e
4311 b = c - d
4312 q = b - r
4313 k = t - q
4315 we want to break up k = t - q, but we won't until we've transformed q
4316 = b - r, which won't be broken up until we transform b = c - d.
4318 En passant, clear the GIMPLE visited flag on every statement
4319 and set UIDs within each basic block. */
4321 static void
4322 break_up_subtract_bb (basic_block bb)
4324 gimple_stmt_iterator gsi;
4325 basic_block son;
4326 unsigned int uid = 1;
4328 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4330 gimple stmt = gsi_stmt (gsi);
4331 gimple_set_visited (stmt, false);
4332 gimple_set_uid (stmt, uid++);
4334 if (!is_gimple_assign (stmt)
4335 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4336 continue;
4338 /* Look for simple gimple subtract operations. */
4339 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4341 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4342 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4343 continue;
4345 /* Check for a subtract used only in an addition. If this
4346 is the case, transform it into add of a negate for better
4347 reassociation. IE transform C = A-B into C = A + -B if C
4348 is only used in an addition. */
4349 if (should_break_up_subtract (stmt))
4350 break_up_subtract (stmt, &gsi);
4352 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4353 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4354 plus_negates.safe_push (gimple_assign_lhs (stmt));
4356 for (son = first_dom_son (CDI_DOMINATORS, bb);
4357 son;
4358 son = next_dom_son (CDI_DOMINATORS, son))
4359 break_up_subtract_bb (son);
4362 /* Used for repeated factor analysis. */
4363 struct repeat_factor_d
4365 /* An SSA name that occurs in a multiply chain. */
4366 tree factor;
4368 /* Cached rank of the factor. */
4369 unsigned rank;
4371 /* Number of occurrences of the factor in the chain. */
4372 HOST_WIDE_INT count;
4374 /* An SSA name representing the product of this factor and
4375 all factors appearing later in the repeated factor vector. */
4376 tree repr;
4379 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4380 typedef const struct repeat_factor_d *const_repeat_factor_t;
4383 static vec<repeat_factor> repeat_factor_vec;
4385 /* Used for sorting the repeat factor vector. Sort primarily by
4386 ascending occurrence count, secondarily by descending rank. */
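/* For instance (illustrative), factors with (count, rank) pairs
   x:(4,5), y:(2,7) and z:(2,3) sort as y, z, x: the two factors that
   occur twice come first, the higher-ranked of those (y) ahead of z,
   and the factor occurring four times comes last.  */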
4388 static int
4389 compare_repeat_factors (const void *x1, const void *x2)
4391 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4392 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4394 if (rf1->count != rf2->count)
4395 return rf1->count - rf2->count;
4397 return rf2->rank - rf1->rank;
4400 /* Look for repeated operands in OPS in the multiply tree rooted at
4401 STMT. Replace them with an optimal sequence of multiplies and powi
4402 builtin calls, and remove the used operands from OPS. Return an
4403 SSA name representing the value of the replacement sequence. */
4405 static tree
4406 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4408 unsigned i, j, vec_len;
4409 int ii;
4410 operand_entry_t oe;
4411 repeat_factor_t rf1, rf2;
4412 repeat_factor rfnew;
4413 tree result = NULL_TREE;
4414 tree target_ssa, iter_result;
4415 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4416 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4417 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4418 gimple mul_stmt, pow_stmt;
4420 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4421 target. */
4422 if (!powi_fndecl)
4423 return NULL_TREE;
4425 /* Allocate the repeated factor vector. */
4426 repeat_factor_vec.create (10);
4428 /* Scan the OPS vector for all SSA names in the product and build
4429 up a vector of occurrence counts for each factor. */
4430 FOR_EACH_VEC_ELT (*ops, i, oe)
4432 if (TREE_CODE (oe->op) == SSA_NAME)
4434 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4436 if (rf1->factor == oe->op)
4438 rf1->count += oe->count;
4439 break;
4443 if (j >= repeat_factor_vec.length ())
4445 rfnew.factor = oe->op;
4446 rfnew.rank = oe->rank;
4447 rfnew.count = oe->count;
4448 rfnew.repr = NULL_TREE;
4449 repeat_factor_vec.safe_push (rfnew);
4454 /* Sort the repeated factor vector by (a) increasing occurrence count,
4455 and (b) decreasing rank. */
4456 repeat_factor_vec.qsort (compare_repeat_factors);
4458 /* It is generally best to combine as many base factors as possible
4459 into a product before applying __builtin_powi to the result.
4460 However, the sort order chosen for the repeated factor vector
4461 allows us to cache partial results for the product of the base
4462 factors for subsequent use. When we already have a cached partial
4463 result from a previous iteration, it is best to make use of it
4464 before looking for another __builtin_powi opportunity.
4466 As an example, consider x * x * y * y * y * z * z * z * z.
4467 We want to first compose the product x * y * z, raise it to the
4468 second power, then multiply this by y * z, and finally multiply
4469 by z. This can be done in 5 multiplies provided we cache y * z
4470 for use in both expressions:
4472 t1 = y * z
4473 t2 = t1 * x
4474 t3 = t2 * t2
4475 t4 = t1 * t3
4476 result = t4 * z
4478 If we instead ignored the cached y * z and first multiplied by
4479 the __builtin_powi opportunity z * z, we would get the inferior:
4481 t1 = y * z
4482 t2 = t1 * x
4483 t3 = t2 * t2
4484 t4 = z * z
4485 t5 = t3 * t4
4486 result = t5 * y */
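/* In GIMPLE terms, the five-multiply sequence above comes out roughly
   as (an illustrative sketch; the "reassocpow" temporaries are the ones
   created below):

       reassocpow_1 = y * z;
       reassocpow_2 = reassocpow_1 * x;
       reassocpow_3 = __builtin_powi (reassocpow_2, 2);
       reassocpow_4 = reassocpow_3 * reassocpow_1;
       reassocpow_5 = reassocpow_4 * z;

   with the consumed copies of x, y and z removed from OPS.  */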
4488 vec_len = repeat_factor_vec.length ();
4490 /* Repeatedly look for opportunities to create a builtin_powi call. */
4491 while (true)
4493 HOST_WIDE_INT power;
4495 /* First look for the largest cached product of factors from
4496 preceding iterations. If found, create a builtin_powi for
4497 it if the minimum occurrence count for its factors is at
4498 least 2, or just use this cached product as our next
4499 multiplicand if the minimum occurrence count is 1. */
4500 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4502 if (rf1->repr && rf1->count > 0)
4503 break;
4506 if (j < vec_len)
4508 power = rf1->count;
4510 if (power == 1)
4512 iter_result = rf1->repr;
4514 if (dump_file && (dump_flags & TDF_DETAILS))
4516 unsigned elt;
4517 repeat_factor_t rf;
4518 fputs ("Multiplying by cached product ", dump_file);
4519 for (elt = j; elt < vec_len; elt++)
4521 rf = &repeat_factor_vec[elt];
4522 print_generic_expr (dump_file, rf->factor, 0);
4523 if (elt < vec_len - 1)
4524 fputs (" * ", dump_file);
4526 fputs ("\n", dump_file);
4529 else
4531 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4532 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4533 build_int_cst (integer_type_node,
4534 power));
4535 gimple_call_set_lhs (pow_stmt, iter_result);
4536 gimple_set_location (pow_stmt, gimple_location (stmt));
4537 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4539 if (dump_file && (dump_flags & TDF_DETAILS))
4541 unsigned elt;
4542 repeat_factor_t rf;
4543 fputs ("Building __builtin_pow call for cached product (",
4544 dump_file);
4545 for (elt = j; elt < vec_len; elt++)
4547 rf = &repeat_factor_vec[elt];
4548 print_generic_expr (dump_file, rf->factor, 0);
4549 if (elt < vec_len - 1)
4550 fputs (" * ", dump_file);
4552 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n",
4553 power);
4557 else
4559 /* Otherwise, find the first factor in the repeated factor
4560 vector whose occurrence count is at least 2. If no such
4561 factor exists, there are no builtin_powi opportunities
4562 remaining. */
4563 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4565 if (rf1->count >= 2)
4566 break;
4569 if (j >= vec_len)
4570 break;
4572 power = rf1->count;
4574 if (dump_file && (dump_flags & TDF_DETAILS))
4576 unsigned elt;
4577 repeat_factor_t rf;
4578 fputs ("Building __builtin_pow call for (", dump_file);
4579 for (elt = j; elt < vec_len; elt++)
4581 rf = &repeat_factor_vec[elt];
4582 print_generic_expr (dump_file, rf->factor, 0);
4583 if (elt < vec_len - 1)
4584 fputs (" * ", dump_file);
4586 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n", power);
4589 reassociate_stats.pows_created++;
4591 /* Visit each element of the vector in reverse order (so that
4592 high-occurrence elements are visited first, and within the
4593 same occurrence count, lower-ranked elements are visited
4594 first). Form a linear product of all elements in this order
4595 whose occurrence count is at least that of element J.
4596 Record the SSA name representing the product of each element
4597 with all subsequent elements in the vector. */
4598 if (j == vec_len - 1)
4599 rf1->repr = rf1->factor;
4600 else
4602 for (ii = vec_len - 2; ii >= (int)j; ii--)
4604 tree op1, op2;
4606 rf1 = &repeat_factor_vec[ii];
4607 rf2 = &repeat_factor_vec[ii + 1];
4609 /* Init the last factor's representative to be itself. */
4610 if (!rf2->repr)
4611 rf2->repr = rf2->factor;
4613 op1 = rf1->factor;
4614 op2 = rf2->repr;
4616 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4617 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4618 op1, op2);
4619 gimple_set_location (mul_stmt, gimple_location (stmt));
4620 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4621 rf1->repr = target_ssa;
4623 /* Don't reprocess the multiply we just introduced. */
4624 gimple_set_visited (mul_stmt, true);
4628 /* Form a call to __builtin_powi for the maximum product
4629 just formed, raised to the power obtained earlier. */
4630 rf1 = &repeat_factor_vec[j];
4631 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4632 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4633 build_int_cst (integer_type_node,
4634 power));
4635 gimple_call_set_lhs (pow_stmt, iter_result);
4636 gimple_set_location (pow_stmt, gimple_location (stmt));
4637 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4640 /* If we previously formed at least one other builtin_powi call,
4641 form the product of this one and those others. */
4642 if (result)
4644 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4645 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4646 result, iter_result);
4647 gimple_set_location (mul_stmt, gimple_location (stmt));
4648 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4649 gimple_set_visited (mul_stmt, true);
4650 result = new_result;
4652 else
4653 result = iter_result;
4655 /* Decrement the occurrence count of each element in the product
4656 by the count found above, and remove this many copies of each
4657 factor from OPS. */
4658 for (i = j; i < vec_len; i++)
4660 unsigned k = power;
4661 unsigned n;
4663 rf1 = &repeat_factor_vec[i];
4664 rf1->count -= power;
4666 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4668 if (oe->op == rf1->factor)
4670 if (oe->count <= k)
4672 ops->ordered_remove (n);
4673 k -= oe->count;
4675 if (k == 0)
4676 break;
4678 else
4680 oe->count -= k;
4681 break;
4688 /* At this point all elements in the repeated factor vector have a
4689 remaining occurrence count of 0 or 1, and those with a count of 1
4690 don't have cached representatives. Re-sort the ops vector and
4691 clean up. */
4692 ops->qsort (sort_by_operand_rank);
4693 repeat_factor_vec.release ();
4695 /* Return the final product computed herein. Note that there may
4696 still be some elements with single occurrence count left in OPS;
4697 those will be handled by the normal reassociation logic. */
4698 return result;
4701 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4703 static void
4704 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4706 tree rhs1;
4708 if (dump_file && (dump_flags & TDF_DETAILS))
4710 fprintf (dump_file, "Transforming ");
4711 print_gimple_stmt (dump_file, stmt, 0, 0);
4714 rhs1 = gimple_assign_rhs1 (stmt);
4715 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4716 update_stmt (stmt);
4717 remove_visited_stmt_chain (rhs1);
4719 if (dump_file && (dump_flags & TDF_DETAILS))
4721 fprintf (dump_file, " into ");
4722 print_gimple_stmt (dump_file, stmt, 0, 0);
4726 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4728 static void
4729 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4730 tree rhs1, tree rhs2)
4732 if (dump_file && (dump_flags & TDF_DETAILS))
4734 fprintf (dump_file, "Transforming ");
4735 print_gimple_stmt (dump_file, stmt, 0, 0);
4738 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4739 update_stmt (gsi_stmt (*gsi));
4740 remove_visited_stmt_chain (rhs1);
4742 if (dump_file && (dump_flags & TDF_DETAILS))
4744 fprintf (dump_file, " into ");
4745 print_gimple_stmt (dump_file, stmt, 0, 0);
4749 /* Reassociate expressions in basic block BB and its children in the
4750 post-dominator tree. */
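/* As an illustration of the rewriting done below (invented names,
   assuming the operands survive the optimization steps unchanged and
   get_reassociation_width returns 2 for the target): the chain

       t_1 = a + b;  t_2 = t_1 + c;  t_3 = t_2 + d;

   can be re-emitted by rewrite_expr_tree_parallel as the shallower

       t_1 = a + b;  t_2 = c + d;  t_3 = t_1 + t_2;

   exposing more instruction-level parallelism.  */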
4752 static void
4753 reassociate_bb (basic_block bb)
4755 gimple_stmt_iterator gsi;
4756 basic_block son;
4757 gimple stmt = last_stmt (bb);
4759 if (stmt && !gimple_visited_p (stmt))
4760 maybe_optimize_range_tests (stmt);
4762 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4764 stmt = gsi_stmt (gsi);
4766 if (is_gimple_assign (stmt)
4767 && !stmt_could_throw_p (stmt))
4769 tree lhs, rhs1, rhs2;
4770 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4772 /* If this is not a gimple binary expression, there is
4773 nothing for us to do with it. */
4774 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4775 continue;
4777 /* If this was part of an already processed statement,
4778 we don't need to touch it again. */
4779 if (gimple_visited_p (stmt))
4781 /* This statement might have become dead because of previous
4782 reassociations. */
4783 if (has_zero_uses (gimple_get_lhs (stmt)))
4785 reassoc_remove_stmt (&gsi);
4786 release_defs (stmt);
4787 /* Removing the statement above may have left the
4788 iterator at the end of the sequence.  Reset it to
4789 the last statement of the block; if the block has
4790 become empty that iterator is also at the end of
4791 the sequence and we need to bail out rather than
4792 continue.  */
4793 if (gsi_end_p (gsi))
4795 gsi = gsi_last_bb (bb);
4796 if (gsi_end_p (gsi))
4797 break;
4800 continue;
4803 lhs = gimple_assign_lhs (stmt);
4804 rhs1 = gimple_assign_rhs1 (stmt);
4805 rhs2 = gimple_assign_rhs2 (stmt);
4807 /* For operations other than bitwise ones and min/max we cannot
4808 associate all types.  Verify that here.  */
4809 if (rhs_code != BIT_IOR_EXPR
4810 && rhs_code != BIT_AND_EXPR
4811 && rhs_code != BIT_XOR_EXPR
4812 && rhs_code != MIN_EXPR
4813 && rhs_code != MAX_EXPR
4814 && (!can_reassociate_p (lhs)
4815 || !can_reassociate_p (rhs1)
4816 || !can_reassociate_p (rhs2)))
4817 continue;
4819 if (associative_tree_code (rhs_code))
4821 auto_vec<operand_entry_t> ops;
4822 tree powi_result = NULL_TREE;
4824 /* There may be no immediate uses left by the time we
4825 get here because we may have eliminated them all. */
4826 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4827 continue;
4829 gimple_set_visited (stmt, true);
4830 linearize_expr_tree (&ops, stmt, true, true);
4831 ops.qsort (sort_by_operand_rank);
4832 optimize_ops_list (rhs_code, &ops);
4833 if (undistribute_ops_list (rhs_code, &ops,
4834 loop_containing_stmt (stmt)))
4836 ops.qsort (sort_by_operand_rank);
4837 optimize_ops_list (rhs_code, &ops);
4840 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4841 optimize_range_tests (rhs_code, &ops);
4843 if (first_pass_instance
4844 && rhs_code == MULT_EXPR
4845 && flag_unsafe_math_optimizations)
4846 powi_result = attempt_builtin_powi (stmt, &ops);
4848 /* If the operand vector is now empty, all operands were
4849 consumed by the __builtin_powi optimization. */
4850 if (ops.length () == 0)
4851 transform_stmt_to_copy (&gsi, stmt, powi_result);
4852 else if (ops.length () == 1)
4854 tree last_op = ops.last ()->op;
4856 if (powi_result)
4857 transform_stmt_to_multiply (&gsi, stmt, last_op,
4858 powi_result);
4859 else
4860 transform_stmt_to_copy (&gsi, stmt, last_op);
4862 else
4864 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4865 int ops_num = ops.length ();
4866 int width = get_reassociation_width (ops_num, rhs_code, mode);
4867 tree new_lhs = lhs;
4869 if (dump_file && (dump_flags & TDF_DETAILS))
4870 fprintf (dump_file,
4871 "Width = %d was chosen for reassociation\n", width);
4873 if (width > 1
4874 && ops.length () > 3)
4875 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
4876 width, ops);
4877 else
4879 /* When there are three operands left, we want
4880 to make sure the ones that get the double
4881 binary op are chosen wisely. */
4882 int len = ops.length ();
4883 if (len >= 3)
4884 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4886 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4887 powi_result != NULL);
4890 /* If we combined some repeated factors into a
4891 __builtin_powi call, multiply that result by the
4892 reassociated operands. */
4893 if (powi_result)
4895 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4896 tree type = TREE_TYPE (lhs);
4897 tree target_ssa = make_temp_ssa_name (type, NULL,
4898 "reassocpow");
4899 gimple_set_lhs (lhs_stmt, target_ssa);
4900 update_stmt (lhs_stmt);
4901 if (lhs != new_lhs)
4902 target_ssa = new_lhs;
4903 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
4904 powi_result, target_ssa);
4905 gimple_set_location (mul_stmt, gimple_location (stmt));
4906 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4912 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4913 son;
4914 son = next_dom_son (CDI_POST_DOMINATORS, son))
4915 reassociate_bb (son);
4918 /* Add jumps around shifts for range tests turned into bit tests.
4919 For each SSA_NAME VAR we have code like:
4920 VAR = ...; // final stmt of range comparison
4921 // bit test here...;
4922 OTHERVAR = ...; // final stmt of the bit test sequence
4923 RES = VAR | OTHERVAR;
4924 Turn the above into:
4925 VAR = ...;
4926 if (VAR != 0)
4927 goto <l3>;
4928 else
4929 goto <l2>;
4930 <l2>:
4931 // bit test here...;
4932 OTHERVAR = ...;
4933 <l3>:
4934 # RES = PHI<1(l1), OTHERVAR(l2)>; */
4936 static void
4937 branch_fixup (void)
4939 tree var;
4940 unsigned int i;
4942 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
4944 gimple def_stmt = SSA_NAME_DEF_STMT (var);
4945 gimple use_stmt;
4946 use_operand_p use;
4947 bool ok = single_imm_use (var, &use, &use_stmt);
4948 gcc_assert (ok
4949 && is_gimple_assign (use_stmt)
4950 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
4951 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
4953 basic_block cond_bb = gimple_bb (def_stmt);
4954 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
4955 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
4957 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
4958 gimple g = gimple_build_cond (NE_EXPR, var,
4959 build_zero_cst (TREE_TYPE (var)),
4960 NULL_TREE, NULL_TREE);
4961 location_t loc = gimple_location (use_stmt);
4962 gimple_set_location (g, loc);
4963 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
4965 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
4966 etrue->probability = REG_BR_PROB_BASE / 2;
4967 etrue->count = cond_bb->count / 2;
4968 edge efalse = find_edge (cond_bb, then_bb);
4969 efalse->flags = EDGE_FALSE_VALUE;
4970 efalse->probability -= etrue->probability;
4971 efalse->count -= etrue->count;
4972 then_bb->count -= etrue->count;
4974 tree othervar = NULL_TREE;
4975 if (gimple_assign_rhs1 (use_stmt) == var)
4976 othervar = gimple_assign_rhs2 (use_stmt);
4977 else if (gimple_assign_rhs2 (use_stmt) == var)
4978 othervar = gimple_assign_rhs1 (use_stmt);
4979 else
4980 gcc_unreachable ();
4981 tree lhs = gimple_assign_lhs (use_stmt);
4982 gphi *phi = create_phi_node (lhs, merge_bb);
4983 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
4984 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
4985 gsi = gsi_for_stmt (use_stmt);
4986 gsi_remove (&gsi, true);
4988 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
4989 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
4991 reassoc_branch_fixups.release ();
4994 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4995 void debug_ops_vector (vec<operand_entry_t> ops);
4997 /* Dump the operand entry vector OPS to FILE. */
4999 void
5000 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
5002 operand_entry_t oe;
5003 unsigned int i;
5005 FOR_EACH_VEC_ELT (ops, i, oe)
5007 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
5008 print_generic_expr (file, oe->op, 0);
5012 /* Dump the operand entry vector OPS to STDERR. */
5014 DEBUG_FUNCTION void
5015 debug_ops_vector (vec<operand_entry_t> ops)
5017 dump_ops_vector (stderr, ops);
5020 static void
5021 do_reassoc (void)
5023 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5024 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
5027 /* Initialize the reassociation pass. */
5029 static void
5030 init_reassoc (void)
5032 int i;
5033 long rank = 2;
5034 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
5036 /* Find the loops, so that we can prevent moving calculations in
5037 them. */
5038 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5040 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
5042 operand_entry_pool = create_alloc_pool ("operand entry pool",
5043 sizeof (struct operand_entry), 30);
5044 next_operand_entry_id = 0;
5046 /* Reverse RPO (Reverse Post Order) will give us something where
5047 deeper loops come later. */
5048 pre_and_rev_post_order_compute (NULL, bbs, false);
5049 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5050 operand_rank = new hash_map<tree, long>;
5052 /* Give each default definition a distinct rank. This includes
5053 parameters and the static chain. Walk backwards over all
5054 SSA names so that we get proper rank ordering according
5055 to tree_swap_operands_p. */
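/* For instance (illustrative): if the only default definitions are two
   parameters, they receive the small distinct ranks 3 and 4, while each
   basic block below then contributes a rank of (++rank << 16), so block
   ranks stay well separated from the ranks of default definitions.  */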
5056 for (i = num_ssa_names - 1; i > 0; --i)
5058 tree name = ssa_name (i);
5059 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5060 insert_operand_rank (name, ++rank);
5063 /* Set up rank for each BB */
5064 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5065 bb_rank[bbs[i]] = ++rank << 16;
5067 free (bbs);
5068 calculate_dominance_info (CDI_POST_DOMINATORS);
5069 plus_negates = vNULL;
5072 /* Cleanup after the reassociation pass, and print stats if
5073 requested. */
5075 static void
5076 fini_reassoc (void)
5078 statistics_counter_event (cfun, "Linearized",
5079 reassociate_stats.linearized);
5080 statistics_counter_event (cfun, "Constants eliminated",
5081 reassociate_stats.constants_eliminated);
5082 statistics_counter_event (cfun, "Ops eliminated",
5083 reassociate_stats.ops_eliminated);
5084 statistics_counter_event (cfun, "Statements rewritten",
5085 reassociate_stats.rewritten);
5086 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5087 reassociate_stats.pows_encountered);
5088 statistics_counter_event (cfun, "Built-in powi calls created",
5089 reassociate_stats.pows_created);
5091 delete operand_rank;
5092 free_alloc_pool (operand_entry_pool);
5093 free (bb_rank);
5094 plus_negates.release ();
5095 free_dominance_info (CDI_POST_DOMINATORS);
5096 loop_optimizer_finalize ();
5099 /* Gate and execute functions for Reassociation. */
5101 static unsigned int
5102 execute_reassoc (void)
5104 init_reassoc ();
5106 do_reassoc ();
5107 repropagate_negates ();
5108 branch_fixup ();
5110 fini_reassoc ();
5111 return 0;
5114 namespace {
5116 const pass_data pass_data_reassoc =
5118 GIMPLE_PASS, /* type */
5119 "reassoc", /* name */
5120 OPTGROUP_NONE, /* optinfo_flags */
5121 TV_TREE_REASSOC, /* tv_id */
5122 ( PROP_cfg | PROP_ssa ), /* properties_required */
5123 0, /* properties_provided */
5124 0, /* properties_destroyed */
5125 0, /* todo_flags_start */
5126 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5129 class pass_reassoc : public gimple_opt_pass
5131 public:
5132 pass_reassoc (gcc::context *ctxt)
5133 : gimple_opt_pass (pass_data_reassoc, ctxt)
5136 /* opt_pass methods: */
5137 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5138 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5139 virtual unsigned int execute (function *) { return execute_reassoc (); }
5141 }; // class pass_reassoc
5143 } // anon namespace
5145 gimple_opt_pass *
5146 make_pass_reassoc (gcc::context *ctxt)
5148 return new pass_reassoc (ctxt);