gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2015 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "hash-table.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tm_p.h"
28 #include "hash-set.h"
29 #include "machmode.h"
30 #include "vec.h"
31 #include "double-int.h"
32 #include "input.h"
33 #include "alias.h"
34 #include "symtab.h"
35 #include "wide-int.h"
36 #include "inchash.h"
37 #include "tree.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "predict.h"
41 #include "hard-reg-set.h"
42 #include "input.h"
43 #include "function.h"
44 #include "dominance.h"
45 #include "cfg.h"
46 #include "cfganal.h"
47 #include "basic-block.h"
48 #include "gimple-pretty-print.h"
49 #include "tree-inline.h"
50 #include "hash-map.h"
51 #include "tree-ssa-alias.h"
52 #include "internal-fn.h"
53 #include "gimple-fold.h"
54 #include "tree-eh.h"
55 #include "gimple-expr.h"
56 #include "is-a.h"
57 #include "gimple.h"
58 #include "gimple-iterator.h"
59 #include "gimplify-me.h"
60 #include "gimple-ssa.h"
61 #include "tree-cfg.h"
62 #include "tree-phinodes.h"
63 #include "ssa-iterators.h"
64 #include "stringpool.h"
65 #include "tree-ssanames.h"
66 #include "tree-ssa-loop-niter.h"
67 #include "tree-ssa-loop.h"
68 #include "expr.h"
69 #include "tree-dfa.h"
70 #include "tree-ssa.h"
71 #include "tree-iterator.h"
72 #include "tree-pass.h"
73 #include "alloc-pool.h"
74 #include "langhooks.h"
75 #include "cfgloop.h"
76 #include "flags.h"
77 #include "target.h"
78 #include "params.h"
79 #include "diagnostic-core.h"
80 #include "builtins.h"
81 #include "gimplify.h"
82 #include "insn-codes.h"
83 #include "optabs.h"
85 /* This is a simple global reassociation pass. It is, in part, based
86 on the LLVM pass of the same name (They do some things more/less
87 than we do, in different orders, etc).
89 It consists of five steps:
91 1. Breaking up subtract operations into addition + negate, where
92 it would promote the reassociation of adds.
94 2. Left linearization of the expression trees, so that (A+B)+(C+D)
95 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
96 During linearization, we place the operands of the binary
97 expressions into a vector of operand_entry_t
99 3. Optimization of the operand lists, eliminating things like a +
100 -a, a & a, etc.
102 3a. Combine repeated factors with the same occurrence counts
103 into a __builtin_powi call that will later be optimized into
104 an optimal number of multiplies.
106 4. Rewrite the expression trees we linearized and optimized so
107 they are in proper rank order.
109 5. Repropagate negates, as nothing else will clean it up ATM.
111 A bit of theory on #4, since nobody seems to write anything down
112 about why it makes sense to do it the way they do it:
114 We could do this much nicer theoretically, but don't (for reasons
115 explained after how to do it theoretically nice :P).
117 In order to promote the most redundancy elimination, you want
118 binary expressions whose operands are the same rank (or
119 preferably, the same value) exposed to the redundancy eliminator,
120 for possible elimination.
122 So the way to do this if we really cared, is to build the new op
123 tree from the leaves to the roots, merging as you go, and putting the
124 new op on the end of the worklist, until you are left with one
125 thing on the worklist.
127 IE if you have to rewrite the following set of operands (listed with
128 rank in parentheses), with opcode PLUS_EXPR:
130 a (1), b (1), c (1), d (2), e (2)
133 We start with our merge worklist empty, and the ops list with all of
134 those on it.
136 You want to first merge all leaves of the same rank, as much as
137 possible.
139 So first build a binary op of
141 mergetmp = a + b, and put "mergetmp" on the merge worklist.
143 Because there is no three operand form of PLUS_EXPR, c is not going to
144 be exposed to redundancy elimination as a rank 1 operand.
146 So you might as well throw it on the merge worklist (you could also
147 consider it to now be a rank two operand, and merge it with d and e,
148 but in this case, you then have evicted e from a binary op. So at
149 least in this situation, you can't win.)
151 Then build a binary op of d + e
152 mergetmp2 = d + e
154 and put mergetmp2 on the merge worklist.
156 so merge worklist = {mergetmp, c, mergetmp2}
158 Continue building binary ops of these operations until you have only
159 one operation left on the worklist.
161 So we have
163 build binary op
164 mergetmp3 = mergetmp + c
166 worklist = {mergetmp2, mergetmp3}
168 mergetmp4 = mergetmp2 + mergetmp3
170 worklist = {mergetmp4}
172 because we have one operation left, we can now just set the original
173 statement equal to the result of that operation.
175 This will at least expose a + b and d + e to redundancy elimination
176 as binary operations.
178 For extra points, you can reuse the old statements to build the
179 mergetmps, since you shouldn't run out.
181 So why don't we do this?
183 Because it's expensive, and rarely will help. Most trees we are
184 reassociating have 3 or fewer ops. If they have 2 ops, they already
185 will be written into a nice single binary op. If you have 3 ops, a
186 single simple check suffices to tell you whether the first two are of the
187 same rank. If so, you know to order it
189 mergetmp = op1 + op2
190 newstmt = mergetmp + op3
192 instead of
193 mergetmp = op2 + op3
194 newstmt = mergetmp + op1
196 If all three are of the same rank, you can't expose them all in a
197 single binary operator anyway, so the above is *still* the best you
198 can do.
200 Thus, this is what we do. When we have three ops left, we check to see
201 what order to put them in, and call it a day. As a nod to vector sum
202 reduction, we check if any of the ops are really a phi node that is a
203 destructive update for the associating op, and keep the destructive
204 update together for vector sum reduction recognition. */
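/* A minimal sketch of the overall flow on a toy input (statement and
   variable names here are illustrative only, not taken from real dumps):

       t1 = a + 5;
       t2 = t1 + b;
       x  = t2 + 9;

   linearizes to the operand list {a, 5, b, 9}; sorting by rank puts the
   rank-0 constants last, optimize_ops_list folds them into {a, b, 14},
   and the tree is rewritten as roughly t1 = a + b; x = t1 + 14.  */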
207 /* Statistics */
208 static struct
210 int linearized;
211 int constants_eliminated;
212 int ops_eliminated;
213 int rewritten;
214 int pows_encountered;
215 int pows_created;
216 } reassociate_stats;
218 /* Operator, rank pair. */
219 typedef struct operand_entry
221 unsigned int rank;
222 int id;
223 tree op;
224 unsigned int count;
225 } *operand_entry_t;
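/* In each entry, OP is the operand itself, RANK its rank as computed by
   get_rank, ID a unique number used only to keep qsort stable (see
   next_operand_entry_id below), and COUNT the repeat count of the operand,
   which is 1 except for repeated factors added by add_repeat_to_ops_vec
   for the __builtin_powi handling.  */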
227 static alloc_pool operand_entry_pool;
229 /* This is used to assign a unique ID to each struct operand_entry
230 so that qsort results are identical on different hosts. */
231 static int next_operand_entry_id;
233 /* Starting rank number for a given basic block, so that we can rank
234 operations using unmovable instructions in that BB based on the bb
235 depth. */
236 static long *bb_rank;
238 /* Operand->rank hashtable. */
239 static hash_map<tree, long> *operand_rank;
241 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
242 all basic blocks, the CFG should be adjusted - the basic block is
243 split right after that SSA_NAME's definition statement and before
244 its only use, which must be a bit ior. */
245 static vec<tree> reassoc_branch_fixups;
247 /* Forward decls. */
248 static long get_rank (tree);
249 static bool reassoc_stmt_dominates_stmt_p (gimple, gimple);
251 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
252 possibly added by gsi_remove. */
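/* If the removed statement's result is still referenced from debug stmts,
   gsi_remove may insert new debug stmts at the removal point; these start
   with a gimple_uid of 0, so the loop below copies the removed statement's
   uid onto them to keep the uid-based ordering used by
   reassoc_stmt_dominates_stmt_p consistent.  */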
254 bool
255 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
257 gimple stmt = gsi_stmt (*gsi);
259 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
260 return gsi_remove (gsi, true);
262 gimple_stmt_iterator prev = *gsi;
263 gsi_prev (&prev);
264 unsigned uid = gimple_uid (stmt);
265 basic_block bb = gimple_bb (stmt);
266 bool ret = gsi_remove (gsi, true);
267 if (!gsi_end_p (prev))
268 gsi_next (&prev);
269 else
270 prev = gsi_start_bb (bb);
271 gimple end_stmt = gsi_stmt (*gsi);
272 while ((stmt = gsi_stmt (prev)) != end_stmt)
274 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
275 gimple_set_uid (stmt, uid);
276 gsi_next (&prev);
278 return ret;
281 /* Bias amount for loop-carried phis. We want this to be larger than
282 the depth of any reassociation tree we can see, but not larger than
283 the rank difference between two blocks. */
284 #define PHI_LOOP_BIAS (1 << 15)
286 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
287 an innermost loop, and the phi has only a single use which is inside
288 the loop, then the rank is the block rank of the loop latch plus an
289 extra bias for the loop-carried dependence. This causes expressions
290 calculated into an accumulator variable to be independent for each
291 iteration of the loop. If STMT is some other phi, the rank is the
292 block rank of its containing block. */
293 static long
294 phi_rank (gimple stmt)
296 basic_block bb = gimple_bb (stmt);
297 struct loop *father = bb->loop_father;
298 tree res;
299 unsigned i;
300 use_operand_p use;
301 gimple use_stmt;
303 /* We only care about real loops (those with a latch). */
304 if (!father->latch)
305 return bb_rank[bb->index];
307 /* Interesting phis must be in headers of innermost loops. */
308 if (bb != father->header
309 || father->inner)
310 return bb_rank[bb->index];
312 /* Ignore virtual SSA_NAMEs. */
313 res = gimple_phi_result (stmt);
314 if (virtual_operand_p (res))
315 return bb_rank[bb->index];
317 /* The phi definition must have a single use, and that use must be
318 within the loop. Otherwise this isn't an accumulator pattern. */
319 if (!single_imm_use (res, &use, &use_stmt)
320 || gimple_bb (use_stmt)->loop_father != father)
321 return bb_rank[bb->index];
323 /* Look for phi arguments from within the loop. If found, bias this phi. */
324 for (i = 0; i < gimple_phi_num_args (stmt); i++)
326 tree arg = gimple_phi_arg_def (stmt, i);
327 if (TREE_CODE (arg) == SSA_NAME
328 && !SSA_NAME_IS_DEFAULT_DEF (arg))
330 gimple def_stmt = SSA_NAME_DEF_STMT (arg);
331 if (gimple_bb (def_stmt)->loop_father == father)
332 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
336 /* Must be an uninteresting phi. */
337 return bb_rank[bb->index];
340 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
341 loop-carried dependence of an innermost loop, return TRUE; else
342 return FALSE. */
343 static bool
344 loop_carried_phi (tree exp)
346 gimple phi_stmt;
347 long block_rank;
349 if (TREE_CODE (exp) != SSA_NAME
350 || SSA_NAME_IS_DEFAULT_DEF (exp))
351 return false;
353 phi_stmt = SSA_NAME_DEF_STMT (exp);
355 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
356 return false;
358 /* Non-loop-carried phis have block rank. Loop-carried phis have
359 an additional bias added in. If this phi doesn't have block rank,
360 it's biased and should not be propagated. */
361 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
363 if (phi_rank (phi_stmt) != block_rank)
364 return true;
366 return false;
369 /* Return the maximum of RANK and the rank that should be propagated
370 from expression OP. For most operands, this is just the rank of OP.
371 For loop-carried phis, the value is zero to avoid undoing the bias
372 in favor of the phi. */
373 static long
374 propagate_rank (long rank, tree op)
376 long op_rank;
378 if (loop_carried_phi (op))
379 return rank;
381 op_rank = get_rank (op);
383 return MAX (rank, op_rank);
386 /* Look up the operand rank structure for expression E. */
388 static inline long
389 find_operand_rank (tree e)
391 long *slot = operand_rank->get (e);
392 return slot ? *slot : -1;
395 /* Insert {E,RANK} into the operand rank hashtable. */
397 static inline void
398 insert_operand_rank (tree e, long rank)
400 gcc_assert (rank > 0);
401 gcc_assert (!operand_rank->put (e, rank));
404 /* Given an expression E, return the rank of the expression. */
406 static long
407 get_rank (tree e)
409 /* Constants have rank 0. */
410 if (is_gimple_min_invariant (e))
411 return 0;
413 /* SSA_NAME's have the rank of the expression they are the result
414 of.
415 For globals and uninitialized values, the rank is 0.
416 For function arguments, use the pre-setup rank.
417 For PHI nodes, stores, asm statements, etc, we use the rank of
418 the BB.
419 For simple operations, the rank is the maximum rank of any of
420 its operands, or the bb_rank, whichever is less.
421 I make no claims that this is optimal, however, it gives good
422 results. */
424 /* We make an exception to the normal ranking system to break
425 dependences of accumulator variables in loops. Suppose we
426 have a simple one-block loop containing:
428 x_1 = phi(x_0, x_2)
429 b = a + x_1
430 c = b + d
431 x_2 = c + e
433 As shown, each iteration of the calculation into x is fully
434 dependent upon the iteration before it. We would prefer to
435 see this in the form:
437 x_1 = phi(x_0, x_2)
438 b = a + d
439 c = b + e
440 x_2 = c + x_1
442 If the loop is unrolled, the calculations of b and c from
443 different iterations can be interleaved.
445 To obtain this result during reassociation, we bias the rank
446 of the phi definition x_1 upward, when it is recognized as an
447 accumulator pattern. The artificial rank causes it to be
448 added last, providing the desired independence. */
450 if (TREE_CODE (e) == SSA_NAME)
452 gimple stmt;
453 long rank;
454 int i, n;
455 tree op;
457 if (SSA_NAME_IS_DEFAULT_DEF (e))
458 return find_operand_rank (e);
460 stmt = SSA_NAME_DEF_STMT (e);
461 if (gimple_code (stmt) == GIMPLE_PHI)
462 return phi_rank (stmt);
464 if (!is_gimple_assign (stmt)
465 || gimple_vdef (stmt))
466 return bb_rank[gimple_bb (stmt)->index];
468 /* If we already have a rank for this expression, use that. */
469 rank = find_operand_rank (e);
470 if (rank != -1)
471 return rank;
473 /* Otherwise, find the maximum rank for the operands. As an
474 exception, remove the bias from loop-carried phis when propagating
475 the rank so that dependent operations are not also biased. */
476 rank = 0;
477 if (gimple_assign_single_p (stmt))
479 tree rhs = gimple_assign_rhs1 (stmt);
480 n = TREE_OPERAND_LENGTH (rhs);
481 if (n == 0)
482 rank = propagate_rank (rank, rhs);
483 else
485 for (i = 0; i < n; i++)
487 op = TREE_OPERAND (rhs, i);
489 if (op != NULL_TREE)
490 rank = propagate_rank (rank, op);
494 else
496 n = gimple_num_ops (stmt);
497 for (i = 1; i < n; i++)
499 op = gimple_op (stmt, i);
500 gcc_assert (op);
501 rank = propagate_rank (rank, op);
505 if (dump_file && (dump_flags & TDF_DETAILS))
507 fprintf (dump_file, "Rank for ");
508 print_generic_expr (dump_file, e, 0);
509 fprintf (dump_file, " is %ld\n", (rank + 1));
512 /* Note the rank in the hashtable so we don't recompute it. */
513 insert_operand_rank (e, (rank + 1));
514 return (rank + 1);
517 /* Globals, etc, are rank 0 */
518 return 0;
522 /* We want integer ones to end up last no matter what, since they are
523 the ones we can do the most with. */
524 #define INTEGER_CONST_TYPE 1 << 3
525 #define FLOAT_CONST_TYPE 1 << 2
526 #define OTHER_CONST_TYPE 1 << 1
528 /* Classify an invariant tree into integer, float, or other, so that
529 we can sort them to be near other constants of the same type. */
530 static inline int
531 constant_type (tree t)
533 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
534 return INTEGER_CONST_TYPE;
535 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
536 return FLOAT_CONST_TYPE;
537 else
538 return OTHER_CONST_TYPE;
541 /* qsort comparison function to sort operand entries PA and PB by rank
542 so that the sorted array is ordered by rank in decreasing order. */
543 static int
544 sort_by_operand_rank (const void *pa, const void *pb)
546 const operand_entry_t oea = *(const operand_entry_t *)pa;
547 const operand_entry_t oeb = *(const operand_entry_t *)pb;
549 /* It's nicer for optimize_expression if constants that are likely
550 to fold when added/multiplied/whatever are put next to each
551 other. Since all constants have rank 0, order them by type. */
552 if (oeb->rank == 0 && oea->rank == 0)
554 if (constant_type (oeb->op) != constant_type (oea->op))
555 return constant_type (oeb->op) - constant_type (oea->op);
556 else
557 /* To make sorting result stable, we use unique IDs to determine
558 order. */
559 return oeb->id - oea->id;
562 /* Lastly, make sure the versions that are the same go next to each
563 other. */
564 if ((oeb->rank - oea->rank == 0)
565 && TREE_CODE (oea->op) == SSA_NAME
566 && TREE_CODE (oeb->op) == SSA_NAME)
568 /* Since SSA_NAME_VERSION is assigned pretty randomly (we reuse
569 versions of removed SSA_NAMEs), prefer, if possible, to sort
570 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
571 See PR60418. */
572 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
573 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
574 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
576 gimple stmta = SSA_NAME_DEF_STMT (oea->op);
577 gimple stmtb = SSA_NAME_DEF_STMT (oeb->op);
578 basic_block bba = gimple_bb (stmta);
579 basic_block bbb = gimple_bb (stmtb);
580 if (bbb != bba)
582 if (bb_rank[bbb->index] != bb_rank[bba->index])
583 return bb_rank[bbb->index] - bb_rank[bba->index];
585 else
587 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
588 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
589 if (da != db)
590 return da ? 1 : -1;
594 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
595 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
596 else
597 return oeb->id - oea->id;
600 if (oeb->rank != oea->rank)
601 return oeb->rank - oea->rank;
602 else
603 return oeb->id - oea->id;
606 /* Add an operand entry to *OPS for the tree operand OP. */
608 static void
609 add_to_ops_vec (vec<operand_entry_t> *ops, tree op)
611 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
613 oe->op = op;
614 oe->rank = get_rank (op);
615 oe->id = next_operand_entry_id++;
616 oe->count = 1;
617 ops->safe_push (oe);
620 /* Add an operand entry to *OPS for the tree operand OP with repeat
621 count REPEAT. */
623 static void
624 add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op,
625 HOST_WIDE_INT repeat)
627 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
629 oe->op = op;
630 oe->rank = get_rank (op);
631 oe->id = next_operand_entry_id++;
632 oe->count = repeat;
633 ops->safe_push (oe);
635 reassociate_stats.pows_encountered++;
638 /* Return true if STMT is a reassociable operation containing a binary
639 operation with tree code CODE, and is inside LOOP. */
641 static bool
642 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
644 basic_block bb = gimple_bb (stmt);
646 if (gimple_bb (stmt) == NULL)
647 return false;
649 if (!flow_bb_inside_loop_p (loop, bb))
650 return false;
652 if (is_gimple_assign (stmt)
653 && gimple_assign_rhs_code (stmt) == code
654 && has_single_use (gimple_assign_lhs (stmt)))
655 return true;
657 return false;
661 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
662 operand of the negate operation. Otherwise, return NULL. */
664 static tree
665 get_unary_op (tree name, enum tree_code opcode)
667 gimple stmt = SSA_NAME_DEF_STMT (name);
669 if (!is_gimple_assign (stmt))
670 return NULL_TREE;
672 if (gimple_assign_rhs_code (stmt) == opcode)
673 return gimple_assign_rhs1 (stmt);
674 return NULL_TREE;
677 /* If CURR and LAST are a pair of ops that OPCODE allows us to
678 eliminate through equivalences, do so, remove them from OPS, and
679 return true. Otherwise, return false. */
681 static bool
682 eliminate_duplicate_pair (enum tree_code opcode,
683 vec<operand_entry_t> *ops,
684 bool *all_done,
685 unsigned int i,
686 operand_entry_t curr,
687 operand_entry_t last)
690 /* If we have two of the same op, and the opcode is & |, min, or max,
691 we can eliminate one of them.
692 If we have two of the same op, and the opcode is ^, we can
693 eliminate both of them. */
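/* For example (illustrative): a & a, a | a, MIN (a, a) and MAX (a, a)
   each reduce to a single a, while a ^ a reduces to 0.  */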
695 if (last && last->op == curr->op)
697 switch (opcode)
699 case MAX_EXPR:
700 case MIN_EXPR:
701 case BIT_IOR_EXPR:
702 case BIT_AND_EXPR:
703 if (dump_file && (dump_flags & TDF_DETAILS))
705 fprintf (dump_file, "Equivalence: ");
706 print_generic_expr (dump_file, curr->op, 0);
707 fprintf (dump_file, " [&|minmax] ");
708 print_generic_expr (dump_file, last->op, 0);
709 fprintf (dump_file, " -> ");
710 print_generic_stmt (dump_file, last->op, 0);
713 ops->ordered_remove (i);
714 reassociate_stats.ops_eliminated ++;
716 return true;
718 case BIT_XOR_EXPR:
719 if (dump_file && (dump_flags & TDF_DETAILS))
721 fprintf (dump_file, "Equivalence: ");
722 print_generic_expr (dump_file, curr->op, 0);
723 fprintf (dump_file, " ^ ");
724 print_generic_expr (dump_file, last->op, 0);
725 fprintf (dump_file, " -> nothing\n");
728 reassociate_stats.ops_eliminated += 2;
730 if (ops->length () == 2)
732 ops->create (0);
733 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
734 *all_done = true;
736 else
738 ops->ordered_remove (i-1);
739 ops->ordered_remove (i-1);
742 return true;
744 default:
745 break;
748 return false;
751 static vec<tree> plus_negates;
753 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
754 expression, look in OPS for a corresponding positive operation to cancel
755 it out. If we find one, remove the other from OPS, replace
756 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
757 return false. */
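/* A small illustration with made-up operand names: for ops {a, b, t}
   where t = -b, the pair b + t is replaced by 0, leaving a + 0; if
   instead t = ~b, the pair becomes -1, leaving a + (-1).  */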
759 static bool
760 eliminate_plus_minus_pair (enum tree_code opcode,
761 vec<operand_entry_t> *ops,
762 unsigned int currindex,
763 operand_entry_t curr)
765 tree negateop;
766 tree notop;
767 unsigned int i;
768 operand_entry_t oe;
770 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
771 return false;
773 negateop = get_unary_op (curr->op, NEGATE_EXPR);
774 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
775 if (negateop == NULL_TREE && notop == NULL_TREE)
776 return false;
778 /* Any non-negated version will have a rank that is one less than
779 the current rank. So once we hit those ranks, if we don't find
780 one, we can stop. */
782 for (i = currindex + 1;
783 ops->iterate (i, &oe)
784 && oe->rank >= curr->rank - 1 ;
785 i++)
787 if (oe->op == negateop)
790 if (dump_file && (dump_flags & TDF_DETAILS))
792 fprintf (dump_file, "Equivalence: ");
793 print_generic_expr (dump_file, negateop, 0);
794 fprintf (dump_file, " + -");
795 print_generic_expr (dump_file, oe->op, 0);
796 fprintf (dump_file, " -> 0\n");
799 ops->ordered_remove (i);
800 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
801 ops->ordered_remove (currindex);
802 reassociate_stats.ops_eliminated ++;
804 return true;
806 else if (oe->op == notop)
808 tree op_type = TREE_TYPE (oe->op);
810 if (dump_file && (dump_flags & TDF_DETAILS))
812 fprintf (dump_file, "Equivalence: ");
813 print_generic_expr (dump_file, notop, 0);
814 fprintf (dump_file, " + ~");
815 print_generic_expr (dump_file, oe->op, 0);
816 fprintf (dump_file, " -> -1\n");
819 ops->ordered_remove (i);
820 add_to_ops_vec (ops, build_int_cst_type (op_type, -1));
821 ops->ordered_remove (currindex);
822 reassociate_stats.ops_eliminated ++;
824 return true;
828 /* CURR->OP is a negate expr in a plus expr: save it for later
829 inspection in repropagate_negates(). */
830 if (negateop != NULL_TREE)
831 plus_negates.safe_push (curr->op);
833 return false;
836 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
837 bitwise not expression, look in OPS for a corresponding operand to
838 cancel it out. If we find one, remove the other from OPS, replace
839 OPS[CURRINDEX] with 0, and return true. Otherwise, return
840 false. */
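/* For example (illustrative): a & b & ~a collapses to 0 and
   a | b | ~a collapses to -1; unlike the pairwise cases above, all
   remaining operands are dropped as well.  */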
842 static bool
843 eliminate_not_pairs (enum tree_code opcode,
844 vec<operand_entry_t> *ops,
845 unsigned int currindex,
846 operand_entry_t curr)
848 tree notop;
849 unsigned int i;
850 operand_entry_t oe;
852 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
853 || TREE_CODE (curr->op) != SSA_NAME)
854 return false;
856 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
857 if (notop == NULL_TREE)
858 return false;
860 /* Any non-not version will have a rank that is one less than
861 the current rank. So once we hit those ranks, if we don't find
862 one, we can stop. */
864 for (i = currindex + 1;
865 ops->iterate (i, &oe)
866 && oe->rank >= curr->rank - 1;
867 i++)
869 if (oe->op == notop)
871 if (dump_file && (dump_flags & TDF_DETAILS))
873 fprintf (dump_file, "Equivalence: ");
874 print_generic_expr (dump_file, notop, 0);
875 if (opcode == BIT_AND_EXPR)
876 fprintf (dump_file, " & ~");
877 else if (opcode == BIT_IOR_EXPR)
878 fprintf (dump_file, " | ~");
879 print_generic_expr (dump_file, oe->op, 0);
880 if (opcode == BIT_AND_EXPR)
881 fprintf (dump_file, " -> 0\n");
882 else if (opcode == BIT_IOR_EXPR)
883 fprintf (dump_file, " -> -1\n");
886 if (opcode == BIT_AND_EXPR)
887 oe->op = build_zero_cst (TREE_TYPE (oe->op));
888 else if (opcode == BIT_IOR_EXPR)
889 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
891 reassociate_stats.ops_eliminated += ops->length () - 1;
892 ops->truncate (0);
893 ops->quick_push (oe);
894 return true;
898 return false;
901 /* Use constant value that may be present in OPS to try to eliminate
902 operands. Note that this function is only really used when we've
903 eliminated ops for other reasons, or merged constants. Across
904 single statements, fold already does all of this, plus more. There
905 is little point in duplicating logic, so I've only included the
906 identities that I could ever construct testcases to trigger. */
908 static void
909 eliminate_using_constants (enum tree_code opcode,
910 vec<operand_entry_t> *ops)
912 operand_entry_t oelast = ops->last ();
913 tree type = TREE_TYPE (oelast->op);
915 if (oelast->rank == 0
916 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
918 switch (opcode)
920 case BIT_AND_EXPR:
921 if (integer_zerop (oelast->op))
923 if (ops->length () != 1)
925 if (dump_file && (dump_flags & TDF_DETAILS))
926 fprintf (dump_file, "Found & 0, removing all other ops\n");
928 reassociate_stats.ops_eliminated += ops->length () - 1;
930 ops->truncate (0);
931 ops->quick_push (oelast);
932 return;
935 else if (integer_all_onesp (oelast->op))
937 if (ops->length () != 1)
939 if (dump_file && (dump_flags & TDF_DETAILS))
940 fprintf (dump_file, "Found & -1, removing\n");
941 ops->pop ();
942 reassociate_stats.ops_eliminated++;
945 break;
946 case BIT_IOR_EXPR:
947 if (integer_all_onesp (oelast->op))
949 if (ops->length () != 1)
951 if (dump_file && (dump_flags & TDF_DETAILS))
952 fprintf (dump_file, "Found | -1, removing all other ops\n");
954 reassociate_stats.ops_eliminated += ops->length () - 1;
956 ops->truncate (0);
957 ops->quick_push (oelast);
958 return;
961 else if (integer_zerop (oelast->op))
963 if (ops->length () != 1)
965 if (dump_file && (dump_flags & TDF_DETAILS))
966 fprintf (dump_file, "Found | 0, removing\n");
967 ops->pop ();
968 reassociate_stats.ops_eliminated++;
971 break;
972 case MULT_EXPR:
973 if (integer_zerop (oelast->op)
974 || (FLOAT_TYPE_P (type)
975 && !HONOR_NANS (type)
976 && !HONOR_SIGNED_ZEROS (type)
977 && real_zerop (oelast->op)))
979 if (ops->length () != 1)
981 if (dump_file && (dump_flags & TDF_DETAILS))
982 fprintf (dump_file, "Found * 0, removing all other ops\n");
984 reassociate_stats.ops_eliminated += ops->length () - 1;
985 ops->truncate (1);
986 ops->quick_push (oelast);
987 return;
990 else if (integer_onep (oelast->op)
991 || (FLOAT_TYPE_P (type)
992 && !HONOR_SNANS (type)
993 && real_onep (oelast->op)))
995 if (ops->length () != 1)
997 if (dump_file && (dump_flags & TDF_DETAILS))
998 fprintf (dump_file, "Found * 1, removing\n");
999 ops->pop ();
1000 reassociate_stats.ops_eliminated++;
1001 return;
1004 break;
1005 case BIT_XOR_EXPR:
1006 case PLUS_EXPR:
1007 case MINUS_EXPR:
1008 if (integer_zerop (oelast->op)
1009 || (FLOAT_TYPE_P (type)
1010 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1011 && fold_real_zero_addition_p (type, oelast->op,
1012 opcode == MINUS_EXPR)))
1014 if (ops->length () != 1)
1016 if (dump_file && (dump_flags & TDF_DETAILS))
1017 fprintf (dump_file, "Found [|^+] 0, removing\n");
1018 ops->pop ();
1019 reassociate_stats.ops_eliminated++;
1020 return;
1023 break;
1024 default:
1025 break;
1031 static void linearize_expr_tree (vec<operand_entry_t> *, gimple,
1032 bool, bool);
1034 /* Structure for tracking and counting operands. */
1035 typedef struct oecount_s {
1036 int cnt;
1037 int id;
1038 enum tree_code oecode;
1039 tree op;
1040 } oecount;
1043 /* The heap for the oecount hashtable and the sorted list of operands. */
1044 static vec<oecount> cvec;
1047 /* Oecount hashtable helpers. */
1049 struct oecount_hasher
1051 typedef int value_type;
1052 typedef int compare_type;
1053 typedef int store_values_directly;
1054 static inline hashval_t hash (const value_type &);
1055 static inline bool equal (const value_type &, const compare_type &);
1056 static bool is_deleted (int &v) { return v == 1; }
1057 static void mark_deleted (int &e) { e = 1; }
1058 static bool is_empty (int &v) { return v == 0; }
1059 static void mark_empty (int &e) { e = 0; }
1060 static void remove (int &) {}
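/* Values stored in the hash table are indices into cvec biased by 42
   (the first element is stored as 42; see "idx = cvec.length () + 41"
   in undistribute_ops_list), which keeps the values 0 and 1 free for the
   empty and deleted markers above.  */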
1063 /* Hash function for oecount. */
1065 inline hashval_t
1066 oecount_hasher::hash (const value_type &p)
1068 const oecount *c = &cvec[p - 42];
1069 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1072 /* Comparison function for oecount. */
1074 inline bool
1075 oecount_hasher::equal (const value_type &p1, const compare_type &p2)
1077 const oecount *c1 = &cvec[p1 - 42];
1078 const oecount *c2 = &cvec[p2 - 42];
1079 return (c1->oecode == c2->oecode
1080 && c1->op == c2->op);
1083 /* Comparison function for qsort sorting oecount elements by count. */
1085 static int
1086 oecount_cmp (const void *p1, const void *p2)
1088 const oecount *c1 = (const oecount *)p1;
1089 const oecount *c2 = (const oecount *)p2;
1090 if (c1->cnt != c2->cnt)
1091 return c1->cnt - c2->cnt;
1092 else
1093 /* If counts are identical, use unique IDs to stabilize qsort. */
1094 return c1->id - c2->id;
1097 /* Return TRUE iff STMT represents a builtin call that raises OP
1098 to some exponent. */
1100 static bool
1101 stmt_is_power_of_op (gimple stmt, tree op)
1103 tree fndecl;
1105 if (!is_gimple_call (stmt))
1106 return false;
1108 fndecl = gimple_call_fndecl (stmt);
1110 if (!fndecl
1111 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
1112 return false;
1114 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1116 CASE_FLT_FN (BUILT_IN_POW):
1117 CASE_FLT_FN (BUILT_IN_POWI):
1118 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1120 default:
1121 return false;
1125 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1126 in place and return the result. Assumes that stmt_is_power_of_op
1127 was previously called for STMT and returned TRUE. */
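/* For instance (illustrative), __builtin_powi (x, 5) is rewritten in
   place to __builtin_powi (x, 4) and 4 is returned; likewise
   __builtin_pow (x, 3.0) becomes __builtin_pow (x, 2.0), returning 2.  */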
1129 static HOST_WIDE_INT
1130 decrement_power (gimple stmt)
1132 REAL_VALUE_TYPE c, cint;
1133 HOST_WIDE_INT power;
1134 tree arg1;
1136 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt)))
1138 CASE_FLT_FN (BUILT_IN_POW):
1139 arg1 = gimple_call_arg (stmt, 1);
1140 c = TREE_REAL_CST (arg1);
1141 power = real_to_integer (&c) - 1;
1142 real_from_integer (&cint, VOIDmode, power, SIGNED);
1143 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1144 return power;
1146 CASE_FLT_FN (BUILT_IN_POWI):
1147 arg1 = gimple_call_arg (stmt, 1);
1148 power = TREE_INT_CST_LOW (arg1) - 1;
1149 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1150 return power;
1152 default:
1153 gcc_unreachable ();
1157 /* Find the single immediate use of STMT's LHS, and replace it
1158 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1159 replace *DEF with OP as well. */
1161 static void
1162 propagate_op_to_single_use (tree op, gimple stmt, tree *def)
1164 tree lhs;
1165 gimple use_stmt;
1166 use_operand_p use;
1167 gimple_stmt_iterator gsi;
1169 if (is_gimple_call (stmt))
1170 lhs = gimple_call_lhs (stmt);
1171 else
1172 lhs = gimple_assign_lhs (stmt);
1174 gcc_assert (has_single_use (lhs));
1175 single_imm_use (lhs, &use, &use_stmt);
1176 if (lhs == *def)
1177 *def = op;
1178 SET_USE (use, op);
1179 if (TREE_CODE (op) != SSA_NAME)
1180 update_stmt (use_stmt);
1181 gsi = gsi_for_stmt (stmt);
1182 unlink_stmt_vdef (stmt);
1183 reassoc_remove_stmt (&gsi);
1184 release_defs (stmt);
1187 /* Walks the linear chain with result *DEF searching for an operation
1188 with operand OP and code OPCODE removing that from the chain. *DEF
1189 is updated if there is only one operand but no operation left. */
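/* A sketch with made-up names: for the chain t1 = x * a; t2 = t1 * b
   with *DEF == t2 and OP == x, the use of t1 is replaced by a and t1's
   definition is removed, leaving t2 = a * b; *DEF itself only changes
   when no operation is left at all.  */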
1191 static void
1192 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1194 gimple stmt = SSA_NAME_DEF_STMT (*def);
1198 tree name;
1200 if (opcode == MULT_EXPR
1201 && stmt_is_power_of_op (stmt, op))
1203 if (decrement_power (stmt) == 1)
1204 propagate_op_to_single_use (op, stmt, def);
1205 return;
1208 name = gimple_assign_rhs1 (stmt);
1210 /* If this is the operation we look for and one of the operands
1211 is ours simply propagate the other operand into the stmts
1212 single use. */
1213 if (gimple_assign_rhs_code (stmt) == opcode
1214 && (name == op
1215 || gimple_assign_rhs2 (stmt) == op))
1217 if (name == op)
1218 name = gimple_assign_rhs2 (stmt);
1219 propagate_op_to_single_use (name, stmt, def);
1220 return;
1223 /* We might have a multiply of two __builtin_pow* calls, and
1224 the operand might be hiding in the rightmost one. */
1225 if (opcode == MULT_EXPR
1226 && gimple_assign_rhs_code (stmt) == opcode
1227 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1228 && has_single_use (gimple_assign_rhs2 (stmt)))
1230 gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1231 if (stmt_is_power_of_op (stmt2, op))
1233 if (decrement_power (stmt2) == 1)
1234 propagate_op_to_single_use (op, stmt2, def);
1235 return;
1239 /* Continue walking the chain. */
1240 gcc_assert (name != op
1241 && TREE_CODE (name) == SSA_NAME);
1242 stmt = SSA_NAME_DEF_STMT (name);
1244 while (1);
1247 /* Returns true if statement S1 dominates statement S2. Like
1248 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1250 static bool
1251 reassoc_stmt_dominates_stmt_p (gimple s1, gimple s2)
1253 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1255 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1256 SSA_NAME. Assume it lives at the beginning of the function and
1257 thus dominates everything. */
1258 if (!bb1 || s1 == s2)
1259 return true;
1261 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1262 if (!bb2)
1263 return false;
1265 if (bb1 == bb2)
1267 /* PHIs in the same basic block are assumed to be
1268 executed all in parallel, if only one stmt is a PHI,
1269 it dominates the other stmt in the same basic block. */
1270 if (gimple_code (s1) == GIMPLE_PHI)
1271 return true;
1273 if (gimple_code (s2) == GIMPLE_PHI)
1274 return false;
1276 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1278 if (gimple_uid (s1) < gimple_uid (s2))
1279 return true;
1281 if (gimple_uid (s1) > gimple_uid (s2))
1282 return false;
1284 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1285 unsigned int uid = gimple_uid (s1);
1286 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1288 gimple s = gsi_stmt (gsi);
1289 if (gimple_uid (s) != uid)
1290 break;
1291 if (s == s2)
1292 return true;
1295 return false;
1298 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1301 /* Insert STMT after INSERT_POINT. */
1303 static void
1304 insert_stmt_after (gimple stmt, gimple insert_point)
1306 gimple_stmt_iterator gsi;
1307 basic_block bb;
1309 if (gimple_code (insert_point) == GIMPLE_PHI)
1310 bb = gimple_bb (insert_point);
1311 else if (!stmt_ends_bb_p (insert_point))
1313 gsi = gsi_for_stmt (insert_point);
1314 gimple_set_uid (stmt, gimple_uid (insert_point));
1315 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1316 return;
1318 else
1319 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1320 thus if it must end a basic block, it should be a call that can
1321 throw, or some assignment that can throw. If it throws, the LHS
1322 of it will not be initialized though, so only valid places using
1323 the SSA_NAME should be dominated by the fallthru edge. */
1324 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1325 gsi = gsi_after_labels (bb);
1326 if (gsi_end_p (gsi))
1328 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1329 gimple_set_uid (stmt,
1330 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1332 else
1333 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1334 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1337 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1338 the result. Places the statement after the definition of either
1339 OP1 or OP2. Returns the new statement. */
1341 static gimple
1342 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1344 gimple op1def = NULL, op2def = NULL;
1345 gimple_stmt_iterator gsi;
1346 tree op;
1347 gassign *sum;
1349 /* Create the addition statement. */
1350 op = make_ssa_name (type);
1351 sum = gimple_build_assign (op, opcode, op1, op2);
1353 /* Find an insertion place and insert. */
1354 if (TREE_CODE (op1) == SSA_NAME)
1355 op1def = SSA_NAME_DEF_STMT (op1);
1356 if (TREE_CODE (op2) == SSA_NAME)
1357 op2def = SSA_NAME_DEF_STMT (op2);
1358 if ((!op1def || gimple_nop_p (op1def))
1359 && (!op2def || gimple_nop_p (op2def)))
1361 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1362 if (gsi_end_p (gsi))
1364 gimple_stmt_iterator gsi2
1365 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1366 gimple_set_uid (sum,
1367 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1369 else
1370 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1371 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1373 else
1375 gimple insert_point;
1376 if ((!op1def || gimple_nop_p (op1def))
1377 || (op2def && !gimple_nop_p (op2def)
1378 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1379 insert_point = op2def;
1380 else
1381 insert_point = op1def;
1382 insert_stmt_after (sum, insert_point);
1384 update_stmt (sum);
1386 return sum;
1389 /* Perform un-distribution of divisions and multiplications.
1390 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1391 to (A + B) / X for real X.
1393 The algorithm is organized as follows.
1395 - First we walk the addition chain *OPS looking for summands that
1396 are defined by a multiplication or a real division. This results
1397 in the candidates bitmap with relevant indices into *OPS.
1399 - Second we build the chains of multiplications or divisions for
1400 these candidates, counting the number of occurrences of (operand, code)
1401 pairs in all of the candidates chains.
1403 - Third we sort the (operand, code) pairs by number of occurrence and
1404 process them starting with the pair with the most uses.
1406 * For each such pair we walk the candidates again to build a
1407 second candidate bitmap noting all multiplication/division chains
1408 that have at least one occurrence of (operand, code).
1410 * We build an alternate addition chain only covering these
1411 candidates with one (operand, code) operation removed from their
1412 multiplication/division chain.
1414 * The first candidate gets replaced by the alternate addition chain
1415 multiplied/divided by the operand.
1417 * All candidate chains get disabled for further processing and
1418 processing of (operand, code) pairs continues.
1420 The alternate addition chains built are re-processed by the main
1421 reassociation algorithm which allows optimizing a * x * y + b * y * x
1422 to (a + b ) * x * y in one invocation of the reassociation pass. */
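/* A small walk-through (names are made up): for the ops list
   {a*x, b*x, c} the pair (x, MULT_EXPR) occurs twice, so the two
   candidate chains are stripped of x and summed, t = a + b, the
   multiplication is re-applied, t2 = t * x, and the ops list becomes
   {t2, 0, c}, which the caller then re-optimizes.  */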
1424 static bool
1425 undistribute_ops_list (enum tree_code opcode,
1426 vec<operand_entry_t> *ops, struct loop *loop)
1428 unsigned int length = ops->length ();
1429 operand_entry_t oe1;
1430 unsigned i, j;
1431 sbitmap candidates, candidates2;
1432 unsigned nr_candidates, nr_candidates2;
1433 sbitmap_iterator sbi0;
1434 vec<operand_entry_t> *subops;
1435 bool changed = false;
1436 int next_oecount_id = 0;
1438 if (length <= 1
1439 || opcode != PLUS_EXPR)
1440 return false;
1442 /* Build a list of candidates to process. */
1443 candidates = sbitmap_alloc (length);
1444 bitmap_clear (candidates);
1445 nr_candidates = 0;
1446 FOR_EACH_VEC_ELT (*ops, i, oe1)
1448 enum tree_code dcode;
1449 gimple oe1def;
1451 if (TREE_CODE (oe1->op) != SSA_NAME)
1452 continue;
1453 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1454 if (!is_gimple_assign (oe1def))
1455 continue;
1456 dcode = gimple_assign_rhs_code (oe1def);
1457 if ((dcode != MULT_EXPR
1458 && dcode != RDIV_EXPR)
1459 || !is_reassociable_op (oe1def, dcode, loop))
1460 continue;
1462 bitmap_set_bit (candidates, i);
1463 nr_candidates++;
1466 if (nr_candidates < 2)
1468 sbitmap_free (candidates);
1469 return false;
1472 if (dump_file && (dump_flags & TDF_DETAILS))
1474 fprintf (dump_file, "searching for un-distribute opportunities ");
1475 print_generic_expr (dump_file,
1476 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1477 fprintf (dump_file, " %d\n", nr_candidates);
1480 /* Build linearized sub-operand lists and the counting table. */
1481 cvec.create (0);
1483 hash_table<oecount_hasher> ctable (15);
1485 /* ??? Macro arguments cannot have multi-argument template types in
1486 them. This typedef is needed to work around that limitation. */
1487 typedef vec<operand_entry_t> vec_operand_entry_t_heap;
1488 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1489 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1491 gimple oedef;
1492 enum tree_code oecode;
1493 unsigned j;
1495 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1496 oecode = gimple_assign_rhs_code (oedef);
1497 linearize_expr_tree (&subops[i], oedef,
1498 associative_tree_code (oecode), false);
1500 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1502 oecount c;
1503 int *slot;
1504 int idx;
1505 c.oecode = oecode;
1506 c.cnt = 1;
1507 c.id = next_oecount_id++;
1508 c.op = oe1->op;
1509 cvec.safe_push (c);
1510 idx = cvec.length () + 41;
1511 slot = ctable.find_slot (idx, INSERT);
1512 if (!*slot)
1514 *slot = idx;
1516 else
1518 cvec.pop ();
1519 cvec[*slot - 42].cnt++;
1524 /* Sort the counting table. */
1525 cvec.qsort (oecount_cmp);
1527 if (dump_file && (dump_flags & TDF_DETAILS))
1529 oecount *c;
1530 fprintf (dump_file, "Candidates:\n");
1531 FOR_EACH_VEC_ELT (cvec, j, c)
1533 fprintf (dump_file, " %u %s: ", c->cnt,
1534 c->oecode == MULT_EXPR
1535 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1536 print_generic_expr (dump_file, c->op, 0);
1537 fprintf (dump_file, "\n");
1541 /* Process the (operand, code) pairs in order of most occurrence. */
1542 candidates2 = sbitmap_alloc (length);
1543 while (!cvec.is_empty ())
1545 oecount *c = &cvec.last ();
1546 if (c->cnt < 2)
1547 break;
1549 /* Now collect the operands in the outer chain that contain
1550 the common operand in their inner chain. */
1551 bitmap_clear (candidates2);
1552 nr_candidates2 = 0;
1553 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1555 gimple oedef;
1556 enum tree_code oecode;
1557 unsigned j;
1558 tree op = (*ops)[i]->op;
1560 /* If we undistributed in this chain already this may be
1561 a constant. */
1562 if (TREE_CODE (op) != SSA_NAME)
1563 continue;
1565 oedef = SSA_NAME_DEF_STMT (op);
1566 oecode = gimple_assign_rhs_code (oedef);
1567 if (oecode != c->oecode)
1568 continue;
1570 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1572 if (oe1->op == c->op)
1574 bitmap_set_bit (candidates2, i);
1575 ++nr_candidates2;
1576 break;
1581 if (nr_candidates2 >= 2)
1583 operand_entry_t oe1, oe2;
1584 gimple prod;
1585 int first = bitmap_first_set_bit (candidates2);
1587 /* Build the new addition chain. */
1588 oe1 = (*ops)[first];
1589 if (dump_file && (dump_flags & TDF_DETAILS))
1591 fprintf (dump_file, "Building (");
1592 print_generic_expr (dump_file, oe1->op, 0);
1594 zero_one_operation (&oe1->op, c->oecode, c->op);
1595 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1597 gimple sum;
1598 oe2 = (*ops)[i];
1599 if (dump_file && (dump_flags & TDF_DETAILS))
1601 fprintf (dump_file, " + ");
1602 print_generic_expr (dump_file, oe2->op, 0);
1604 zero_one_operation (&oe2->op, c->oecode, c->op);
1605 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1606 oe1->op, oe2->op, opcode);
1607 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1608 oe2->rank = 0;
1609 oe1->op = gimple_get_lhs (sum);
1612 /* Apply the multiplication/division. */
1613 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1614 oe1->op, c->op, c->oecode);
1615 if (dump_file && (dump_flags & TDF_DETAILS))
1617 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1618 print_generic_expr (dump_file, c->op, 0);
1619 fprintf (dump_file, "\n");
1622 /* Record it in the addition chain and disable further
1623 undistribution with this op. */
1624 oe1->op = gimple_assign_lhs (prod);
1625 oe1->rank = get_rank (oe1->op);
1626 subops[first].release ();
1628 changed = true;
1631 cvec.pop ();
1634 for (i = 0; i < ops->length (); ++i)
1635 subops[i].release ();
1636 free (subops);
1637 cvec.release ();
1638 sbitmap_free (candidates);
1639 sbitmap_free (candidates2);
1641 return changed;
1644 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1645 expression, examine the other OPS to see if any of them are comparisons
1646 of the same values, which we may be able to combine or eliminate.
1647 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1649 static bool
1650 eliminate_redundant_comparison (enum tree_code opcode,
1651 vec<operand_entry_t> *ops,
1652 unsigned int currindex,
1653 operand_entry_t curr)
1655 tree op1, op2;
1656 enum tree_code lcode, rcode;
1657 gimple def1, def2;
1658 int i;
1659 operand_entry_t oe;
1661 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1662 return false;
1664 /* Check that CURR is a comparison. */
1665 if (TREE_CODE (curr->op) != SSA_NAME)
1666 return false;
1667 def1 = SSA_NAME_DEF_STMT (curr->op);
1668 if (!is_gimple_assign (def1))
1669 return false;
1670 lcode = gimple_assign_rhs_code (def1);
1671 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1672 return false;
1673 op1 = gimple_assign_rhs1 (def1);
1674 op2 = gimple_assign_rhs2 (def1);
1676 /* Now look for a similar comparison in the remaining OPS. */
1677 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1679 tree t;
1681 if (TREE_CODE (oe->op) != SSA_NAME)
1682 continue;
1683 def2 = SSA_NAME_DEF_STMT (oe->op);
1684 if (!is_gimple_assign (def2))
1685 continue;
1686 rcode = gimple_assign_rhs_code (def2);
1687 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1688 continue;
1690 /* If we got here, we have a match. See if we can combine the
1691 two comparisons. */
1692 if (opcode == BIT_IOR_EXPR)
1693 t = maybe_fold_or_comparisons (lcode, op1, op2,
1694 rcode, gimple_assign_rhs1 (def2),
1695 gimple_assign_rhs2 (def2));
1696 else
1697 t = maybe_fold_and_comparisons (lcode, op1, op2,
1698 rcode, gimple_assign_rhs1 (def2),
1699 gimple_assign_rhs2 (def2));
1700 if (!t)
1701 continue;
1703 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1704 always give us a boolean_type_node value back. If the original
1705 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1706 we need to convert. */
1707 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1708 t = fold_convert (TREE_TYPE (curr->op), t);
1710 if (TREE_CODE (t) != INTEGER_CST
1711 && !operand_equal_p (t, curr->op, 0))
1713 enum tree_code subcode;
1714 tree newop1, newop2;
1715 if (!COMPARISON_CLASS_P (t))
1716 continue;
1717 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1718 STRIP_USELESS_TYPE_CONVERSION (newop1);
1719 STRIP_USELESS_TYPE_CONVERSION (newop2);
1720 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1721 continue;
1724 if (dump_file && (dump_flags & TDF_DETAILS))
1726 fprintf (dump_file, "Equivalence: ");
1727 print_generic_expr (dump_file, curr->op, 0);
1728 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1729 print_generic_expr (dump_file, oe->op, 0);
1730 fprintf (dump_file, " -> ");
1731 print_generic_expr (dump_file, t, 0);
1732 fprintf (dump_file, "\n");
1735 /* Now we can delete oe, as it has been subsumed by the new combined
1736 expression t. */
1737 ops->ordered_remove (i);
1738 reassociate_stats.ops_eliminated ++;
1740 /* If t is the same as curr->op, we're done. Otherwise we must
1741 replace curr->op with t. Special case is if we got a constant
1742 back, in which case we add it to the end instead of in place of
1743 the current entry. */
1744 if (TREE_CODE (t) == INTEGER_CST)
1746 ops->ordered_remove (currindex);
1747 add_to_ops_vec (ops, t);
1749 else if (!operand_equal_p (t, curr->op, 0))
1751 gimple sum;
1752 enum tree_code subcode;
1753 tree newop1;
1754 tree newop2;
1755 gcc_assert (COMPARISON_CLASS_P (t));
1756 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1757 STRIP_USELESS_TYPE_CONVERSION (newop1);
1758 STRIP_USELESS_TYPE_CONVERSION (newop2);
1759 gcc_checking_assert (is_gimple_val (newop1)
1760 && is_gimple_val (newop2));
1761 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1762 curr->op = gimple_get_lhs (sum);
1764 return true;
1767 return false;
1770 /* Perform various identities and other optimizations on the list of
1771 operand entries, stored in OPS. The tree code for the binary
1772 operation between all the operands is OPCODE. */
1774 static void
1775 optimize_ops_list (enum tree_code opcode,
1776 vec<operand_entry_t> *ops)
1778 unsigned int length = ops->length ();
1779 unsigned int i;
1780 operand_entry_t oe;
1781 operand_entry_t oelast = NULL;
1782 bool iterate = false;
1784 if (length == 1)
1785 return;
1787 oelast = ops->last ();
1789 /* If the last two are constants, pop the constants off, merge them
1790 and try the next two. */
1791 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1793 operand_entry_t oelm1 = (*ops)[length - 2];
1795 if (oelm1->rank == 0
1796 && is_gimple_min_invariant (oelm1->op)
1797 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1798 TREE_TYPE (oelast->op)))
1800 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1801 oelm1->op, oelast->op);
1803 if (folded && is_gimple_min_invariant (folded))
1805 if (dump_file && (dump_flags & TDF_DETAILS))
1806 fprintf (dump_file, "Merging constants\n");
1808 ops->pop ();
1809 ops->pop ();
1811 add_to_ops_vec (ops, folded);
1812 reassociate_stats.constants_eliminated++;
1814 optimize_ops_list (opcode, ops);
1815 return;
1820 eliminate_using_constants (opcode, ops);
1821 oelast = NULL;
1823 for (i = 0; ops->iterate (i, &oe);)
1825 bool done = false;
1827 if (eliminate_not_pairs (opcode, ops, i, oe))
1828 return;
1829 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1830 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1831 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1833 if (done)
1834 return;
1835 iterate = true;
1836 oelast = NULL;
1837 continue;
1839 oelast = oe;
1840 i++;
1843 length = ops->length ();
1844 oelast = ops->last ();
1846 if (iterate)
1847 optimize_ops_list (opcode, ops);
1850 /* The following functions are subroutines to optimize_range_tests and allow
1851 it to try to change a logical combination of comparisons into a range
1852 test.
1854 For example, both
1855 X == 2 || X == 5 || X == 3 || X == 4
1856 and
1857 X >= 2 && X <= 5
1858 are converted to
1859 (unsigned) (X - 2) <= 3
1861 For more information see comments above fold_test_range in fold-const.c,
1862 this implementation is for GIMPLE. */
1864 struct range_entry
1866 tree exp;
1867 tree low;
1868 tree high;
1869 bool in_p;
1870 bool strict_overflow_p;
1871 unsigned int idx, next;
1874 /* This is similar to make_range in fold-const.c, but on top of
1875 GIMPLE instead of trees. If EXP is non-NULL, it should be
1876 an SSA_NAME and STMT argument is ignored, otherwise STMT
1877 argument should be a GIMPLE_COND. */
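/* For example (illustrative), if EXP is the boolean result of
   x_1 >= 2, the entry ends up with exp = x_1, in_p = true, low = 2 and
   high = NULL_TREE (maximum), i.e. +[2, -] in the notation used by the
   dumps and comments below.  */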
1879 static void
1880 init_range_entry (struct range_entry *r, tree exp, gimple stmt)
1882 int in_p;
1883 tree low, high;
1884 bool is_bool, strict_overflow_p;
1886 r->exp = NULL_TREE;
1887 r->in_p = false;
1888 r->strict_overflow_p = false;
1889 r->low = NULL_TREE;
1890 r->high = NULL_TREE;
1891 if (exp != NULL_TREE
1892 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1893 return;
1895 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1896 and see if we can refine the range. Some of the cases below may not
1897 happen, but it doesn't seem worth worrying about this. We "continue"
1898 the outer loop when we've changed something; otherwise we "break"
1899 the switch, which will "break" the while. */
1900 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1901 high = low;
1902 in_p = 0;
1903 strict_overflow_p = false;
1904 is_bool = false;
1905 if (exp == NULL_TREE)
1906 is_bool = true;
1907 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1909 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1910 is_bool = true;
1911 else
1912 return;
1914 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
1915 is_bool = true;
1917 while (1)
1919 enum tree_code code;
1920 tree arg0, arg1, exp_type;
1921 tree nexp;
1922 location_t loc;
1924 if (exp != NULL_TREE)
1926 if (TREE_CODE (exp) != SSA_NAME
1927 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
1928 break;
1930 stmt = SSA_NAME_DEF_STMT (exp);
1931 if (!is_gimple_assign (stmt))
1932 break;
1934 code = gimple_assign_rhs_code (stmt);
1935 arg0 = gimple_assign_rhs1 (stmt);
1936 arg1 = gimple_assign_rhs2 (stmt);
1937 exp_type = TREE_TYPE (exp);
1939 else
1941 code = gimple_cond_code (stmt);
1942 arg0 = gimple_cond_lhs (stmt);
1943 arg1 = gimple_cond_rhs (stmt);
1944 exp_type = boolean_type_node;
1947 if (TREE_CODE (arg0) != SSA_NAME)
1948 break;
1949 loc = gimple_location (stmt);
1950 switch (code)
1952 case BIT_NOT_EXPR:
1953 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
1954 /* Ensure the range is either +[-,0], +[0,0],
1955 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
1956 -[1,1]. If it is e.g. +[-,-] or -[-,-]
1957 or similar expression of unconditional true or
1958 false, it should not be negated. */
1959 && ((high && integer_zerop (high))
1960 || (low && integer_onep (low))))
1962 in_p = !in_p;
1963 exp = arg0;
1964 continue;
1966 break;
1967 case SSA_NAME:
1968 exp = arg0;
1969 continue;
1970 CASE_CONVERT:
1971 if (is_bool)
1972 goto do_default;
1973 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
1975 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
1976 is_bool = true;
1977 else
1978 return;
1980 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
1981 is_bool = true;
1982 goto do_default;
1983 case EQ_EXPR:
1984 case NE_EXPR:
1985 case LT_EXPR:
1986 case LE_EXPR:
1987 case GE_EXPR:
1988 case GT_EXPR:
1989 is_bool = true;
1990 /* FALLTHRU */
1991 default:
1992 if (!is_bool)
1993 return;
1994 do_default:
1995 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
1996 &low, &high, &in_p,
1997 &strict_overflow_p);
1998 if (nexp != NULL_TREE)
2000 exp = nexp;
2001 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2002 continue;
2004 break;
2006 break;
2008 if (is_bool)
2010 r->exp = exp;
2011 r->in_p = in_p;
2012 r->low = low;
2013 r->high = high;
2014 r->strict_overflow_p = strict_overflow_p;
2018 /* Comparison function for qsort. Sort entries
2019 without SSA_NAME exp first, then with SSA_NAMEs sorted
2020 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2021 by increasing ->low and if ->low is the same, by increasing
2022 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2023 maximum. */
2025 static int
2026 range_entry_cmp (const void *a, const void *b)
2028 const struct range_entry *p = (const struct range_entry *) a;
2029 const struct range_entry *q = (const struct range_entry *) b;
2031 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2033 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2035 /* Group range_entries for the same SSA_NAME together. */
2036 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2037 return -1;
2038 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2039 return 1;
2040 /* If ->low is different, NULL low goes first, then by
2041 ascending low. */
2042 if (p->low != NULL_TREE)
2044 if (q->low != NULL_TREE)
2046 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2047 p->low, q->low);
2048 if (tem && integer_onep (tem))
2049 return -1;
2050 tem = fold_binary (GT_EXPR, boolean_type_node,
2051 p->low, q->low);
2052 if (tem && integer_onep (tem))
2053 return 1;
2055 else
2056 return 1;
2058 else if (q->low != NULL_TREE)
2059 return -1;
2060 /* If ->high is different, NULL high goes last, before that by
2061 ascending high. */
2062 if (p->high != NULL_TREE)
2064 if (q->high != NULL_TREE)
2066 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2067 p->high, q->high);
2068 if (tem && integer_onep (tem))
2069 return -1;
2070 tem = fold_binary (GT_EXPR, boolean_type_node,
2071 p->high, q->high);
2072 if (tem && integer_onep (tem))
2073 return 1;
2075 else
2076 return -1;
2078 else if (q->high != NULL_TREE)
2079 return 1;
2080 /* If both ranges are the same, sort below by ascending idx. */
2082 else
2083 return 1;
2085 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2086 return -1;
2088 if (p->idx < q->idx)
2089 return -1;
2090 else
2092 gcc_checking_assert (p->idx > q->idx);
2093 return 1;
2097 /* Helper routine of optimize_range_tests.
2098 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2099 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2100 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2101 is NULL, OTHERRANGEP should not be NULL; in that case OTHERRANGEP points to
2102 an array of COUNT pointers to other ranges. Return
2103 true if the range merge has been successful.
2104 If OPCODE is ERROR_MARK, this is called from within
2105 maybe_optimize_range_tests and is performing inter-bb range optimization.
2106 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2107 oe->rank. */
2109 static bool
2110 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2111 struct range_entry **otherrangep,
2112 unsigned int count, enum tree_code opcode,
2113 vec<operand_entry_t> *ops, tree exp, gimple_seq seq,
2114 bool in_p, tree low, tree high, bool strict_overflow_p)
2116 operand_entry_t oe = (*ops)[range->idx];
2117 tree op = oe->op;
2118 gimple stmt = op ? SSA_NAME_DEF_STMT (op) :
2119 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2120 location_t loc = gimple_location (stmt);
2121 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2122 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2123 in_p, low, high);
2124 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2125 gimple_stmt_iterator gsi;
2126 unsigned int i;
2128 if (tem == NULL_TREE)
2129 return false;
2131 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2132 warning_at (loc, OPT_Wstrict_overflow,
2133 "assuming signed overflow does not occur "
2134 "when simplifying range test");
2136 if (dump_file && (dump_flags & TDF_DETAILS))
2138 struct range_entry *r;
2139 fprintf (dump_file, "Optimizing range tests ");
2140 print_generic_expr (dump_file, range->exp, 0);
2141 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2142 print_generic_expr (dump_file, range->low, 0);
2143 fprintf (dump_file, ", ");
2144 print_generic_expr (dump_file, range->high, 0);
2145 fprintf (dump_file, "]");
2146 for (i = 0; i < count; i++)
2148 if (otherrange)
2149 r = otherrange + i;
2150 else
2151 r = otherrangep[i];
2152 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2153 print_generic_expr (dump_file, r->low, 0);
2154 fprintf (dump_file, ", ");
2155 print_generic_expr (dump_file, r->high, 0);
2156 fprintf (dump_file, "]");
2158 fprintf (dump_file, "\n into ");
2159 print_generic_expr (dump_file, tem, 0);
2160 fprintf (dump_file, "\n");
2163 if (opcode == BIT_IOR_EXPR
2164 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2165 tem = invert_truthvalue_loc (loc, tem);
2167 tem = fold_convert_loc (loc, optype, tem);
2168 gsi = gsi_for_stmt (stmt);
2169 /* In rare cases range->exp can be equal to lhs of stmt.
2170 In that case we have to insert after the stmt rather than before
2171 it. */
2172 if (op == range->exp)
2174 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2175 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2176 GSI_CONTINUE_LINKING);
2178 else
2180 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2181 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2182 GSI_SAME_STMT);
2183 gsi_prev (&gsi);
2185 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2186 if (gimple_uid (gsi_stmt (gsi)))
2187 break;
2188 else
2189 gimple_set_uid (gsi_stmt (gsi), gimple_uid (stmt));
2191 oe->op = tem;
2192 range->exp = exp;
2193 range->low = low;
2194 range->high = high;
2195 range->in_p = in_p;
2196 range->strict_overflow_p = false;
2198 for (i = 0; i < count; i++)
2200 if (otherrange)
2201 range = otherrange + i;
2202 else
2203 range = otherrangep[i];
2204 oe = (*ops)[range->idx];
2205 /* Now change all the other range test immediate uses, so that
2206 those tests will be optimized away. */
2207 if (opcode == ERROR_MARK)
2209 if (oe->op)
2210 oe->op = build_int_cst (TREE_TYPE (oe->op),
2211 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2212 else
2213 oe->op = (oe->rank == BIT_IOR_EXPR
2214 ? boolean_false_node : boolean_true_node);
2216 else
2217 oe->op = error_mark_node;
2218 range->exp = NULL_TREE;
2220 return true;
2223 /* Optimize X == CST1 || X == CST2
2224 if popcount (CST1 ^ CST2) == 1 into
2225 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2226 Similarly for ranges. E.g.
2227 X != 2 && X != 3 && X != 10 && X != 11
2228 will be transformed by the previous optimization into
2229 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2230 and this loop can transform that into
2231 !(((X & ~8) - 2U) <= 1U). */
2233 static bool
2234 optimize_range_tests_xor (enum tree_code opcode, tree type,
2235 tree lowi, tree lowj, tree highi, tree highj,
2236 vec<operand_entry_t> *ops,
2237 struct range_entry *rangei,
2238 struct range_entry *rangej)
2240 tree lowxor, highxor, tem, exp;
2241 /* Check lowi ^ lowj == highi ^ highj and
2242 popcount (lowi ^ lowj) == 1. */
2243 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2244 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2245 return false;
2246 if (!integer_pow2p (lowxor))
2247 return false;
2248 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2249 if (!tree_int_cst_equal (lowxor, highxor))
2250 return false;
2252 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2253 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2254 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2255 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2256 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2257 NULL, rangei->in_p, lowj, highj,
2258 rangei->strict_overflow_p
2259 || rangej->strict_overflow_p))
2260 return true;
2261 return false;
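/* Illustrative sketch, not part of the pass: the transform performed
   above, written out as plain C for one concrete pair of constants.
   The constants 43 and 47 are purely an example; 43 ^ 47 == 4, a
   single set bit, which is the precondition checked above.  */

static int
range_xor_sketch_before (unsigned int x)
{
  return x == 43u || x == 47u;
}

static int
range_xor_sketch_after (unsigned int x)
{
  /* One compare instead of two: clearing the differing bit maps both
     43 and 47 to the same value.  */
  return (x & ~4u) == (43u & ~4u);
}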
2264 /* Optimize X == CST1 || X == CST2
2265 if popcount (CST2 - CST1) == 1 into
2266 ((X - CST1) & ~(CST2 - CST1)) == 0.
2267 Similarly for ranges. E.g.
2268 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2269 || X == 75 || X == 45
2270 will be transformed by the previous optimization into
2271 (X - 43U) <= 3U || (X - 75U) <= 3U
2272 and this loop can transform that into
2273 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2274 static bool
2275 optimize_range_tests_diff (enum tree_code opcode, tree type,
2276 tree lowi, tree lowj, tree highi, tree highj,
2277 vec<operand_entry_t> *ops,
2278 struct range_entry *rangei,
2279 struct range_entry *rangej)
2281 tree tem1, tem2, mask;
2282 /* Check highi - lowi == highj - lowj. */
2283 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2284 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2285 return false;
2286 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2287 if (!tree_int_cst_equal (tem1, tem2))
2288 return false;
2289 /* Check popcount (lowj - lowi) == 1. */
2290 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2291 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2292 return false;
2293 if (!integer_pow2p (tem1))
2294 return false;
2296 type = unsigned_type_for (type);
2297 tem1 = fold_convert (type, tem1);
2298 tem2 = fold_convert (type, tem2);
2299 lowi = fold_convert (type, lowi);
2300 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2301 tem1 = fold_binary (MINUS_EXPR, type,
2302 fold_convert (type, rangei->exp), lowi);
2303 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2304 lowj = build_int_cst (type, 0);
2305 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2306 NULL, rangei->in_p, lowj, tem2,
2307 rangei->strict_overflow_p
2308 || rangej->strict_overflow_p))
2309 return true;
2310 return false;
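/* Illustrative sketch, not part of the pass: the transform performed
   above for one concrete pair of constants.  43 and 75 are only an
   example; 75 - 43 == 32, a single set bit, which is the precondition
   checked above.  */

static int
range_diff_sketch_before (unsigned int x)
{
  return x == 43u || x == 75u;
}

static int
range_diff_sketch_after (unsigned int x)
{
  /* (x - 43) must be either 0 or 32, so masking away bit 5 and
     comparing against 0 covers both constants with one compare.  */
  return ((x - 43u) & ~32u) == 0u;
}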
2313 /* Perform the common checks shared by optimize_range_tests_xor and
2314 optimize_range_tests_diff.
2315 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2316 Else it calls optimize_range_tests_diff. */
2318 static bool
2319 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2320 bool optimize_xor, vec<operand_entry_t> *ops,
2321 struct range_entry *ranges)
2323 int i, j;
2324 bool any_changes = false;
2325 for (i = first; i < length; i++)
2327 tree lowi, highi, lowj, highj, type, tem;
2329 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2330 continue;
2331 type = TREE_TYPE (ranges[i].exp);
2332 if (!INTEGRAL_TYPE_P (type))
2333 continue;
2334 lowi = ranges[i].low;
2335 if (lowi == NULL_TREE)
2336 lowi = TYPE_MIN_VALUE (type);
2337 highi = ranges[i].high;
2338 if (highi == NULL_TREE)
2339 continue;
2340 for (j = i + 1; j < length && j < i + 64; j++)
2342 bool changes;
2343 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2344 continue;
2345 lowj = ranges[j].low;
2346 if (lowj == NULL_TREE)
2347 continue;
2348 highj = ranges[j].high;
2349 if (highj == NULL_TREE)
2350 highj = TYPE_MAX_VALUE (type);
2351 /* Check lowj > highi. */
2352 tem = fold_binary (GT_EXPR, boolean_type_node,
2353 lowj, highi);
2354 if (tem == NULL_TREE || !integer_onep (tem))
2355 continue;
2356 if (optimize_xor)
2357 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2358 highi, highj, ops,
2359 ranges + i, ranges + j);
2360 else
2361 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2362 highi, highj, ops,
2363 ranges + i, ranges + j);
2364 if (changes)
2366 any_changes = true;
2367 break;
2371 return any_changes;
2374 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2375 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2376 EXP on success, NULL otherwise. */
2378 static tree
2379 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2380 wide_int *mask, tree *totallowp)
2382 tree tem = int_const_binop (MINUS_EXPR, high, low);
2383 if (tem == NULL_TREE
2384 || TREE_CODE (tem) != INTEGER_CST
2385 || TREE_OVERFLOW (tem)
2386 || tree_int_cst_sgn (tem) == -1
2387 || compare_tree_int (tem, prec) != -1)
2388 return NULL_TREE;
2390 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2391 *mask = wi::shifted_mask (0, max, false, prec);
2392 if (TREE_CODE (exp) == BIT_AND_EXPR
2393 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2395 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2396 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2397 if (wi::popcount (msk) == 1
2398 && wi::ltu_p (msk, prec - max))
2400 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2401 max += msk.to_uhwi ();
2402 exp = TREE_OPERAND (exp, 0);
2403 if (integer_zerop (low)
2404 && TREE_CODE (exp) == PLUS_EXPR
2405 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2407 widest_int bias
2408 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2409 TYPE_PRECISION (TREE_TYPE (low))));
2410 tree tbias = wide_int_to_tree (TREE_TYPE (low), bias);
2411 if (totallowp)
2413 *totallowp = tbias;
2414 exp = TREE_OPERAND (exp, 0);
2415 STRIP_NOPS (exp);
2416 return exp;
2418 else if (!tree_int_cst_lt (totallow, tbias))
2419 return NULL_TREE;
2420 bias -= wi::to_widest (totallow);
2421 if (wi::ges_p (bias, 0) && wi::lts_p (bias, prec - max))
2423 *mask = wi::lshift (*mask, bias);
2424 exp = TREE_OPERAND (exp, 0);
2425 STRIP_NOPS (exp);
2426 return exp;
2431 if (totallowp)
2432 return exp;
2433 if (!tree_int_cst_lt (totallow, low))
2434 return exp;
2435 tem = int_const_binop (MINUS_EXPR, low, totallow);
2436 if (tem == NULL_TREE
2437 || TREE_CODE (tem) != INTEGER_CST
2438 || TREE_OVERFLOW (tem)
2439 || compare_tree_int (tem, prec - max) == 1)
2440 return NULL_TREE;
2442 *mask = wi::lshift (*mask, wi::to_widest (tem));
2443 return exp;
2446 /* Attempt to optimize small range tests using a bit test.
2447 E.g.
2448 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2449 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2450 has been optimized by earlier optimizations into:
2451 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2452 As the whole 43 through 82 range spans fewer than 64 values,
2453 for 64-bit word targets optimize that into:
2454 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
2456 static bool
2457 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2458 vec<operand_entry_t> *ops,
2459 struct range_entry *ranges)
2461 int i, j;
2462 bool any_changes = false;
2463 int prec = GET_MODE_BITSIZE (word_mode);
2464 auto_vec<struct range_entry *, 64> candidates;
2466 for (i = first; i < length - 2; i++)
2468 tree lowi, highi, lowj, highj, type;
2470 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2471 continue;
2472 type = TREE_TYPE (ranges[i].exp);
2473 if (!INTEGRAL_TYPE_P (type))
2474 continue;
2475 lowi = ranges[i].low;
2476 if (lowi == NULL_TREE)
2477 lowi = TYPE_MIN_VALUE (type);
2478 highi = ranges[i].high;
2479 if (highi == NULL_TREE)
2480 continue;
2481 wide_int mask;
2482 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2483 highi, &mask, &lowi);
2484 if (exp == NULL_TREE)
2485 continue;
2486 bool strict_overflow_p = ranges[i].strict_overflow_p;
2487 candidates.truncate (0);
2488 int end = MIN (i + 64, length);
2489 for (j = i + 1; j < end; j++)
2491 tree exp2;
2492 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2493 continue;
2494 if (ranges[j].exp == exp)
2496 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2498 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2499 if (exp2 == exp)
2501 else if (TREE_CODE (exp2) == PLUS_EXPR)
2503 exp2 = TREE_OPERAND (exp2, 0);
2504 STRIP_NOPS (exp2);
2505 if (exp2 != exp)
2506 continue;
2508 else
2509 continue;
2511 else
2512 continue;
2513 lowj = ranges[j].low;
2514 if (lowj == NULL_TREE)
2515 continue;
2516 highj = ranges[j].high;
2517 if (highj == NULL_TREE)
2518 highj = TYPE_MAX_VALUE (type);
2519 wide_int mask2;
2520 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2521 highj, &mask2, NULL);
2522 if (exp2 != exp)
2523 continue;
2524 mask |= mask2;
2525 strict_overflow_p |= ranges[j].strict_overflow_p;
2526 candidates.safe_push (&ranges[j]);
2529 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2530 if (candidates.length () >= 2)
2532 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2533 wi::to_widest (lowi)
2534 + prec - 1 - wi::clz (mask));
2535 operand_entry_t oe = (*ops)[ranges[i].idx];
2536 tree op = oe->op;
2537 gimple stmt = op ? SSA_NAME_DEF_STMT (op)
2538 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2539 location_t loc = gimple_location (stmt);
2540 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2542 /* See if it isn't cheaper to pretend the minimum value of the
2543 range is 0, if the maximum value is small enough.
2544 We can then avoid the subtraction of the minimum value, but the
2545 mask constant could perhaps be more expensive. */
2546 if (compare_tree_int (lowi, 0) > 0
2547 && compare_tree_int (high, prec) < 0)
2549 int cost_diff;
2550 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2551 rtx reg = gen_raw_REG (word_mode, 10000);
2552 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2553 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2554 GEN_INT (-m)), speed_p);
2555 rtx r = immed_wide_int_const (mask, word_mode);
2556 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2557 speed_p);
2558 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2559 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2560 speed_p);
2561 if (cost_diff > 0)
2563 mask = wi::lshift (mask, m);
2564 lowi = build_zero_cst (TREE_TYPE (lowi));
2568 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2569 false, lowi, high);
2570 if (tem == NULL_TREE || is_gimple_val (tem))
2571 continue;
2572 tree etype = unsigned_type_for (TREE_TYPE (exp));
2573 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2574 fold_convert_loc (loc, etype, exp),
2575 fold_convert_loc (loc, etype, lowi));
2576 exp = fold_convert_loc (loc, integer_type_node, exp);
2577 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2578 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2579 build_int_cst (word_type, 1), exp);
2580 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2581 wide_int_to_tree (word_type, mask));
2582 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2583 build_zero_cst (word_type));
2584 if (is_gimple_val (exp))
2585 continue;
2587 /* The shift might have undefined behavior if TEM is true,
2588 but reassociate_bb isn't prepared to have basic blocks
2589 split when it is running. So, temporarily emit code
2590 with BIT_IOR_EXPR instead of &&, and fix it up in
2591 branch_fixup. */
2592 gimple_seq seq;
2593 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2594 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2595 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2596 gimple_seq seq2;
2597 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2598 gimple_seq_add_seq_without_update (&seq, seq2);
2599 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2600 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2601 gimple g = gimple_build_assign (make_ssa_name (optype),
2602 BIT_IOR_EXPR, tem, exp);
2603 gimple_set_location (g, loc);
2604 gimple_seq_add_stmt_without_update (&seq, g);
2605 exp = gimple_assign_lhs (g);
2606 tree val = build_zero_cst (optype);
2607 if (update_range_test (&ranges[i], NULL, candidates.address (),
2608 candidates.length (), opcode, ops, exp,
2609 seq, false, val, val, strict_overflow_p))
2611 any_changes = true;
2612 reassoc_branch_fixups.safe_push (tem);
2614 else
2615 gimple_seq_discard (seq);
2618 return any_changes;
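/* Illustrative sketch, not part of the pass: the shape of code the
   routine above aims to produce, written as plain C.  The constants
   are an arbitrary example that fits in one word-sized mask.  */

static int
bit_test_sketch_before (unsigned int x)
{
  return x == 4u || x == 6u || x == 9u || x == 10u;
}

static int
bit_test_sketch_after (unsigned int x)
{
  /* Range check first so the shift is well defined, then a single
     mask lookup replaces the four compares.  */
  return x <= 10u
	 && (((1ull << x)
	      & ((1ull << 4) | (1ull << 6) | (1ull << 9) | (1ull << 10)))
	     != 0);
}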
2621 /* Optimize range tests, similarly to how fold_range_test optimizes
2622 them on trees. The tree code for the binary
2623 operation between all the operands is OPCODE.
2624 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2625 maybe_optimize_range_tests for inter-bb range optimization.
2626 In that case if oe->op is NULL, oe->id is bb->index whose
2627 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2628 the actual opcode. */
2630 static bool
2631 optimize_range_tests (enum tree_code opcode,
2632 vec<operand_entry_t> *ops)
2634 unsigned int length = ops->length (), i, j, first;
2635 operand_entry_t oe;
2636 struct range_entry *ranges;
2637 bool any_changes = false;
2639 if (length == 1)
2640 return false;
2642 ranges = XNEWVEC (struct range_entry, length);
2643 for (i = 0; i < length; i++)
2645 oe = (*ops)[i];
2646 ranges[i].idx = i;
2647 init_range_entry (ranges + i, oe->op,
2648 oe->op ? NULL :
2649 last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2650 /* For | invert it now, we will invert it again before emitting
2651 the optimized expression. */
2652 if (opcode == BIT_IOR_EXPR
2653 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2654 ranges[i].in_p = !ranges[i].in_p;
2657 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2658 for (i = 0; i < length; i++)
2659 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2660 break;
2662 /* Try to merge ranges. */
2663 for (first = i; i < length; i++)
2665 tree low = ranges[i].low;
2666 tree high = ranges[i].high;
2667 int in_p = ranges[i].in_p;
2668 bool strict_overflow_p = ranges[i].strict_overflow_p;
2669 int update_fail_count = 0;
2671 for (j = i + 1; j < length; j++)
2673 if (ranges[i].exp != ranges[j].exp)
2674 break;
2675 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2676 ranges[j].in_p, ranges[j].low, ranges[j].high))
2677 break;
2678 strict_overflow_p |= ranges[j].strict_overflow_p;
2681 if (j == i + 1)
2682 continue;
2684 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2685 opcode, ops, ranges[i].exp, NULL, in_p,
2686 low, high, strict_overflow_p))
2688 i = j - 1;
2689 any_changes = true;
2691 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2692 while update_range_test would fail. */
2693 else if (update_fail_count == 64)
2694 i = j - 1;
2695 else
2696 ++update_fail_count;
2699 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2700 ops, ranges);
2702 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2703 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2704 ops, ranges);
2705 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2706 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2707 ops, ranges);
2709 if (any_changes && opcode != ERROR_MARK)
2711 j = 0;
2712 FOR_EACH_VEC_ELT (*ops, i, oe)
2714 if (oe->op == error_mark_node)
2715 continue;
2716 else if (i != j)
2717 (*ops)[j] = oe;
2718 j++;
2720 ops->truncate (j);
2723 XDELETEVEC (ranges);
2724 return any_changes;
2727 /* Return true if STMT is a cast like:
2728 <bb N>:
2730 _123 = (int) _234;
2732 <bb M>:
2733 # _345 = PHI <_123(N), 1(...), 1(...)>
2734 where _234 has bool type, _123 has a single use and
2735 bb N has a single successor M. This is commonly used in
2736 the last block of a range test. */
2738 static bool
2739 final_range_test_p (gimple stmt)
2741 basic_block bb, rhs_bb;
2742 edge e;
2743 tree lhs, rhs;
2744 use_operand_p use_p;
2745 gimple use_stmt;
2747 if (!gimple_assign_cast_p (stmt))
2748 return false;
2749 bb = gimple_bb (stmt);
2750 if (!single_succ_p (bb))
2751 return false;
2752 e = single_succ_edge (bb);
2753 if (e->flags & EDGE_COMPLEX)
2754 return false;
2756 lhs = gimple_assign_lhs (stmt);
2757 rhs = gimple_assign_rhs1 (stmt);
2758 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2759 || TREE_CODE (rhs) != SSA_NAME
2760 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
2761 return false;
2763 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
2764 if (!single_imm_use (lhs, &use_p, &use_stmt))
2765 return false;
2767 if (gimple_code (use_stmt) != GIMPLE_PHI
2768 || gimple_bb (use_stmt) != e->dest)
2769 return false;
2771 /* And that the rhs is defined in the same loop. */
2772 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
2773 if (rhs_bb == NULL
2774 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
2775 return false;
2777 return true;
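/* Illustrative sketch, not part of the pass: C source that typically
   produces the pattern final_range_test_p looks for.  The second
   operand of the short-circuit || is evaluated in its own basic
   block, its boolean result is cast to int there, and that cast
   feeds the PHI node in the join block together with the constant 1
   coming in on the short-circuit edge.  */

static int
final_range_test_sketch (int x, int y)
{
  return x == 1 || y != 0;
}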
2780 /* Return true if BB is suitable basic block for inter-bb range test
2781 optimization. If BACKWARD is true, BB should be the only predecessor
2782 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
2783 or compared against to find the common basic block to which all
2784 conditions branch if true resp. false. If BACKWARD is false, TEST_BB
2785 should be the only predecessor of BB. */
2787 static bool
2788 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
2789 bool backward)
2791 edge_iterator ei, ei2;
2792 edge e, e2;
2793 gimple stmt;
2794 gphi_iterator gsi;
2795 bool other_edge_seen = false;
2796 bool is_cond;
2798 if (test_bb == bb)
2799 return false;
2800 /* Check last stmt first. */
2801 stmt = last_stmt (bb);
2802 if (stmt == NULL
2803 || (gimple_code (stmt) != GIMPLE_COND
2804 && (backward || !final_range_test_p (stmt)))
2805 || gimple_visited_p (stmt)
2806 || stmt_could_throw_p (stmt)
2807 || *other_bb == bb)
2808 return false;
2809 is_cond = gimple_code (stmt) == GIMPLE_COND;
2810 if (is_cond)
2812 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
2813 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
2814 to *OTHER_BB (if not set yet, try to find it out). */
2815 if (EDGE_COUNT (bb->succs) != 2)
2816 return false;
2817 FOR_EACH_EDGE (e, ei, bb->succs)
2819 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2820 return false;
2821 if (e->dest == test_bb)
2823 if (backward)
2824 continue;
2825 else
2826 return false;
2828 if (e->dest == bb)
2829 return false;
2830 if (*other_bb == NULL)
2832 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
2833 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
2834 return false;
2835 else if (e->dest == e2->dest)
2836 *other_bb = e->dest;
2837 if (*other_bb == NULL)
2838 return false;
2840 if (e->dest == *other_bb)
2841 other_edge_seen = true;
2842 else if (backward)
2843 return false;
2845 if (*other_bb == NULL || !other_edge_seen)
2846 return false;
2848 else if (single_succ (bb) != *other_bb)
2849 return false;
2851 /* Now check all PHIs of *OTHER_BB. */
2852 e = find_edge (bb, *other_bb);
2853 e2 = find_edge (test_bb, *other_bb);
2854 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2856 gphi *phi = gsi.phi ();
2857 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
2858 corresponding to BB and TEST_BB predecessor must be the same. */
2859 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
2860 gimple_phi_arg_def (phi, e2->dest_idx), 0))
2862 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
2863 one of the PHIs should have the lhs of the last stmt in
2864 that block as PHI arg and that PHI should have 0 or 1
2865 corresponding to it in all other range test basic blocks
2866 considered. */
2867 if (!is_cond)
2869 if (gimple_phi_arg_def (phi, e->dest_idx)
2870 == gimple_assign_lhs (stmt)
2871 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
2872 || integer_onep (gimple_phi_arg_def (phi,
2873 e2->dest_idx))))
2874 continue;
2876 else
2878 gimple test_last = last_stmt (test_bb);
2879 if (gimple_code (test_last) != GIMPLE_COND
2880 && gimple_phi_arg_def (phi, e2->dest_idx)
2881 == gimple_assign_lhs (test_last)
2882 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
2883 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
2884 continue;
2887 return false;
2890 return true;
2893 /* Return true if BB doesn't have side-effects that would disallow
2894 range test optimization, all SSA_NAMEs set in the bb are consumed
2895 in the bb and there are no PHIs. */
2897 static bool
2898 no_side_effect_bb (basic_block bb)
2900 gimple_stmt_iterator gsi;
2901 gimple last;
2903 if (!gimple_seq_empty_p (phi_nodes (bb)))
2904 return false;
2905 last = last_stmt (bb);
2906 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2908 gimple stmt = gsi_stmt (gsi);
2909 tree lhs;
2910 imm_use_iterator imm_iter;
2911 use_operand_p use_p;
2913 if (is_gimple_debug (stmt))
2914 continue;
2915 if (gimple_has_side_effects (stmt))
2916 return false;
2917 if (stmt == last)
2918 return true;
2919 if (!is_gimple_assign (stmt))
2920 return false;
2921 lhs = gimple_assign_lhs (stmt);
2922 if (TREE_CODE (lhs) != SSA_NAME)
2923 return false;
2924 if (gimple_assign_rhs_could_trap_p (stmt))
2925 return false;
2926 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2928 gimple use_stmt = USE_STMT (use_p);
2929 if (is_gimple_debug (use_stmt))
2930 continue;
2931 if (gimple_bb (use_stmt) != bb)
2932 return false;
2935 return false;
2938 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
2939 return true and fill in *OPS recursively. */
2941 static bool
2942 get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops,
2943 struct loop *loop)
2945 gimple stmt = SSA_NAME_DEF_STMT (var);
2946 tree rhs[2];
2947 int i;
2949 if (!is_reassociable_op (stmt, code, loop))
2950 return false;
2952 rhs[0] = gimple_assign_rhs1 (stmt);
2953 rhs[1] = gimple_assign_rhs2 (stmt);
2954 gimple_set_visited (stmt, true);
2955 for (i = 0; i < 2; i++)
2956 if (TREE_CODE (rhs[i]) == SSA_NAME
2957 && !get_ops (rhs[i], code, ops, loop)
2958 && has_single_use (rhs[i]))
2960 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
2962 oe->op = rhs[i];
2963 oe->rank = code;
2964 oe->id = 0;
2965 oe->count = 1;
2966 ops->safe_push (oe);
2968 return true;
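/* Illustrative sketch, not part of the pass: given the chain

     _1 = a_2 | b_3;
     _4 = _1 | c_5;
     x_6 = _4 | d_7;

   where each of these names has a single use, get_ops (x_6,
   BIT_IOR_EXPR, &ops, loop) pushes the leaves a_2, b_3, c_5 and d_7
   onto OPS, so the whole chain can be treated as one n-ary or.  */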
2971 /* Find the ops that were added by get_ops starting from VAR, see if
2972 they were changed during update_range_test and if yes, create new
2973 stmts. */
2975 static tree
2976 update_ops (tree var, enum tree_code code, vec<operand_entry_t> ops,
2977 unsigned int *pidx, struct loop *loop)
2979 gimple stmt = SSA_NAME_DEF_STMT (var);
2980 tree rhs[4];
2981 int i;
2983 if (!is_reassociable_op (stmt, code, loop))
2984 return NULL;
2986 rhs[0] = gimple_assign_rhs1 (stmt);
2987 rhs[1] = gimple_assign_rhs2 (stmt);
2988 rhs[2] = rhs[0];
2989 rhs[3] = rhs[1];
2990 for (i = 0; i < 2; i++)
2991 if (TREE_CODE (rhs[i]) == SSA_NAME)
2993 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
2994 if (rhs[2 + i] == NULL_TREE)
2996 if (has_single_use (rhs[i]))
2997 rhs[2 + i] = ops[(*pidx)++]->op;
2998 else
2999 rhs[2 + i] = rhs[i];
3002 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3003 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3005 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3006 var = make_ssa_name (TREE_TYPE (var));
3007 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3008 rhs[2], rhs[3]);
3009 gimple_set_uid (g, gimple_uid (stmt));
3010 gimple_set_visited (g, true);
3011 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3013 return var;
3016 /* Structure to track the initial value passed to get_ops and
3017 the range in the ops vector for each basic block. */
3019 struct inter_bb_range_test_entry
3021 tree op;
3022 unsigned int first_idx, last_idx;
3025 /* Inter-bb range test optimization. */
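/* Illustrative sketch, not part of the pass: for source such as

     return x == 1 || x == 3 || x == 5 || x == 7;

   each comparison typically ends up in its own basic block ending in
   a GIMPLE_COND, all branching to a common join block.  The routine
   below walks those blocks, collects the individual tests into one
   operand vector and hands it to optimize_range_tests, which can then
   merge them (here, e.g., into a single (x & ~6) == 1 style check).  */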
3027 static void
3028 maybe_optimize_range_tests (gimple stmt)
3030 basic_block first_bb = gimple_bb (stmt);
3031 basic_block last_bb = first_bb;
3032 basic_block other_bb = NULL;
3033 basic_block bb;
3034 edge_iterator ei;
3035 edge e;
3036 auto_vec<operand_entry_t> ops;
3037 auto_vec<inter_bb_range_test_entry> bbinfo;
3038 bool any_changes = false;
3040 /* Consider only basic blocks that end with GIMPLE_COND or
3041 a cast statement satisfying final_range_test_p. All
3042 but the last bb in the first_bb .. last_bb range
3043 should end with GIMPLE_COND. */
3044 if (gimple_code (stmt) == GIMPLE_COND)
3046 if (EDGE_COUNT (first_bb->succs) != 2)
3047 return;
3049 else if (final_range_test_p (stmt))
3050 other_bb = single_succ (first_bb);
3051 else
3052 return;
3054 if (stmt_could_throw_p (stmt))
3055 return;
3057 /* As relative ordering of post-dominator sons isn't fixed,
3058 maybe_optimize_range_tests can be called first on any
3059 bb in the range we want to optimize. So, start searching
3060 backwards, if first_bb can be set to a predecessor. */
3061 while (single_pred_p (first_bb))
3063 basic_block pred_bb = single_pred (first_bb);
3064 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3065 break;
3066 if (!no_side_effect_bb (first_bb))
3067 break;
3068 first_bb = pred_bb;
3070 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3071 Before starting forward search in last_bb successors, find
3072 out the other_bb. */
3073 if (first_bb == last_bb)
3075 other_bb = NULL;
3076 /* As non-GIMPLE_COND last stmt always terminates the range,
3077 if forward search didn't discover anything, just give up. */
3078 if (gimple_code (stmt) != GIMPLE_COND)
3079 return;
3080 /* Look at both successors. Either it ends with a GIMPLE_COND
3081 and satisfies suitable_cond_bb, or ends with a cast and
3082 other_bb is that cast's successor. */
3083 FOR_EACH_EDGE (e, ei, first_bb->succs)
3084 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3085 || e->dest == first_bb)
3086 return;
3087 else if (single_pred_p (e->dest))
3089 stmt = last_stmt (e->dest);
3090 if (stmt
3091 && gimple_code (stmt) == GIMPLE_COND
3092 && EDGE_COUNT (e->dest->succs) == 2)
3094 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3095 break;
3096 else
3097 other_bb = NULL;
3099 else if (stmt
3100 && final_range_test_p (stmt)
3101 && find_edge (first_bb, single_succ (e->dest)))
3103 other_bb = single_succ (e->dest);
3104 if (other_bb == first_bb)
3105 other_bb = NULL;
3108 if (other_bb == NULL)
3109 return;
3111 /* Now do the forward search, moving last_bb to successor bbs
3112 that aren't other_bb. */
3113 while (EDGE_COUNT (last_bb->succs) == 2)
3115 FOR_EACH_EDGE (e, ei, last_bb->succs)
3116 if (e->dest != other_bb)
3117 break;
3118 if (e == NULL)
3119 break;
3120 if (!single_pred_p (e->dest))
3121 break;
3122 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3123 break;
3124 if (!no_side_effect_bb (e->dest))
3125 break;
3126 last_bb = e->dest;
3128 if (first_bb == last_bb)
3129 return;
3130 /* Here basic blocks first_bb through last_bb's predecessor
3131 end with GIMPLE_COND, all of them have one of the edges to
3132 other_bb and another to another block in the range,
3133 all blocks except first_bb don't have side-effects and
3134 last_bb ends with either GIMPLE_COND, or cast satisfying
3135 final_range_test_p. */
3136 for (bb = last_bb; ; bb = single_pred (bb))
3138 enum tree_code code;
3139 tree lhs, rhs;
3140 inter_bb_range_test_entry bb_ent;
3142 bb_ent.op = NULL_TREE;
3143 bb_ent.first_idx = ops.length ();
3144 bb_ent.last_idx = bb_ent.first_idx;
3145 e = find_edge (bb, other_bb);
3146 stmt = last_stmt (bb);
3147 gimple_set_visited (stmt, true);
3148 if (gimple_code (stmt) != GIMPLE_COND)
3150 use_operand_p use_p;
3151 gimple phi;
3152 edge e2;
3153 unsigned int d;
3155 lhs = gimple_assign_lhs (stmt);
3156 rhs = gimple_assign_rhs1 (stmt);
3157 gcc_assert (bb == last_bb);
3159 /* stmt is
3160 _123 = (int) _234;
3162 followed by:
3163 <bb M>:
3164 # _345 = PHI <_123(N), 1(...), 1(...)>
3166 or 0 instead of 1. If it is 0, the _234
3167 range test is anded together with all the
3168 other range tests, if it is 1, it is ored with
3169 them. */
3170 single_imm_use (lhs, &use_p, &phi);
3171 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3172 e2 = find_edge (first_bb, other_bb);
3173 d = e2->dest_idx;
3174 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3175 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3176 code = BIT_AND_EXPR;
3177 else
3179 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3180 code = BIT_IOR_EXPR;
3183 /* If _234 SSA_NAME_DEF_STMT is
3184 _234 = _567 | _789;
3185 (or &, corresponding to 1/0 in the phi arguments),
3186 push into ops the individual range test arguments
3187 of the bitwise or resp. and, recursively. */
3188 if (!get_ops (rhs, code, &ops,
3189 loop_containing_stmt (stmt))
3190 && has_single_use (rhs))
3192 /* Otherwise, push the _234 range test itself. */
3193 operand_entry_t oe
3194 = (operand_entry_t) pool_alloc (operand_entry_pool);
3196 oe->op = rhs;
3197 oe->rank = code;
3198 oe->id = 0;
3199 oe->count = 1;
3200 ops.safe_push (oe);
3201 bb_ent.last_idx++;
3203 else
3204 bb_ent.last_idx = ops.length ();
3205 bb_ent.op = rhs;
3206 bbinfo.safe_push (bb_ent);
3207 continue;
3209 /* Otherwise stmt is GIMPLE_COND. */
3210 code = gimple_cond_code (stmt);
3211 lhs = gimple_cond_lhs (stmt);
3212 rhs = gimple_cond_rhs (stmt);
3213 if (TREE_CODE (lhs) == SSA_NAME
3214 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3215 && ((code != EQ_EXPR && code != NE_EXPR)
3216 || rhs != boolean_false_node
3217 /* Either push into ops the individual bitwise
3218 or resp. and operands, depending on which
3219 edge is other_bb. */
3220 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3221 ^ (code == EQ_EXPR))
3222 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3223 loop_containing_stmt (stmt))))
3225 /* Or push the GIMPLE_COND stmt itself. */
3226 operand_entry_t oe
3227 = (operand_entry_t) pool_alloc (operand_entry_pool);
3229 oe->op = NULL;
3230 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3231 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3232 /* oe->op = NULL signals that there is no SSA_NAME
3233 for the range test, and oe->id instead is the
3234 basic block number at whose end the GIMPLE_COND
3235 is. */
3236 oe->id = bb->index;
3237 oe->count = 1;
3238 ops.safe_push (oe);
3239 bb_ent.op = NULL;
3240 bb_ent.last_idx++;
3242 else if (ops.length () > bb_ent.first_idx)
3244 bb_ent.op = lhs;
3245 bb_ent.last_idx = ops.length ();
3247 bbinfo.safe_push (bb_ent);
3248 if (bb == first_bb)
3249 break;
3251 if (ops.length () > 1)
3252 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3253 if (any_changes)
3255 unsigned int idx;
3256 /* update_ops relies on the has_single_use predicates returning the
3257 same values as they did during get_ops earlier. Additionally it
3258 never removes statements, only adds new ones and it should walk
3259 from the single imm use and check the predicate already before
3260 making those changes.
3261 On the other side, the handling of GIMPLE_COND directly can turn
3262 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3263 it needs to be done in a separate loop afterwards. */
3264 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3266 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3267 && bbinfo[idx].op != NULL_TREE)
3269 tree new_op;
3271 stmt = last_stmt (bb);
3272 new_op = update_ops (bbinfo[idx].op,
3273 (enum tree_code)
3274 ops[bbinfo[idx].first_idx]->rank,
3275 ops, &bbinfo[idx].first_idx,
3276 loop_containing_stmt (stmt));
3277 if (new_op == NULL_TREE)
3279 gcc_assert (bb == last_bb);
3280 new_op = ops[bbinfo[idx].first_idx++]->op;
3282 if (bbinfo[idx].op != new_op)
3284 imm_use_iterator iter;
3285 use_operand_p use_p;
3286 gimple use_stmt, cast_stmt = NULL;
3288 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3289 if (is_gimple_debug (use_stmt))
3290 continue;
3291 else if (gimple_code (use_stmt) == GIMPLE_COND
3292 || gimple_code (use_stmt) == GIMPLE_PHI)
3293 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3294 SET_USE (use_p, new_op);
3295 else if (gimple_assign_cast_p (use_stmt))
3296 cast_stmt = use_stmt;
3297 else
3298 gcc_unreachable ();
3299 if (cast_stmt)
3301 gcc_assert (bb == last_bb);
3302 tree lhs = gimple_assign_lhs (cast_stmt);
3303 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3304 enum tree_code rhs_code
3305 = gimple_assign_rhs_code (cast_stmt);
3306 gassign *g;
3307 if (is_gimple_min_invariant (new_op))
3309 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3310 g = gimple_build_assign (new_lhs, new_op);
3312 else
3313 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3314 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3315 gimple_set_uid (g, gimple_uid (cast_stmt));
3316 gimple_set_visited (g, true);
3317 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3318 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3319 if (is_gimple_debug (use_stmt))
3320 continue;
3321 else if (gimple_code (use_stmt) == GIMPLE_COND
3322 || gimple_code (use_stmt) == GIMPLE_PHI)
3323 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3324 SET_USE (use_p, new_lhs);
3325 else
3326 gcc_unreachable ();
3330 if (bb == first_bb)
3331 break;
3333 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3335 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3336 && bbinfo[idx].op == NULL_TREE
3337 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3339 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3340 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3341 gimple_cond_make_false (cond_stmt);
3342 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3343 gimple_cond_make_true (cond_stmt);
3344 else
3346 gimple_cond_set_code (cond_stmt, NE_EXPR);
3347 gimple_cond_set_lhs (cond_stmt,
3348 ops[bbinfo[idx].first_idx]->op);
3349 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3351 update_stmt (cond_stmt);
3353 if (bb == first_bb)
3354 break;
3359 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3360 of STMT in its operands. This is also known as a "destructive
3361 update" operation. */
3363 static bool
3364 is_phi_for_stmt (gimple stmt, tree operand)
3366 gimple def_stmt;
3367 gphi *def_phi;
3368 tree lhs;
3369 use_operand_p arg_p;
3370 ssa_op_iter i;
3372 if (TREE_CODE (operand) != SSA_NAME)
3373 return false;
3375 lhs = gimple_assign_lhs (stmt);
3377 def_stmt = SSA_NAME_DEF_STMT (operand);
3378 def_phi = dyn_cast <gphi *> (def_stmt);
3379 if (!def_phi)
3380 return false;
3382 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3383 if (lhs == USE_FROM_PTR (arg_p))
3384 return true;
3385 return false;
3388 /* Remove def stmt of VAR if VAR has zero uses and recurse
3389 on rhs1 operand if so. */
3391 static void
3392 remove_visited_stmt_chain (tree var)
3394 gimple stmt;
3395 gimple_stmt_iterator gsi;
3397 while (1)
3399 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3400 return;
3401 stmt = SSA_NAME_DEF_STMT (var);
3402 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3404 var = gimple_assign_rhs1 (stmt);
3405 gsi = gsi_for_stmt (stmt);
3406 reassoc_remove_stmt (&gsi);
3407 release_defs (stmt);
3409 else
3410 return;
3414 /* This function checks three consecutive operands in
3415 the passed operand vector OPS starting from OPINDEX and
3416 swaps two operands if it is profitable for the binary operation
3417 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3419 We pair ops with the same rank if possible.
3421 The alternative we try is to see if STMT is a destructive
3422 update style statement, which is like:
3423 b = phi (a, ...)
3424 a = c + b;
3425 In that case, we want to use the destructive update form to
3426 expose the possible vectorizer sum reduction opportunity.
3427 In that case, the third operand will be the phi node. This
3428 check is not performed if STMT is null.
3430 We could, of course, try to be better as noted above, and do a
3431 lot of work to try to find these opportunities in >3 operand
3432 cases, but it is unlikely to be worth it. */
3434 static void
3435 swap_ops_for_binary_stmt (vec<operand_entry_t> ops,
3436 unsigned int opindex, gimple stmt)
3438 operand_entry_t oe1, oe2, oe3;
3440 oe1 = ops[opindex];
3441 oe2 = ops[opindex + 1];
3442 oe3 = ops[opindex + 2];
3444 if ((oe1->rank == oe2->rank
3445 && oe2->rank != oe3->rank)
3446 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3447 && !is_phi_for_stmt (stmt, oe1->op)
3448 && !is_phi_for_stmt (stmt, oe2->op)))
3450 struct operand_entry temp = *oe3;
3451 oe3->op = oe1->op;
3452 oe3->rank = oe1->rank;
3453 oe1->op = temp.op;
3454 oe1->rank = temp.rank;
3456 else if ((oe1->rank == oe3->rank
3457 && oe2->rank != oe3->rank)
3458 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3459 && !is_phi_for_stmt (stmt, oe1->op)
3460 && !is_phi_for_stmt (stmt, oe3->op)))
3462 struct operand_entry temp = *oe2;
3463 oe2->op = oe1->op;
3464 oe2->rank = oe1->rank;
3465 oe1->op = temp.op;
3466 oe1->rank = temp.rank;
3470 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3471 two definitions, otherwise return STMT. */
3473 static inline gimple
3474 find_insert_point (gimple stmt, tree rhs1, tree rhs2)
3476 if (TREE_CODE (rhs1) == SSA_NAME
3477 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3478 stmt = SSA_NAME_DEF_STMT (rhs1);
3479 if (TREE_CODE (rhs2) == SSA_NAME
3480 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3481 stmt = SSA_NAME_DEF_STMT (rhs2);
3482 return stmt;
3485 /* Recursively rewrite our linearized statements so that the operators
3486 match those in OPS[OPINDEX], putting the computation in rank
3487 order. Return new lhs. */
3489 static tree
3490 rewrite_expr_tree (gimple stmt, unsigned int opindex,
3491 vec<operand_entry_t> ops, bool changed)
3493 tree rhs1 = gimple_assign_rhs1 (stmt);
3494 tree rhs2 = gimple_assign_rhs2 (stmt);
3495 tree lhs = gimple_assign_lhs (stmt);
3496 operand_entry_t oe;
3498 /* The final recursion case for this function is that you have
3499 exactly two operations left.
3500 If we had exactly one op in the entire list to start with, we
3501 would have never called this function, and the tail recursion
3502 rewrites them one at a time. */
3503 if (opindex + 2 == ops.length ())
3505 operand_entry_t oe1, oe2;
3507 oe1 = ops[opindex];
3508 oe2 = ops[opindex + 1];
3510 if (rhs1 != oe1->op || rhs2 != oe2->op)
3512 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3513 unsigned int uid = gimple_uid (stmt);
3515 if (dump_file && (dump_flags & TDF_DETAILS))
3517 fprintf (dump_file, "Transforming ");
3518 print_gimple_stmt (dump_file, stmt, 0, 0);
3521 if (changed)
3523 gimple insert_point = find_insert_point (stmt, oe1->op, oe2->op);
3524 lhs = make_ssa_name (TREE_TYPE (lhs));
3525 stmt
3526 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3527 oe1->op, oe2->op);
3528 gimple_set_uid (stmt, uid);
3529 gimple_set_visited (stmt, true);
3530 if (insert_point == gsi_stmt (gsi))
3531 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3532 else
3533 insert_stmt_after (stmt, insert_point);
3535 else
3537 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3538 == stmt);
3539 gimple_assign_set_rhs1 (stmt, oe1->op);
3540 gimple_assign_set_rhs2 (stmt, oe2->op);
3541 update_stmt (stmt);
3544 if (rhs1 != oe1->op && rhs1 != oe2->op)
3545 remove_visited_stmt_chain (rhs1);
3547 if (dump_file && (dump_flags & TDF_DETAILS))
3549 fprintf (dump_file, " into ");
3550 print_gimple_stmt (dump_file, stmt, 0, 0);
3553 return lhs;
3556 /* If we hit here, we should have 3 or more ops left. */
3557 gcc_assert (opindex + 2 < ops.length ());
3559 /* Rewrite the next operator. */
3560 oe = ops[opindex];
3562 /* Recurse on the LHS of the binary operator, which is guaranteed to
3563 be the non-leaf side. */
3564 tree new_rhs1
3565 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3566 changed || oe->op != rhs2);
3568 if (oe->op != rhs2 || new_rhs1 != rhs1)
3570 if (dump_file && (dump_flags & TDF_DETAILS))
3572 fprintf (dump_file, "Transforming ");
3573 print_gimple_stmt (dump_file, stmt, 0, 0);
3576 /* If changed is false, this is either opindex == 0
3577 or all outer rhs2's were equal to corresponding oe->op,
3578 and powi_result is NULL.
3579 That means lhs is equivalent before and after reassociation.
3580 Otherwise ensure the old lhs SSA_NAME is not reused and
3581 create a new stmt as well, so that any debug stmts will be
3582 properly adjusted. */
3583 if (changed)
3585 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3586 unsigned int uid = gimple_uid (stmt);
3587 gimple insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3589 lhs = make_ssa_name (TREE_TYPE (lhs));
3590 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3591 new_rhs1, oe->op);
3592 gimple_set_uid (stmt, uid);
3593 gimple_set_visited (stmt, true);
3594 if (insert_point == gsi_stmt (gsi))
3595 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3596 else
3597 insert_stmt_after (stmt, insert_point);
3599 else
3601 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3602 == stmt);
3603 gimple_assign_set_rhs1 (stmt, new_rhs1);
3604 gimple_assign_set_rhs2 (stmt, oe->op);
3605 update_stmt (stmt);
3608 if (dump_file && (dump_flags & TDF_DETAILS))
3610 fprintf (dump_file, " into ");
3611 print_gimple_stmt (dump_file, stmt, 0, 0);
3614 return lhs;
3617 /* Find out how many cycles we need to compute a statement chain.
3618 OPS_NUM holds the number of statements in a chain. CPU_WIDTH is the
3619 maximum number of independent statements we may execute per cycle. */
3621 static int
3622 get_required_cycles (int ops_num, int cpu_width)
3624 int res;
3625 int elog;
3626 unsigned int rest;
3628 /* While we have more than 2 * cpu_width operands
3629 we may reduce the number of operands by cpu_width
3630 per cycle. */
3631 res = ops_num / (2 * cpu_width);
3633 /* The remaining operand count may be halved each cycle
3634 until we have only one operand. */
3635 rest = (unsigned)(ops_num - res * cpu_width);
3636 elog = exact_log2 (rest);
3637 if (elog >= 0)
3638 res += elog;
3639 else
3640 res += floor_log2 (rest) + 1;
3642 return res;
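/* Worked example for the routine above: with ops_num == 8 and
   cpu_width == 2 we get res = 8 / (2 * 2) = 2 cycles of full-width
   reduction, leaving rest = 8 - 2 * 2 = 4 operands, which need
   exact_log2 (4) = 2 more halving cycles, i.e. 4 cycles in total.  */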
3645 /* Returns an optimal number of registers to use for computation of
3646 given statements. */
3648 static int
3649 get_reassociation_width (int ops_num, enum tree_code opc,
3650 machine_mode mode)
3652 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3653 int width;
3654 int width_min;
3655 int cycles_best;
3657 if (param_width > 0)
3658 width = param_width;
3659 else
3660 width = targetm.sched.reassociation_width (opc, mode);
3662 if (width == 1)
3663 return width;
3665 /* Get the minimal time required for sequence computation. */
3666 cycles_best = get_required_cycles (ops_num, width);
3668 /* Check if we may use a smaller width and still compute the sequence
3669 in the same time. That would allow us to reduce register usage.
3670 get_required_cycles is monotonically increasing with lower width
3671 so we can perform a binary search for the minimal width that still
3672 results in the optimal cycle count. */
3673 width_min = 1;
3674 while (width > width_min)
3676 int width_mid = (width + width_min) / 2;
3678 if (get_required_cycles (ops_num, width_mid) == cycles_best)
3679 width = width_mid;
3680 else if (width_min < width_mid)
3681 width_min = width_mid;
3682 else
3683 break;
3686 return width;
3689 /* Recursively rewrite our linearized statements so that the operators
3690 match those in OPS[OPINDEX], putting the computation in rank
3691 order and trying to allow operations to be executed in
3692 parallel. */
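/* Illustrative sketch, not part of the pass: for a chain
     t = a + b + c + d
   a serial rewrite emits
     t1 = a + b;  t2 = t1 + c;  t = t2 + d;
   whereas with width 2 the routine below can emit
     t1 = a + b;  t2 = c + d;  t = t1 + t2;
   so the first two additions no longer depend on each other and may
   issue in the same cycle.  */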
3694 static void
3695 rewrite_expr_tree_parallel (gassign *stmt, int width,
3696 vec<operand_entry_t> ops)
3698 enum tree_code opcode = gimple_assign_rhs_code (stmt);
3699 int op_num = ops.length ();
3700 int stmt_num = op_num - 1;
3701 gimple *stmts = XALLOCAVEC (gimple, stmt_num);
3702 int op_index = op_num - 1;
3703 int stmt_index = 0;
3704 int ready_stmts_end = 0;
3705 int i = 0;
3706 tree last_rhs1 = gimple_assign_rhs1 (stmt);
3708 /* We start expression rewriting from the top statements.
3709 So, in this loop we create a full list of statements
3710 we will work with. */
3711 stmts[stmt_num - 1] = stmt;
3712 for (i = stmt_num - 2; i >= 0; i--)
3713 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
3715 for (i = 0; i < stmt_num; i++)
3717 tree op1, op2;
3719 /* Determine whether we should use results of
3720 already handled statements or not. */
3721 if (ready_stmts_end == 0
3722 && (i - stmt_index >= width || op_index < 1))
3723 ready_stmts_end = i;
3725 /* Now we choose operands for the next statement. A nonzero
3726 value of ready_stmts_end means that we should use
3727 the results of already generated statements as new operands. */
3728 if (ready_stmts_end > 0)
3730 op1 = gimple_assign_lhs (stmts[stmt_index++]);
3731 if (ready_stmts_end > stmt_index)
3732 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3733 else if (op_index >= 0)
3734 op2 = ops[op_index--]->op;
3735 else
3737 gcc_assert (stmt_index < i);
3738 op2 = gimple_assign_lhs (stmts[stmt_index++]);
3741 if (stmt_index >= ready_stmts_end)
3742 ready_stmts_end = 0;
3744 else
3746 if (op_index > 1)
3747 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
3748 op2 = ops[op_index--]->op;
3749 op1 = ops[op_index--]->op;
3752 /* If we emit the last statement then we should put
3753 operands into the last statement. It will also
3754 break the loop. */
3755 if (op_index < 0 && stmt_index == i)
3756 i = stmt_num - 1;
3758 if (dump_file && (dump_flags & TDF_DETAILS))
3760 fprintf (dump_file, "Transforming ");
3761 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3764 /* We keep original statement only for the last one. All
3765 others are recreated. */
3766 if (i == stmt_num - 1)
3768 gimple_assign_set_rhs1 (stmts[i], op1);
3769 gimple_assign_set_rhs2 (stmts[i], op2);
3770 update_stmt (stmts[i]);
3772 else
3773 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
3775 if (dump_file && (dump_flags & TDF_DETAILS))
3777 fprintf (dump_file, " into ");
3778 print_gimple_stmt (dump_file, stmts[i], 0, 0);
3782 remove_visited_stmt_chain (last_rhs1);
3785 /* Transform STMT, which is really (A + B) + (C + D), into the left
3786 linear form, ((A+B)+C)+D.
3787 Recurse on D if necessary. */
3789 static void
3790 linearize_expr (gimple stmt)
3792 gimple_stmt_iterator gsi;
3793 gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3794 gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3795 gimple oldbinrhs = binrhs;
3796 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
3797 gimple newbinrhs = NULL;
3798 struct loop *loop = loop_containing_stmt (stmt);
3799 tree lhs = gimple_assign_lhs (stmt);
3801 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
3802 && is_reassociable_op (binrhs, rhscode, loop));
3804 gsi = gsi_for_stmt (stmt);
3806 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
3807 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
3808 gimple_assign_rhs_code (binrhs),
3809 gimple_assign_lhs (binlhs),
3810 gimple_assign_rhs2 (binrhs));
3811 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
3812 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
3813 gimple_set_uid (binrhs, gimple_uid (stmt));
3815 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
3816 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3818 if (dump_file && (dump_flags & TDF_DETAILS))
3820 fprintf (dump_file, "Linearized: ");
3821 print_gimple_stmt (dump_file, stmt, 0, 0);
3824 reassociate_stats.linearized++;
3825 update_stmt (stmt);
3827 gsi = gsi_for_stmt (oldbinrhs);
3828 reassoc_remove_stmt (&gsi);
3829 release_defs (oldbinrhs);
3831 gimple_set_visited (stmt, true);
3832 gimple_set_visited (binlhs, true);
3833 gimple_set_visited (binrhs, true);
3835 /* Tail recurse on the new rhs if it still needs reassociation. */
3836 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
3837 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
3838 want to change the algorithm while converting to tuples. */
3839 linearize_expr (stmt);
3842 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
3843 it. Otherwise, return NULL. */
3845 static gimple
3846 get_single_immediate_use (tree lhs)
3848 use_operand_p immuse;
3849 gimple immusestmt;
3851 if (TREE_CODE (lhs) == SSA_NAME
3852 && single_imm_use (lhs, &immuse, &immusestmt)
3853 && is_gimple_assign (immusestmt))
3854 return immusestmt;
3856 return NULL;
3859 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
3860 representing the negated value. Insertions of any necessary
3861 instructions go before GSI.
3862 This function is recursive in that, if you hand it "a_5" as the
3863 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
3864 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
3866 static tree
3867 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
3869 gimple negatedefstmt = NULL;
3870 tree resultofnegate;
3871 gimple_stmt_iterator gsi;
3872 unsigned int uid;
3874 /* If we are trying to negate a name, defined by an add, negate the
3875 add operands instead. */
3876 if (TREE_CODE (tonegate) == SSA_NAME)
3877 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
3878 if (TREE_CODE (tonegate) == SSA_NAME
3879 && is_gimple_assign (negatedefstmt)
3880 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
3881 && has_single_use (gimple_assign_lhs (negatedefstmt))
3882 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
3884 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
3885 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
3886 tree lhs = gimple_assign_lhs (negatedefstmt);
3887 gimple g;
3889 gsi = gsi_for_stmt (negatedefstmt);
3890 rhs1 = negate_value (rhs1, &gsi);
3892 gsi = gsi_for_stmt (negatedefstmt);
3893 rhs2 = negate_value (rhs2, &gsi);
3895 gsi = gsi_for_stmt (negatedefstmt);
3896 lhs = make_ssa_name (TREE_TYPE (lhs));
3897 gimple_set_visited (negatedefstmt, true);
3898 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
3899 gimple_set_uid (g, gimple_uid (negatedefstmt));
3900 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3901 return lhs;
3904 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
3905 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
3906 NULL_TREE, true, GSI_SAME_STMT);
3907 gsi = *gsip;
3908 uid = gimple_uid (gsi_stmt (gsi));
3909 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
3911 gimple stmt = gsi_stmt (gsi);
3912 if (gimple_uid (stmt) != 0)
3913 break;
3914 gimple_set_uid (stmt, uid);
3916 return resultofnegate;
3919 /* Return true if we should break up the subtract in STMT into an add
3920 with negate. This is true when the subtract's operands are really
3921 adds, or the subtract itself is used in an add expression. In
3922 either case, breaking up the subtract into an add with negate
3923 exposes the adds to reassociation. */
3925 static bool
3926 should_break_up_subtract (gimple stmt)
3928 tree lhs = gimple_assign_lhs (stmt);
3929 tree binlhs = gimple_assign_rhs1 (stmt);
3930 tree binrhs = gimple_assign_rhs2 (stmt);
3931 gimple immusestmt;
3932 struct loop *loop = loop_containing_stmt (stmt);
3934 if (TREE_CODE (binlhs) == SSA_NAME
3935 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
3936 return true;
3938 if (TREE_CODE (binrhs) == SSA_NAME
3939 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
3940 return true;
3942 if (TREE_CODE (lhs) == SSA_NAME
3943 && (immusestmt = get_single_immediate_use (lhs))
3944 && is_gimple_assign (immusestmt)
3945 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
3946 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
3947 return true;
3948 return false;
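/* A hedged example (hypothetical SSA names): given

     c_3 = a_1 + b_2;
     d_5 = c_3 - e_4;
     f_7 = d_5 + g_6;

   should_break_up_subtract returns true for d_5 = c_3 - e_4, both because
   its first operand is defined by a reassociable PLUS_EXPR and because its
   single immediate use is itself an addition; either condition alone is
   enough.  */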
3951 /* Transform STMT from A - B into A + -B. */
3953 static void
3954 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
3956 tree rhs1 = gimple_assign_rhs1 (stmt);
3957 tree rhs2 = gimple_assign_rhs2 (stmt);
3959 if (dump_file && (dump_flags & TDF_DETAILS))
3961 fprintf (dump_file, "Breaking up subtract ");
3962 print_gimple_stmt (dump_file, stmt, 0, 0);
3965 rhs2 = negate_value (rhs2, gsip);
3966 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
3967 update_stmt (stmt);
3970 /* Determine whether STMT is a builtin call that raises an SSA name
3971 to an integer power and has only one use. If so, and this is early
3972 reassociation and unsafe math optimizations are permitted, place
3973 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
3974 If any of these conditions does not hold, return FALSE. */
3976 static bool
3977 acceptable_pow_call (gimple stmt, tree *base, HOST_WIDE_INT *exponent)
3979 tree fndecl, arg1;
3980 REAL_VALUE_TYPE c, cint;
3982 if (!first_pass_instance
3983 || !flag_unsafe_math_optimizations
3984 || !is_gimple_call (stmt)
3985 || !has_single_use (gimple_call_lhs (stmt)))
3986 return false;
3988 fndecl = gimple_call_fndecl (stmt);
3990 if (!fndecl
3991 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL)
3992 return false;
3994 switch (DECL_FUNCTION_CODE (fndecl))
3996 CASE_FLT_FN (BUILT_IN_POW):
3997 if (flag_errno_math)
3998 return false;
4000 *base = gimple_call_arg (stmt, 0);
4001 arg1 = gimple_call_arg (stmt, 1);
4003 if (TREE_CODE (arg1) != REAL_CST)
4004 return false;
4006 c = TREE_REAL_CST (arg1);
4008 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4009 return false;
4011 *exponent = real_to_integer (&c);
4012 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4013 if (!real_identical (&c, &cint))
4014 return false;
4016 break;
4018 CASE_FLT_FN (BUILT_IN_POWI):
4019 *base = gimple_call_arg (stmt, 0);
4020 arg1 = gimple_call_arg (stmt, 1);
4022 if (!tree_fits_shwi_p (arg1))
4023 return false;
4025 *exponent = tree_to_shwi (arg1);
4026 break;
4028 default:
4029 return false;
4032 /* Expanding negative exponents is generally unproductive, so we don't
4033 complicate matters with those. Exponents of zero and one should
4034 have been handled by expression folding. */
4035 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4036 return false;
4038 return true;
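/* Hedged examples of calls acceptable_pow_call would accept or reject,
   assuming the first pass instance and -funsafe-math-optimizations
   (the SSA names are hypothetical):

     t_1 = __builtin_pow (x_2, 4.0);    accepted: *base = x_2, *exponent = 4
     t_3 = __builtin_powi (y_4, 6);     accepted: *base = y_4, *exponent = 6
     t_5 = __builtin_pow (x_2, 2.5);    rejected: exponent is not integral
     t_7 = __builtin_powi (y_4, 1);     rejected: exponent is less than 2

   Only calls whose result has a single use qualify.  */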
4041 /* Recursively linearize a binary expression that is the RHS of STMT.
4042 Place the operands of the expression tree in the vector named OPS. */
4044 static void
4045 linearize_expr_tree (vec<operand_entry_t> *ops, gimple stmt,
4046 bool is_associative, bool set_visited)
4048 tree binlhs = gimple_assign_rhs1 (stmt);
4049 tree binrhs = gimple_assign_rhs2 (stmt);
4050 gimple binlhsdef = NULL, binrhsdef = NULL;
4051 bool binlhsisreassoc = false;
4052 bool binrhsisreassoc = false;
4053 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4054 struct loop *loop = loop_containing_stmt (stmt);
4055 tree base = NULL_TREE;
4056 HOST_WIDE_INT exponent = 0;
4058 if (set_visited)
4059 gimple_set_visited (stmt, true);
4061 if (TREE_CODE (binlhs) == SSA_NAME)
4063 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4064 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4065 && !stmt_could_throw_p (binlhsdef));
4068 if (TREE_CODE (binrhs) == SSA_NAME)
4070 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4071 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4072 && !stmt_could_throw_p (binrhsdef));
4075 /* If the LHS is not reassociable, but the RHS is, we need to swap
4076 them. If neither is reassociable, there is nothing we can do, so
4077 just put them in the ops vector. If the LHS is reassociable,
4078 linearize it. If both are reassociable, then linearize the RHS
4079 and the LHS. */
4081 if (!binlhsisreassoc)
4083 tree temp;
4085 /* If this is not an associative operation, like division, give up. */
4086 if (!is_associative)
4088 add_to_ops_vec (ops, binrhs);
4089 return;
4092 if (!binrhsisreassoc)
4094 if (rhscode == MULT_EXPR
4095 && TREE_CODE (binrhs) == SSA_NAME
4096 && acceptable_pow_call (binrhsdef, &base, &exponent))
4098 add_repeat_to_ops_vec (ops, base, exponent);
4099 gimple_set_visited (binrhsdef, true);
4101 else
4102 add_to_ops_vec (ops, binrhs);
4104 if (rhscode == MULT_EXPR
4105 && TREE_CODE (binlhs) == SSA_NAME
4106 && acceptable_pow_call (binlhsdef, &base, &exponent))
4108 add_repeat_to_ops_vec (ops, base, exponent);
4109 gimple_set_visited (binlhsdef, true);
4111 else
4112 add_to_ops_vec (ops, binlhs);
4114 return;
4117 if (dump_file && (dump_flags & TDF_DETAILS))
4119 fprintf (dump_file, "swapping operands of ");
4120 print_gimple_stmt (dump_file, stmt, 0, 0);
4123 swap_ssa_operands (stmt,
4124 gimple_assign_rhs1_ptr (stmt),
4125 gimple_assign_rhs2_ptr (stmt));
4126 update_stmt (stmt);
4128 if (dump_file && (dump_flags & TDF_DETAILS))
4130 fprintf (dump_file, " is now ");
4131 print_gimple_stmt (dump_file, stmt, 0, 0);
4134 /* We want to make it so the lhs is always the reassociative op,
4135 so swap. */
4136 temp = binlhs;
4137 binlhs = binrhs;
4138 binrhs = temp;
4140 else if (binrhsisreassoc)
4142 linearize_expr (stmt);
4143 binlhs = gimple_assign_rhs1 (stmt);
4144 binrhs = gimple_assign_rhs2 (stmt);
4147 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4148 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4149 rhscode, loop));
4150 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4151 is_associative, set_visited);
4153 if (rhscode == MULT_EXPR
4154 && TREE_CODE (binrhs) == SSA_NAME
4155 && acceptable_pow_call (SSA_NAME_DEF_STMT (binrhs), &base, &exponent))
4157 add_repeat_to_ops_vec (ops, base, exponent);
4158 gimple_set_visited (SSA_NAME_DEF_STMT (binrhs), true);
4160 else
4161 add_to_ops_vec (ops, binrhs);
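/* A hedged sketch of linearize_expr_tree on a small multiply chain
   (hypothetical SSA names):

     t_5 = a_1 * b_2;
     t_6 = t_5 * c_3;
     t_7 = t_6 * d_4;

   Linearizing the tree rooted at the statement defining t_7 recurses down
   the left spine and fills OPS with a_1, b_2, c_3 and d_4, marking the
   inner multiplies visited so reassociate_bb does not process them again.
   An operand defined by an acceptable __builtin_pow/powi call is instead
   recorded with add_repeat_to_ops_vec (base and exponent) for the later
   __builtin_powi optimization.  */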
4164 /* Repropagate the negates back into subtracts, since no other pass
4165 currently does it. */
4167 static void
4168 repropagate_negates (void)
4170 unsigned int i = 0;
4171 tree negate;
4173 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4175 gimple user = get_single_immediate_use (negate);
4177 if (!user || !is_gimple_assign (user))
4178 continue;
4180 /* The negate operand can be either operand of a PLUS_EXPR
4181 (it can be the LHS if the RHS is a constant for example).
4183 Force the negate operand to the RHS of the PLUS_EXPR, then
4184 transform the PLUS_EXPR into a MINUS_EXPR. */
4185 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4187 /* If the negated operand appears on the LHS of the
4188 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4189 to force the negated operand to the RHS of the PLUS_EXPR. */
4190 if (gimple_assign_rhs1 (user) == negate)
4192 swap_ssa_operands (user,
4193 gimple_assign_rhs1_ptr (user),
4194 gimple_assign_rhs2_ptr (user));
4197 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4198 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4199 if (gimple_assign_rhs2 (user) == negate)
4201 tree rhs1 = gimple_assign_rhs1 (user);
4202 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
4203 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4204 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4205 update_stmt (user);
4208 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4210 if (gimple_assign_rhs1 (user) == negate)
4212 /* We have
4213 x = -a
4214 y = x - b
4215 which we transform into
4216 x = a + b
4217 y = -x .
4218 This pushes down the negate which we possibly can merge
4219 into some other operation, hence insert it into the
4220 plus_negates vector. */
4221 gimple feed = SSA_NAME_DEF_STMT (negate);
4222 tree a = gimple_assign_rhs1 (feed);
4223 tree b = gimple_assign_rhs2 (user);
4224 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4225 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4226 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4227 gimple g = gimple_build_assign (x, PLUS_EXPR, a, b);
4228 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4229 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4230 user = gsi_stmt (gsi2);
4231 update_stmt (user);
4232 reassoc_remove_stmt (&gsi);
4233 release_defs (feed);
4234 plus_negates.safe_push (gimple_assign_lhs (user));
4236 else
4238 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4239 rid of one operation. */
4240 gimple feed = SSA_NAME_DEF_STMT (negate);
4241 tree a = gimple_assign_rhs1 (feed);
4242 tree rhs1 = gimple_assign_rhs1 (user);
4243 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4244 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4245 update_stmt (gsi_stmt (gsi));
4251 /* Returns true if OP is of a type for which we can do reassociation.
4252 That is for integral types with wrapping overflow, non-saturating
4253 fixed-point types, and floating point types when associative-math is enabled. */
4255 static bool
4256 can_reassociate_p (tree op)
4258 tree type = TREE_TYPE (op);
4259 if ((INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4260 || NON_SAT_FIXED_POINT_TYPE_P (type)
4261 || (flag_associative_math && FLOAT_TYPE_P (type)))
4262 return true;
4263 return false;
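/* Hedged examples for can_reassociate_p: an operand of type "unsigned int"
   qualifies (integral with wrapping overflow), a plain signed "int" does
   not when signed overflow is undefined, a _Fract operand qualifies only
   if it is non-saturating, and a "double" operand qualifies only under
   -fassociative-math.  */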
4266 /* Break up subtract operations in block BB.
4268 We do this top down because we don't know whether the subtract is
4269 part of a possible chain of reassociation except at the top.
4271 IE given
4272 d = f + g
4273 c = a + e
4274 b = c - d
4275 q = b - r
4276 k = t - q
4278 we want to break up k = t - q, but we won't until we've transformed q
4279 = b - r, which won't be broken up until we transform b = c - d.
4281 En passant, clear the GIMPLE visited flag on every statement
4282 and set UIDs within each basic block. */
4284 static void
4285 break_up_subtract_bb (basic_block bb)
4287 gimple_stmt_iterator gsi;
4288 basic_block son;
4289 unsigned int uid = 1;
4291 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4293 gimple stmt = gsi_stmt (gsi);
4294 gimple_set_visited (stmt, false);
4295 gimple_set_uid (stmt, uid++);
4297 if (!is_gimple_assign (stmt)
4298 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4299 continue;
4301 /* Look for simple gimple subtract operations. */
4302 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4304 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4305 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4306 continue;
4308 /* Check for a subtract used only in an addition. If this
4309 is the case, transform it into add of a negate for better
4310 reassociation. IE transform C = A-B into C = A + -B if C
4311 is only used in an addition. */
4312 if (should_break_up_subtract (stmt))
4313 break_up_subtract (stmt, &gsi);
4315 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4316 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4317 plus_negates.safe_push (gimple_assign_lhs (stmt));
4319 for (son = first_dom_son (CDI_DOMINATORS, bb);
4320 son;
4321 son = next_dom_son (CDI_DOMINATORS, son))
4322 break_up_subtract_bb (son);
4325 /* Used for repeated factor analysis. */
4326 struct repeat_factor_d
4328 /* An SSA name that occurs in a multiply chain. */
4329 tree factor;
4331 /* Cached rank of the factor. */
4332 unsigned rank;
4334 /* Number of occurrences of the factor in the chain. */
4335 HOST_WIDE_INT count;
4337 /* An SSA name representing the product of this factor and
4338 all factors appearing later in the repeated factor vector. */
4339 tree repr;
4342 typedef struct repeat_factor_d repeat_factor, *repeat_factor_t;
4343 typedef const struct repeat_factor_d *const_repeat_factor_t;
4346 static vec<repeat_factor> repeat_factor_vec;
4348 /* Used for sorting the repeat factor vector. Sort primarily by
4349 ascending occurrence count, secondarily by descending rank. */
4351 static int
4352 compare_repeat_factors (const void *x1, const void *x2)
4354 const_repeat_factor_t rf1 = (const_repeat_factor_t) x1;
4355 const_repeat_factor_t rf2 = (const_repeat_factor_t) x2;
4357 if (rf1->count != rf2->count)
4358 return rf1->count < rf2->count ? -1 : 1;
4360 return rf1->rank > rf2->rank ? -1 : rf1->rank < rf2->rank ? 1 : 0;
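/* A hedged worked example of the resulting order (hypothetical factors):
   with entries {x: count 2, rank 9}, {y: count 3, rank 4} and
   {z: count 4, rank 7}, the sort yields x, y, z by ascending count; if
   y and z instead both had count 3, z would precede y because ties are
   broken by descending rank.  */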
4363 /* Look for repeated operands in OPS in the multiply tree rooted at
4364 STMT. Replace them with an optimal sequence of multiplies and powi
4365 builtin calls, and remove the used operands from OPS. Return an
4366 SSA name representing the value of the replacement sequence. */
4368 static tree
4369 attempt_builtin_powi (gimple stmt, vec<operand_entry_t> *ops)
4371 unsigned i, j, vec_len;
4372 int ii;
4373 operand_entry_t oe;
4374 repeat_factor_t rf1, rf2;
4375 repeat_factor rfnew;
4376 tree result = NULL_TREE;
4377 tree target_ssa, iter_result;
4378 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4379 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4380 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4381 gimple mul_stmt, pow_stmt;
4383 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4384 target. */
4385 if (!powi_fndecl)
4386 return NULL_TREE;
4388 /* Allocate the repeated factor vector. */
4389 repeat_factor_vec.create (10);
4391 /* Scan the OPS vector for all SSA names in the product and build
4392 up a vector of occurrence counts for each factor. */
4393 FOR_EACH_VEC_ELT (*ops, i, oe)
4395 if (TREE_CODE (oe->op) == SSA_NAME)
4397 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4399 if (rf1->factor == oe->op)
4401 rf1->count += oe->count;
4402 break;
4406 if (j >= repeat_factor_vec.length ())
4408 rfnew.factor = oe->op;
4409 rfnew.rank = oe->rank;
4410 rfnew.count = oe->count;
4411 rfnew.repr = NULL_TREE;
4412 repeat_factor_vec.safe_push (rfnew);
4417 /* Sort the repeated factor vector by (a) increasing occurrence count,
4418 and (b) decreasing rank. */
4419 repeat_factor_vec.qsort (compare_repeat_factors);
4421 /* It is generally best to combine as many base factors as possible
4422 into a product before applying __builtin_powi to the result.
4423 However, the sort order chosen for the repeated factor vector
4424 allows us to cache partial results for the product of the base
4425 factors for subsequent use. When we already have a cached partial
4426 result from a previous iteration, it is best to make use of it
4427 before looking for another __builtin_powi opportunity.
4429 As an example, consider x * x * y * y * y * z * z * z * z.
4430 We want to first compose the product x * y * z, raise it to the
4431 second power, then multiply this by y * z, and finally multiply
4432 by z. This can be done in 5 multiplies provided we cache y * z
4433 for use in both expressions:
4435 t1 = y * z
4436 t2 = t1 * x
4437 t3 = t2 * t2
4438 t4 = t1 * t3
4439 result = t4 * z
4441 If we instead ignored the cached y * z and first multiplied by
4442 the __builtin_powi opportunity z * z, we would get the inferior:
4444 t1 = y * z
4445 t2 = t1 * x
4446 t3 = t2 * t2
4447 t4 = z * z
4448 t5 = t3 * t4
4449 result = t5 * y */
4451 vec_len = repeat_factor_vec.length ();
4453 /* Repeatedly look for opportunities to create a builtin_powi call. */
4454 while (true)
4456 HOST_WIDE_INT power;
4458 /* First look for the largest cached product of factors from
4459 preceding iterations. If found, create a builtin_powi for
4460 it if the minimum occurrence count for its factors is at
4461 least 2, or just use this cached product as our next
4462 multiplicand if the minimum occurrence count is 1. */
4463 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4465 if (rf1->repr && rf1->count > 0)
4466 break;
4469 if (j < vec_len)
4471 power = rf1->count;
4473 if (power == 1)
4475 iter_result = rf1->repr;
4477 if (dump_file && (dump_flags & TDF_DETAILS))
4479 unsigned elt;
4480 repeat_factor_t rf;
4481 fputs ("Multiplying by cached product ", dump_file);
4482 for (elt = j; elt < vec_len; elt++)
4484 rf = &repeat_factor_vec[elt];
4485 print_generic_expr (dump_file, rf->factor, 0);
4486 if (elt < vec_len - 1)
4487 fputs (" * ", dump_file);
4489 fputs ("\n", dump_file);
4492 else
4494 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4495 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4496 build_int_cst (integer_type_node,
4497 power));
4498 gimple_call_set_lhs (pow_stmt, iter_result);
4499 gimple_set_location (pow_stmt, gimple_location (stmt));
4500 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4502 if (dump_file && (dump_flags & TDF_DETAILS))
4504 unsigned elt;
4505 repeat_factor_t rf;
4506 fputs ("Building __builtin_pow call for cached product (",
4507 dump_file);
4508 for (elt = j; elt < vec_len; elt++)
4510 rf = &repeat_factor_vec[elt];
4511 print_generic_expr (dump_file, rf->factor, 0);
4512 if (elt < vec_len - 1)
4513 fputs (" * ", dump_file);
4515 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n",
4516 power);
4520 else
4522 /* Otherwise, find the first factor in the repeated factor
4523 vector whose occurrence count is at least 2. If no such
4524 factor exists, there are no builtin_powi opportunities
4525 remaining. */
4526 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4528 if (rf1->count >= 2)
4529 break;
4532 if (j >= vec_len)
4533 break;
4535 power = rf1->count;
4537 if (dump_file && (dump_flags & TDF_DETAILS))
4539 unsigned elt;
4540 repeat_factor_t rf;
4541 fputs ("Building __builtin_pow call for (", dump_file);
4542 for (elt = j; elt < vec_len; elt++)
4544 rf = &repeat_factor_vec[elt];
4545 print_generic_expr (dump_file, rf->factor, 0);
4546 if (elt < vec_len - 1)
4547 fputs (" * ", dump_file);
4549 fprintf (dump_file, ")^"HOST_WIDE_INT_PRINT_DEC"\n", power);
4552 reassociate_stats.pows_created++;
4554 /* Visit each element of the vector in reverse order (so that
4555 high-occurrence elements are visited first, and within the
4556 same occurrence count, lower-ranked elements are visited
4557 first). Form a linear product of all elements in this order
4558 whose occurrence count is at least that of element J.
4559 Record the SSA name representing the product of each element
4560 with all subsequent elements in the vector. */
4561 if (j == vec_len - 1)
4562 rf1->repr = rf1->factor;
4563 else
4565 for (ii = vec_len - 2; ii >= (int)j; ii--)
4567 tree op1, op2;
4569 rf1 = &repeat_factor_vec[ii];
4570 rf2 = &repeat_factor_vec[ii + 1];
4572 /* Init the last factor's representative to be itself. */
4573 if (!rf2->repr)
4574 rf2->repr = rf2->factor;
4576 op1 = rf1->factor;
4577 op2 = rf2->repr;
4579 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4580 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4581 op1, op2);
4582 gimple_set_location (mul_stmt, gimple_location (stmt));
4583 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4584 rf1->repr = target_ssa;
4586 /* Don't reprocess the multiply we just introduced. */
4587 gimple_set_visited (mul_stmt, true);
4591 /* Form a call to __builtin_powi for the maximum product
4592 just formed, raised to the power obtained earlier. */
4593 rf1 = &repeat_factor_vec[j];
4594 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4595 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4596 build_int_cst (integer_type_node,
4597 power));
4598 gimple_call_set_lhs (pow_stmt, iter_result);
4599 gimple_set_location (pow_stmt, gimple_location (stmt));
4600 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4603 /* If we previously formed at least one other builtin_powi call,
4604 form the product of this one and those others. */
4605 if (result)
4607 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4608 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4609 result, iter_result);
4610 gimple_set_location (mul_stmt, gimple_location (stmt));
4611 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4612 gimple_set_visited (mul_stmt, true);
4613 result = new_result;
4615 else
4616 result = iter_result;
4618 /* Decrement the occurrence count of each element in the product
4619 by the count found above, and remove this many copies of each
4620 factor from OPS. */
4621 for (i = j; i < vec_len; i++)
4623 unsigned k = power;
4624 unsigned n;
4626 rf1 = &repeat_factor_vec[i];
4627 rf1->count -= power;
4629 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4631 if (oe->op == rf1->factor)
4633 if (oe->count <= k)
4635 ops->ordered_remove (n);
4636 k -= oe->count;
4638 if (k == 0)
4639 break;
4641 else
4643 oe->count -= k;
4644 break;
4651 /* At this point all elements in the repeated factor vector have a
4652 remaining occurrence count of 0 or 1, and those with a count of 1
4653 don't have cached representatives. Re-sort the ops vector and
4654 clean up. */
4655 ops->qsort (sort_by_operand_rank);
4656 repeat_factor_vec.release ();
4658 /* Return the final product computed herein. Note that there may
4659 still be some elements with single occurrence count left in OPS;
4660 those will be handled by the normal reassociation logic. */
4661 return result;
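/* A hedged trace of the x * x * y * y * y * z * z * z * z example from the
   comment above (the temporaries are hypothetical):

     t1 = y * z;                        cached product for {y, z}
     t2 = x * t1;                       cached product for {x, y, z}
     p  = __builtin_powi (t2, 2);       consumes two copies of x, y and z
     r1 = p * t1;                       reuses the cached y * z
     r2 = r1 * z;                       consumes the final z

   Each iteration decrements the factors' occurrence counts and removes the
   consumed copies from OPS, so only single-occurrence operands remain for
   the normal reassociation logic.  */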
4664 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
4666 static void
4667 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple stmt, tree new_rhs)
4669 tree rhs1;
4671 if (dump_file && (dump_flags & TDF_DETAILS))
4673 fprintf (dump_file, "Transforming ");
4674 print_gimple_stmt (dump_file, stmt, 0, 0);
4677 rhs1 = gimple_assign_rhs1 (stmt);
4678 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
4679 update_stmt (stmt);
4680 remove_visited_stmt_chain (rhs1);
4682 if (dump_file && (dump_flags & TDF_DETAILS))
4684 fprintf (dump_file, " into ");
4685 print_gimple_stmt (dump_file, stmt, 0, 0);
4689 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
4691 static void
4692 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple stmt,
4693 tree rhs1, tree rhs2)
4695 if (dump_file && (dump_flags & TDF_DETAILS))
4697 fprintf (dump_file, "Transforming ");
4698 print_gimple_stmt (dump_file, stmt, 0, 0);
4701 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
4702 update_stmt (gsi_stmt (*gsi));
4703 remove_visited_stmt_chain (rhs1);
4705 if (dump_file && (dump_flags & TDF_DETAILS))
4707 fprintf (dump_file, " into ");
4708 print_gimple_stmt (dump_file, stmt, 0, 0);
4712 /* Reassociate expressions in basic block BB and its post-dominator
4713 children. */
4715 static void
4716 reassociate_bb (basic_block bb)
4718 gimple_stmt_iterator gsi;
4719 basic_block son;
4720 gimple stmt = last_stmt (bb);
4722 if (stmt && !gimple_visited_p (stmt))
4723 maybe_optimize_range_tests (stmt);
4725 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
4727 stmt = gsi_stmt (gsi);
4729 if (is_gimple_assign (stmt)
4730 && !stmt_could_throw_p (stmt))
4732 tree lhs, rhs1, rhs2;
4733 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
4735 /* If this is not a gimple binary expression, there is
4736 nothing for us to do with it. */
4737 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
4738 continue;
4740 /* If this was part of an already processed statement,
4741 we don't need to touch it again. */
4742 if (gimple_visited_p (stmt))
4744 /* This statement might have become dead because of previous
4745 reassociations. */
4746 if (has_zero_uses (gimple_get_lhs (stmt)))
4748 reassoc_remove_stmt (&gsi);
4749 release_defs (stmt);
4750 /* We might end up removing the stmt above, which leaves
4751 the iterator at the end of the sequence. Reset it to
4752 the last stmt of the block; if that is still the end
4753 (we removed the only remaining statement), there is
4754 nothing left to process here, so bail out of the
4755 statement walk. */
4756 if (gsi_end_p (gsi))
4758 gsi = gsi_last_bb (bb);
4759 if (gsi_end_p (gsi))
4760 break;
4763 continue;
4766 lhs = gimple_assign_lhs (stmt);
4767 rhs1 = gimple_assign_rhs1 (stmt);
4768 rhs2 = gimple_assign_rhs2 (stmt);
4770 /* For operations other than bitwise and min/max ops we can't
4771 associate all types. Verify that here. */
4772 if (rhs_code != BIT_IOR_EXPR
4773 && rhs_code != BIT_AND_EXPR
4774 && rhs_code != BIT_XOR_EXPR
4775 && rhs_code != MIN_EXPR
4776 && rhs_code != MAX_EXPR
4777 && (!can_reassociate_p (lhs)
4778 || !can_reassociate_p (rhs1)
4779 || !can_reassociate_p (rhs2)))
4780 continue;
4782 if (associative_tree_code (rhs_code))
4784 auto_vec<operand_entry_t> ops;
4785 tree powi_result = NULL_TREE;
4787 /* There may be no immediate uses left by the time we
4788 get here because we may have eliminated them all. */
4789 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
4790 continue;
4792 gimple_set_visited (stmt, true);
4793 linearize_expr_tree (&ops, stmt, true, true);
4794 ops.qsort (sort_by_operand_rank);
4795 optimize_ops_list (rhs_code, &ops);
4796 if (undistribute_ops_list (rhs_code, &ops,
4797 loop_containing_stmt (stmt)))
4799 ops.qsort (sort_by_operand_rank);
4800 optimize_ops_list (rhs_code, &ops);
4803 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
4804 optimize_range_tests (rhs_code, &ops);
4806 if (first_pass_instance
4807 && rhs_code == MULT_EXPR
4808 && flag_unsafe_math_optimizations)
4809 powi_result = attempt_builtin_powi (stmt, &ops);
4811 /* If the operand vector is now empty, all operands were
4812 consumed by the __builtin_powi optimization. */
4813 if (ops.length () == 0)
4814 transform_stmt_to_copy (&gsi, stmt, powi_result);
4815 else if (ops.length () == 1)
4817 tree last_op = ops.last ()->op;
4819 if (powi_result)
4820 transform_stmt_to_multiply (&gsi, stmt, last_op,
4821 powi_result);
4822 else
4823 transform_stmt_to_copy (&gsi, stmt, last_op);
4825 else
4827 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
4828 int ops_num = ops.length ();
4829 int width = get_reassociation_width (ops_num, rhs_code, mode);
4830 tree new_lhs = lhs;
4832 if (dump_file && (dump_flags & TDF_DETAILS))
4833 fprintf (dump_file,
4834 "Width = %d was chosen for reassociation\n", width);
4836 if (width > 1
4837 && ops.length () > 3)
4838 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
4839 width, ops);
4840 else
4842 /* When there are three operands left, we want
4843 to make sure the ones that get the double
4844 binary op are chosen wisely. */
4845 int len = ops.length ();
4846 if (len >= 3)
4847 swap_ops_for_binary_stmt (ops, len - 3, stmt);
4849 new_lhs = rewrite_expr_tree (stmt, 0, ops,
4850 powi_result != NULL);
4853 /* If we combined some repeated factors into a
4854 __builtin_powi call, multiply that result by the
4855 reassociated operands. */
4856 if (powi_result)
4858 gimple mul_stmt, lhs_stmt = SSA_NAME_DEF_STMT (lhs);
4859 tree type = TREE_TYPE (lhs);
4860 tree target_ssa = make_temp_ssa_name (type, NULL,
4861 "reassocpow");
4862 gimple_set_lhs (lhs_stmt, target_ssa);
4863 update_stmt (lhs_stmt);
4864 if (lhs != new_lhs)
4865 target_ssa = new_lhs;
4866 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
4867 powi_result, target_ssa);
4868 gimple_set_location (mul_stmt, gimple_location (stmt));
4869 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
4875 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
4876 son;
4877 son = next_dom_son (CDI_POST_DOMINATORS, son))
4878 reassociate_bb (son);
4881 /* Add jumps around shifts for range tests turned into bit tests.
4882 For each SSA_NAME VAR we have code like:
4883 VAR = ...; // final stmt of range comparison
4884 // bit test here...;
4885 OTHERVAR = ...; // final stmt of the bit test sequence
4886 RES = VAR | OTHERVAR;
4887 Turn the above into:
4888 VAR = ...;
4889 if (VAR != 0)
4890 goto <l3>;
4891 else
4892 goto <l2>;
4893 <l2>:
4894 // bit test here...;
4895 OTHERVAR = ...;
4896 <l3>:
4897 # RES = PHI<1(l1), OTHERVAR(l2)>; */
4899 static void
4900 branch_fixup (void)
4902 tree var;
4903 unsigned int i;
4905 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
4907 gimple def_stmt = SSA_NAME_DEF_STMT (var);
4908 gimple use_stmt;
4909 use_operand_p use;
4910 bool ok = single_imm_use (var, &use, &use_stmt);
4911 gcc_assert (ok
4912 && is_gimple_assign (use_stmt)
4913 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
4914 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
4916 basic_block cond_bb = gimple_bb (def_stmt);
4917 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
4918 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
4920 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
4921 gimple g = gimple_build_cond (NE_EXPR, var,
4922 build_zero_cst (TREE_TYPE (var)),
4923 NULL_TREE, NULL_TREE);
4924 location_t loc = gimple_location (use_stmt);
4925 gimple_set_location (g, loc);
4926 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
4928 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
4929 etrue->probability = REG_BR_PROB_BASE / 2;
4930 etrue->count = cond_bb->count / 2;
4931 edge efalse = find_edge (cond_bb, then_bb);
4932 efalse->flags = EDGE_FALSE_VALUE;
4933 efalse->probability -= etrue->probability;
4934 efalse->count -= etrue->count;
4935 then_bb->count -= etrue->count;
4937 tree othervar = NULL_TREE;
4938 if (gimple_assign_rhs1 (use_stmt) == var)
4939 othervar = gimple_assign_rhs2 (use_stmt);
4940 else if (gimple_assign_rhs2 (use_stmt) == var)
4941 othervar = gimple_assign_rhs1 (use_stmt);
4942 else
4943 gcc_unreachable ();
4944 tree lhs = gimple_assign_lhs (use_stmt);
4945 gphi *phi = create_phi_node (lhs, merge_bb);
4946 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
4947 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
4948 gsi = gsi_for_stmt (use_stmt);
4949 gsi_remove (&gsi, true);
4951 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
4952 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
4954 reassoc_branch_fixups.release ();
4957 void dump_ops_vector (FILE *file, vec<operand_entry_t> ops);
4958 void debug_ops_vector (vec<operand_entry_t> ops);
4960 /* Dump the operand entry vector OPS to FILE. */
4962 void
4963 dump_ops_vector (FILE *file, vec<operand_entry_t> ops)
4965 operand_entry_t oe;
4966 unsigned int i;
4968 FOR_EACH_VEC_ELT (ops, i, oe)
4970 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
4971 print_generic_expr (file, oe->op, 0);
4975 /* Dump the operand entry vector OPS to STDERR. */
4977 DEBUG_FUNCTION void
4978 debug_ops_vector (vec<operand_entry_t> ops)
4980 dump_ops_vector (stderr, ops);
4983 static void
4984 do_reassoc (void)
4986 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
4987 reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
4990 /* Initialize the reassociation pass. */
4992 static void
4993 init_reassoc (void)
4995 int i;
4996 long rank = 2;
4997 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
4999 /* Find the loops, so that we can prevent moving calculations in
5000 them. */
5001 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5003 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
5005 operand_entry_pool = create_alloc_pool ("operand entry pool",
5006 sizeof (struct operand_entry), 30);
5007 next_operand_entry_id = 0;
5009 /* Reverse RPO (Reverse Post Order) will give us something where
5010 deeper loops come later. */
5011 pre_and_rev_post_order_compute (NULL, bbs, false);
5012 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5013 operand_rank = new hash_map<tree, long>;
5015 /* Give each default definition a distinct rank. This includes
5016 parameters and the static chain. Walk backwards over all
5017 SSA names so that we get proper rank ordering according
5018 to tree_swap_operands_p. */
5019 for (i = num_ssa_names - 1; i > 0; --i)
5021 tree name = ssa_name (i);
5022 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5023 insert_operand_rank (name, ++rank);
5026 /* Set up rank for each BB */
5027 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5028 bb_rank[bbs[i]] = ++rank << 16;
5030 free (bbs);
5031 calculate_dominance_info (CDI_POST_DOMINATORS);
5032 plus_negates = vNULL;
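/* A hedged worked example of the rank scheme (hypothetical counts): with
   three default definitions, the backwards SSA-name walk assigns them
   ranks 3, 4 and 5; the basic blocks in reverse post order then receive
   ranks 6 << 16, 7 << 16, and so on, so a value computed in a later block
   always outranks values from earlier blocks, while the low bits remain
   free for ranks computed within a block.  */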
5035 /* Cleanup after the reassociation pass, and print stats if
5036 requested. */
5038 static void
5039 fini_reassoc (void)
5041 statistics_counter_event (cfun, "Linearized",
5042 reassociate_stats.linearized);
5043 statistics_counter_event (cfun, "Constants eliminated",
5044 reassociate_stats.constants_eliminated);
5045 statistics_counter_event (cfun, "Ops eliminated",
5046 reassociate_stats.ops_eliminated);
5047 statistics_counter_event (cfun, "Statements rewritten",
5048 reassociate_stats.rewritten);
5049 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5050 reassociate_stats.pows_encountered);
5051 statistics_counter_event (cfun, "Built-in powi calls created",
5052 reassociate_stats.pows_created);
5054 delete operand_rank;
5055 free_alloc_pool (operand_entry_pool);
5056 free (bb_rank);
5057 plus_negates.release ();
5058 free_dominance_info (CDI_POST_DOMINATORS);
5059 loop_optimizer_finalize ();
5062 /* Gate and execute functions for Reassociation. */
5064 static unsigned int
5065 execute_reassoc (void)
5067 init_reassoc ();
5069 do_reassoc ();
5070 repropagate_negates ();
5071 branch_fixup ();
5073 fini_reassoc ();
5074 return 0;
5077 namespace {
5079 const pass_data pass_data_reassoc =
5081 GIMPLE_PASS, /* type */
5082 "reassoc", /* name */
5083 OPTGROUP_NONE, /* optinfo_flags */
5084 TV_TREE_REASSOC, /* tv_id */
5085 ( PROP_cfg | PROP_ssa ), /* properties_required */
5086 0, /* properties_provided */
5087 0, /* properties_destroyed */
5088 0, /* todo_flags_start */
5089 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5092 class pass_reassoc : public gimple_opt_pass
5094 public:
5095 pass_reassoc (gcc::context *ctxt)
5096 : gimple_opt_pass (pass_data_reassoc, ctxt)
5099 /* opt_pass methods: */
5100 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5101 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5102 virtual unsigned int execute (function *) { return execute_reassoc (); }
5104 }; // class pass_reassoc
5106 } // anon namespace
5108 gimple_opt_pass *
5109 make_pass_reassoc (gcc::context *ctxt)
5111 return new pass_reassoc (ctxt);