gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2016 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "tm_p.h"
33 #include "ssa.h"
34 #include "optabs-tree.h"
35 #include "gimple-pretty-print.h"
36 #include "diagnostic-core.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "cfganal.h"
40 #include "gimple-fold.h"
41 #include "tree-eh.h"
42 #include "gimple-iterator.h"
43 #include "gimplify-me.h"
44 #include "tree-cfg.h"
45 #include "tree-ssa-loop.h"
46 #include "flags.h"
47 #include "tree-ssa.h"
48 #include "langhooks.h"
49 #include "cfgloop.h"
50 #include "params.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
55 /* This is a simple global reassociation pass. It is, in part, based
56 on the LLVM pass of the same name (They do some things more/less
57 than we do, in different orders, etc).
59 It consists of five steps:
61 1. Breaking up subtract operations into addition + negate, where
62 it would promote the reassociation of adds.
64 2. Left linearization of the expression trees, so that (A+B)+(C+D)
65 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
66 During linearization, we place the operands of the binary
67 expressions into a vector of operand_entry_*
69 3. Optimization of the operand lists, eliminating things like a +
70 -a, a & a, etc.
72 3a. Combine repeated factors with the same occurrence counts
73 into a __builtin_powi call that will later be optimized into
74 an optimal number of multiplies.
76 4. Rewrite the expression trees we linearized and optimized so
77 they are in proper rank order.
79 5. Repropagate negates, as nothing else will clean it up ATM.
81 A bit of theory on #4, since nobody seems to write anything down
82 about why it makes sense to do it the way they do it:
84 We could do this much nicer theoretically, but don't (for reasons
85 explained after how to do it theoretically nice :P).
87 In order to promote the most redundancy elimination, you want
88 binary expressions whose operands are the same rank (or
89 preferably, the same value) exposed to the redundancy eliminator,
90 for possible elimination.
92 So the way to do this if we really cared, is to build the new op
93 tree from the leaves to the roots, merging as you go, and putting the
94 new op on the end of the worklist, until you are left with one
95 thing on the worklist.
97 IE if you have to rewrite the following set of operands (listed with
98 rank in parentheses), with opcode PLUS_EXPR:
100 a (1), b (1), c (1), d (2), e (2)
103 We start with our merge worklist empty, and the ops list with all of
104 those on it.
106 You want to first merge all leaves of the same rank, as much as
107 possible.
109 So first build a binary op of
111 mergetmp = a + b, and put "mergetmp" on the merge worklist.
113 Because there is no three operand form of PLUS_EXPR, c is not going to
114 be exposed to redundancy elimination as a rank 1 operand.
116 So you might as well throw it on the merge worklist (you could also
117 consider it to now be a rank two operand, and merge it with d and e,
118 but in this case, you then have evicted e from a binary op. So at
119 least in this situation, you can't win.)
121 Then build a binary op of d + e
122 mergetmp2 = d + e
124 and put mergetmp2 on the merge worklist.
126 so merge worklist = {mergetmp, c, mergetmp2}
128 Continue building binary ops of these operations until you have only
129 one operation left on the worklist.
131 So we have
133 build binary op
134 mergetmp3 = mergetmp + c
136 worklist = {mergetmp2, mergetmp3}
138 mergetmp4 = mergetmp2 + mergetmp3
140 worklist = {mergetmp4}
142 because we have one operation left, we can now just set the original
143 statement equal to the result of that operation.
145 This will at least expose a + b and d + e to redundancy elimination
146 as binary operations.
148 For extra points, you can reuse the old statements to build the
149 mergetmps, since you shouldn't run out.
151 So why don't we do this?
153 Because it's expensive, and rarely will help. Most trees we are
154 reassociating have 3 or fewer ops. If they have 2 ops, they already
155 will be written into a nice single binary op. If you have 3 ops, a
156 single simple check suffices to tell you whether the first two are of the
157 same rank. If so, you know to order it
159 mergetmp = op1 + op2
160 newstmt = mergetmp + op3
162 instead of
163 mergetmp = op2 + op3
164 newstmt = mergetmp + op1
166 If all three are of the same rank, you can't expose them all in a
167 single binary operator anyway, so the above is *still* the best you
168 can do.
170 Thus, this is what we do. When we have three ops left, we check to see
171 what order to put them in, and call it a day. As a nod to vector sum
172 reduction, we check if any of the ops are really a phi node that is a
173 destructive update for the associating op, and keep the destructive
174 update together for vector sum reduction recognition. */
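/* As an illustrative sketch of the steps above on a small (hypothetical)
   input, consider

     t1 = a - b
     t2 = t1 + c
     t3 = t2 + b

   Step 1 turns the subtraction into t1 = a + (-b), step 2 linearizes the
   chain feeding t3 into the operand list {a, -b, c, b}, step 3 cancels
   -b against b (leaving a zero constant that is folded away), and step 4
   rewrites the chain so that t3 computes the equivalent of a + c.  */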
176 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
177 point 3a in the pass header comment. */
178 static bool reassoc_insert_powi_p;
180 /* Statistics */
181 static struct
183 int linearized;
184 int constants_eliminated;
185 int ops_eliminated;
186 int rewritten;
187 int pows_encountered;
188 int pows_created;
189 } reassociate_stats;
191 /* Operator, rank pair. */
192 struct operand_entry
194 unsigned int rank;
195 int id;
196 tree op;
197 unsigned int count;
198 gimple *stmt_to_insert;
201 static object_allocator<operand_entry> operand_entry_pool
202 ("operand entry pool");
204 /* This is used to assign a unique ID to each struct operand_entry
205 so that qsort results are identical on different hosts. */
206 static int next_operand_entry_id;
208 /* Starting rank number for a given basic block, so that we can rank
209 operations using unmovable instructions in that BB based on the bb
210 depth. */
211 static long *bb_rank;
213 /* Operand->rank hashtable. */
214 static hash_map<tree, long> *operand_rank;
216 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
217 all basic blocks, the CFG should be adjusted: the basic block is
218 split right after that SSA_NAME's definition statement and before
219 its only use, which must be a bit ior. */
220 static vec<tree> reassoc_branch_fixups;
222 /* Forward decls. */
223 static long get_rank (tree);
224 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
226 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
227 possibly added by gsi_remove. */
229 bool
230 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
232 gimple *stmt = gsi_stmt (*gsi);
234 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
235 return gsi_remove (gsi, true);
237 gimple_stmt_iterator prev = *gsi;
238 gsi_prev (&prev);
239 unsigned uid = gimple_uid (stmt);
240 basic_block bb = gimple_bb (stmt);
241 bool ret = gsi_remove (gsi, true);
242 if (!gsi_end_p (prev))
243 gsi_next (&prev);
244 else
245 prev = gsi_start_bb (bb);
246 gimple *end_stmt = gsi_stmt (*gsi);
247 while ((stmt = gsi_stmt (prev)) != end_stmt)
249 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
250 gimple_set_uid (stmt, uid);
251 gsi_next (&prev);
253 return ret;
256 /* Bias amount for loop-carried phis. We want this to be larger than
257 the depth of any reassociation tree we can see, but not larger than
258 the rank difference between two blocks. */
259 #define PHI_LOOP_BIAS (1 << 15)
261 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
262 an innermost loop, and the phi has only a single use which is inside
263 the loop, then the rank is the block rank of the loop latch plus an
264 extra bias for the loop-carried dependence. This causes expressions
265 calculated into an accumulator variable to be independent for each
266 iteration of the loop. If STMT is some other phi, the rank is the
267 block rank of its containing block. */
268 static long
269 phi_rank (gimple *stmt)
271 basic_block bb = gimple_bb (stmt);
272 struct loop *father = bb->loop_father;
273 tree res;
274 unsigned i;
275 use_operand_p use;
276 gimple *use_stmt;
278 /* We only care about real loops (those with a latch). */
279 if (!father->latch)
280 return bb_rank[bb->index];
282 /* Interesting phis must be in headers of innermost loops. */
283 if (bb != father->header
284 || father->inner)
285 return bb_rank[bb->index];
287 /* Ignore virtual SSA_NAMEs. */
288 res = gimple_phi_result (stmt);
289 if (virtual_operand_p (res))
290 return bb_rank[bb->index];
292 /* The phi definition must have a single use, and that use must be
293 within the loop. Otherwise this isn't an accumulator pattern. */
294 if (!single_imm_use (res, &use, &use_stmt)
295 || gimple_bb (use_stmt)->loop_father != father)
296 return bb_rank[bb->index];
298 /* Look for phi arguments from within the loop. If found, bias this phi. */
299 for (i = 0; i < gimple_phi_num_args (stmt); i++)
301 tree arg = gimple_phi_arg_def (stmt, i);
302 if (TREE_CODE (arg) == SSA_NAME
303 && !SSA_NAME_IS_DEFAULT_DEF (arg))
305 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
306 if (gimple_bb (def_stmt)->loop_father == father)
307 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
311 /* Must be an uninteresting phi. */
312 return bb_rank[bb->index];
315 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
316 loop-carried dependence of an innermost loop, return TRUE; else
317 return FALSE. */
318 static bool
319 loop_carried_phi (tree exp)
321 gimple *phi_stmt;
322 long block_rank;
324 if (TREE_CODE (exp) != SSA_NAME
325 || SSA_NAME_IS_DEFAULT_DEF (exp))
326 return false;
328 phi_stmt = SSA_NAME_DEF_STMT (exp);
330 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
331 return false;
333 /* Non-loop-carried phis have block rank. Loop-carried phis have
334 an additional bias added in. If this phi doesn't have block rank,
335 it's biased and should not be propagated. */
336 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
338 if (phi_rank (phi_stmt) != block_rank)
339 return true;
341 return false;
344 /* Return the maximum of RANK and the rank that should be propagated
345 from expression OP. For most operands, this is just the rank of OP.
346 For loop-carried phis, the value is zero to avoid undoing the bias
347 in favor of the phi. */
348 static long
349 propagate_rank (long rank, tree op)
351 long op_rank;
353 if (loop_carried_phi (op))
354 return rank;
356 op_rank = get_rank (op);
358 return MAX (rank, op_rank);
361 /* Look up the operand rank structure for expression E. */
363 static inline long
364 find_operand_rank (tree e)
366 long *slot = operand_rank->get (e);
367 return slot ? *slot : -1;
370 /* Insert {E,RANK} into the operand rank hashtable. */
372 static inline void
373 insert_operand_rank (tree e, long rank)
375 gcc_assert (rank > 0);
376 gcc_assert (!operand_rank->put (e, rank));
379 /* Given an expression E, return the rank of the expression. */
381 static long
382 get_rank (tree e)
384 /* SSA_NAME's have the rank of the expression they are the result of.
386 For globals and uninitialized values, the rank is 0.
387 For function arguments, use the pre-setup rank.
388 For PHI nodes, stores, asm statements, etc, we use the rank of
389 the BB.
390 For simple operations, the rank is the maximum rank of any of
391 its operands, or the bb_rank, whichever is less.
392 I make no claims that this is optimal; however, it gives good
393 results. */
395 /* We make an exception to the normal ranking system to break
396 dependences of accumulator variables in loops. Suppose we
397 have a simple one-block loop containing:
399 x_1 = phi(x_0, x_2)
400 b = a + x_1
401 c = b + d
402 x_2 = c + e
404 As shown, each iteration of the calculation into x is fully
405 dependent upon the iteration before it. We would prefer to
406 see this in the form:
408 x_1 = phi(x_0, x_2)
409 b = a + d
410 c = b + e
411 x_2 = c + x_1
413 If the loop is unrolled, the calculations of b and c from
414 different iterations can be interleaved.
416 To obtain this result during reassociation, we bias the rank
417 of the phi definition x_1 upward, when it is recognized as an
418 accumulator pattern. The artificial rank causes it to be
419 added last, providing the desired independence. */
421 if (TREE_CODE (e) == SSA_NAME)
423 ssa_op_iter iter;
424 gimple *stmt;
425 long rank;
426 tree op;
428 if (SSA_NAME_IS_DEFAULT_DEF (e))
429 return find_operand_rank (e);
431 stmt = SSA_NAME_DEF_STMT (e);
432 if (gimple_code (stmt) == GIMPLE_PHI)
433 return phi_rank (stmt);
435 if (!is_gimple_assign (stmt))
436 return bb_rank[gimple_bb (stmt)->index];
438 /* If we already have a rank for this expression, use that. */
439 rank = find_operand_rank (e);
440 if (rank != -1)
441 return rank;
443 /* Otherwise, find the maximum rank for the operands. As an
444 exception, remove the bias from loop-carried phis when propagating
445 the rank so that dependent operations are not also biased. */
446 /* Simply walk over all SSA uses - this takes advantage of the
447 fact that non-SSA operands satisfy is_gimple_min_invariant and
448 thus have rank 0. */
449 rank = 0;
450 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
451 rank = propagate_rank (rank, op);
453 if (dump_file && (dump_flags & TDF_DETAILS))
455 fprintf (dump_file, "Rank for ");
456 print_generic_expr (dump_file, e, 0);
457 fprintf (dump_file, " is %ld\n", (rank + 1));
460 /* Note the rank in the hashtable so we don't recompute it. */
461 insert_operand_rank (e, (rank + 1));
462 return (rank + 1);
465 /* Constants, globals, etc., are rank 0 */
466 return 0;
470 /* We want integer ones to end up last no matter what, since they are
471 the ones we can do the most with. */
472 #define INTEGER_CONST_TYPE 1 << 3
473 #define FLOAT_CONST_TYPE 1 << 2
474 #define OTHER_CONST_TYPE 1 << 1
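/* The distinct values only matter for ordering: constant_type () below maps
   each invariant to one of these classes, and sort_by_operand_rank uses the
   class so that rank-0 constants of the same kind end up adjacent and can be
   merged by optimize_ops_list.  */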
476 /* Classify an invariant tree into integer, float, or other, so that
477 we can sort them to be near other constants of the same type. */
478 static inline int
479 constant_type (tree t)
481 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
482 return INTEGER_CONST_TYPE;
483 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
484 return FLOAT_CONST_TYPE;
485 else
486 return OTHER_CONST_TYPE;
489 /* qsort comparison function to sort operand entries PA and PB by rank
490 so that the sorted array is ordered by rank in decreasing order. */
491 static int
492 sort_by_operand_rank (const void *pa, const void *pb)
494 const operand_entry *oea = *(const operand_entry *const *)pa;
495 const operand_entry *oeb = *(const operand_entry *const *)pb;
497 /* It's nicer for optimize_expression if constants that are likely
498 to fold when added/multiplied/whatever are put next to each
499 other. Since all constants have rank 0, order them by type. */
500 if (oeb->rank == 0 && oea->rank == 0)
502 if (constant_type (oeb->op) != constant_type (oea->op))
503 return constant_type (oeb->op) - constant_type (oea->op);
504 else
505 /* To make sorting result stable, we use unique IDs to determine
506 order. */
507 return oeb->id - oea->id;
510 /* Lastly, make sure the versions that are the same go next to each
511 other. */
512 if ((oeb->rank - oea->rank == 0)
513 && TREE_CODE (oea->op) == SSA_NAME
514 && TREE_CODE (oeb->op) == SSA_NAME)
516 /* SSA_NAME_VERSION is assigned pretty randomly because we reuse
517 versions of removed SSA_NAMEs, so if possible prefer to sort
518 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
519 See PR60418. */
520 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
521 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
522 && !oea->stmt_to_insert
523 && !oeb->stmt_to_insert
524 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
526 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
527 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
528 basic_block bba = gimple_bb (stmta);
529 basic_block bbb = gimple_bb (stmtb);
530 if (bbb != bba)
532 if (bb_rank[bbb->index] != bb_rank[bba->index])
533 return bb_rank[bbb->index] - bb_rank[bba->index];
535 else
537 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
538 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
539 if (da != db)
540 return da ? 1 : -1;
544 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
545 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
546 else
547 return oeb->id - oea->id;
550 if (oeb->rank != oea->rank)
551 return oeb->rank - oea->rank;
552 else
553 return oeb->id - oea->id;
556 /* Add an operand entry to *OPS for the tree operand OP. */
558 static void
559 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
561 operand_entry *oe = operand_entry_pool.allocate ();
563 oe->op = op;
564 oe->rank = get_rank (op);
565 oe->id = next_operand_entry_id++;
566 oe->count = 1;
567 oe->stmt_to_insert = stmt_to_insert;
568 ops->safe_push (oe);
571 /* Add an operand entry to *OPS for the tree operand OP with repeat
572 count REPEAT. */
574 static void
575 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
576 HOST_WIDE_INT repeat)
578 operand_entry *oe = operand_entry_pool.allocate ();
580 oe->op = op;
581 oe->rank = get_rank (op);
582 oe->id = next_operand_entry_id++;
583 oe->count = repeat;
584 oe->stmt_to_insert = NULL;
585 ops->safe_push (oe);
587 reassociate_stats.pows_encountered++;
590 /* Return true if STMT is a reassociable operation containing a binary
591 operation with tree code CODE, and is inside LOOP. */
593 static bool
594 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
596 basic_block bb = gimple_bb (stmt);
598 if (gimple_bb (stmt) == NULL)
599 return false;
601 if (!flow_bb_inside_loop_p (loop, bb))
602 return false;
604 if (is_gimple_assign (stmt)
605 && gimple_assign_rhs_code (stmt) == code
606 && has_single_use (gimple_assign_lhs (stmt)))
607 return true;
609 return false;
613 /* Return true if STMT is a nop-conversion. */
615 static bool
616 gimple_nop_conversion_p (gimple *stmt)
618 if (gassign *ass = dyn_cast <gassign *> (stmt))
620 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
621 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
622 TREE_TYPE (gimple_assign_rhs1 (ass))))
623 return true;
625 return false;
628 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
629 operand of that operation. Otherwise, return NULL. */
631 static tree
632 get_unary_op (tree name, enum tree_code opcode)
634 gimple *stmt = SSA_NAME_DEF_STMT (name);
636 /* Look through nop conversions (sign changes). */
637 if (gimple_nop_conversion_p (stmt)
638 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
639 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
641 if (!is_gimple_assign (stmt))
642 return NULL_TREE;
644 if (gimple_assign_rhs_code (stmt) == opcode)
645 return gimple_assign_rhs1 (stmt);
646 return NULL_TREE;
649 /* Return true if OP1 and OP2 have the same value if cast to either type. */
651 static bool
652 ops_equal_values_p (tree op1, tree op2)
654 if (op1 == op2)
655 return true;
657 tree orig_op1 = op1;
658 if (TREE_CODE (op1) == SSA_NAME)
660 gimple *stmt = SSA_NAME_DEF_STMT (op1);
661 if (gimple_nop_conversion_p (stmt))
663 op1 = gimple_assign_rhs1 (stmt);
664 if (op1 == op2)
665 return true;
669 if (TREE_CODE (op2) == SSA_NAME)
671 gimple *stmt = SSA_NAME_DEF_STMT (op2);
672 if (gimple_nop_conversion_p (stmt))
674 op2 = gimple_assign_rhs1 (stmt);
675 if (op1 == op2
676 || orig_op1 == op2)
677 return true;
681 return false;
685 /* If CURR and LAST are a pair of ops that OPCODE allows us to
686 eliminate through equivalences, do so, remove them from OPS, and
687 return true. Otherwise, return false. */
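/* For example (illustrative): for BIT_AND_EXPR the operand pair {a, a}
   collapses to a single a, while for BIT_XOR_EXPR both copies are removed
   and, if nothing else remains, a zero constant is pushed instead.  */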
689 static bool
690 eliminate_duplicate_pair (enum tree_code opcode,
691 vec<operand_entry *> *ops,
692 bool *all_done,
693 unsigned int i,
694 operand_entry *curr,
695 operand_entry *last)
698 /* If we have two of the same op, and the opcode is &, |, min, or max,
699 we can eliminate one of them.
700 If we have two of the same op, and the opcode is ^, we can
701 eliminate both of them. */
703 if (last && last->op == curr->op)
705 switch (opcode)
707 case MAX_EXPR:
708 case MIN_EXPR:
709 case BIT_IOR_EXPR:
710 case BIT_AND_EXPR:
711 if (dump_file && (dump_flags & TDF_DETAILS))
713 fprintf (dump_file, "Equivalence: ");
714 print_generic_expr (dump_file, curr->op, 0);
715 fprintf (dump_file, " [&|minmax] ");
716 print_generic_expr (dump_file, last->op, 0);
717 fprintf (dump_file, " -> ");
718 print_generic_stmt (dump_file, last->op, 0);
721 ops->ordered_remove (i);
722 reassociate_stats.ops_eliminated ++;
724 return true;
726 case BIT_XOR_EXPR:
727 if (dump_file && (dump_flags & TDF_DETAILS))
729 fprintf (dump_file, "Equivalence: ");
730 print_generic_expr (dump_file, curr->op, 0);
731 fprintf (dump_file, " ^ ");
732 print_generic_expr (dump_file, last->op, 0);
733 fprintf (dump_file, " -> nothing\n");
736 reassociate_stats.ops_eliminated += 2;
738 if (ops->length () == 2)
740 ops->truncate (0);
741 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
742 *all_done = true;
744 else
746 ops->ordered_remove (i-1);
747 ops->ordered_remove (i-1);
750 return true;
752 default:
753 break;
756 return false;
759 static vec<tree> plus_negates;
761 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
762 expression, look in OPS for a corresponding positive operation to cancel
763 it out. If we find one, remove the other from OPS, replace
764 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
765 return false. */
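/* For example (illustrative): in the operand list of a + (-b) + b the
   negate cancels against b and contributes a 0, and in a + ~b + b the
   pair contributes a -1, matching the two cases handled below.  */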
767 static bool
768 eliminate_plus_minus_pair (enum tree_code opcode,
769 vec<operand_entry *> *ops,
770 unsigned int currindex,
771 operand_entry *curr)
773 tree negateop;
774 tree notop;
775 unsigned int i;
776 operand_entry *oe;
778 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
779 return false;
781 negateop = get_unary_op (curr->op, NEGATE_EXPR);
782 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
783 if (negateop == NULL_TREE && notop == NULL_TREE)
784 return false;
786 /* Any non-negated version will have a rank that is one less than
787 the current rank. So once we hit those ranks, if we don't find
788 one, we can stop. */
790 for (i = currindex + 1;
791 ops->iterate (i, &oe)
792 && oe->rank >= curr->rank - 1 ;
793 i++)
795 if (negateop
796 && ops_equal_values_p (oe->op, negateop))
798 if (dump_file && (dump_flags & TDF_DETAILS))
800 fprintf (dump_file, "Equivalence: ");
801 print_generic_expr (dump_file, negateop, 0);
802 fprintf (dump_file, " + -");
803 print_generic_expr (dump_file, oe->op, 0);
804 fprintf (dump_file, " -> 0\n");
807 ops->ordered_remove (i);
808 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
809 ops->ordered_remove (currindex);
810 reassociate_stats.ops_eliminated ++;
812 return true;
814 else if (notop
815 && ops_equal_values_p (oe->op, notop))
817 tree op_type = TREE_TYPE (oe->op);
819 if (dump_file && (dump_flags & TDF_DETAILS))
821 fprintf (dump_file, "Equivalence: ");
822 print_generic_expr (dump_file, notop, 0);
823 fprintf (dump_file, " + ~");
824 print_generic_expr (dump_file, oe->op, 0);
825 fprintf (dump_file, " -> -1\n");
828 ops->ordered_remove (i);
829 add_to_ops_vec (ops, build_all_ones_cst (op_type));
830 ops->ordered_remove (currindex);
831 reassociate_stats.ops_eliminated ++;
833 return true;
837 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
838 save it for later inspection in repropagate_negates(). */
839 if (negateop != NULL_TREE
840 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
841 plus_negates.safe_push (curr->op);
843 return false;
846 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
847 bitwise not expression, look in OPS for a corresponding operand to
848 cancel it out. If we find one, remove the other from OPS, replace
849 OPS[CURRINDEX] with 0, and return true. Otherwise, return
850 false. */
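/* For example (illustrative): a & ~a is 0 and a | ~a is all ones, so when
   such a pair is found the whole operand list can be collapsed to that
   single constant, which is what the code below does.  */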
852 static bool
853 eliminate_not_pairs (enum tree_code opcode,
854 vec<operand_entry *> *ops,
855 unsigned int currindex,
856 operand_entry *curr)
858 tree notop;
859 unsigned int i;
860 operand_entry *oe;
862 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
863 || TREE_CODE (curr->op) != SSA_NAME)
864 return false;
866 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
867 if (notop == NULL_TREE)
868 return false;
870 /* Any non-not version will have a rank that is one less than
871 the current rank. So once we hit those ranks, if we don't find
872 one, we can stop. */
874 for (i = currindex + 1;
875 ops->iterate (i, &oe)
876 && oe->rank >= curr->rank - 1;
877 i++)
879 if (oe->op == notop)
881 if (dump_file && (dump_flags & TDF_DETAILS))
883 fprintf (dump_file, "Equivalence: ");
884 print_generic_expr (dump_file, notop, 0);
885 if (opcode == BIT_AND_EXPR)
886 fprintf (dump_file, " & ~");
887 else if (opcode == BIT_IOR_EXPR)
888 fprintf (dump_file, " | ~");
889 print_generic_expr (dump_file, oe->op, 0);
890 if (opcode == BIT_AND_EXPR)
891 fprintf (dump_file, " -> 0\n");
892 else if (opcode == BIT_IOR_EXPR)
893 fprintf (dump_file, " -> -1\n");
896 if (opcode == BIT_AND_EXPR)
897 oe->op = build_zero_cst (TREE_TYPE (oe->op));
898 else if (opcode == BIT_IOR_EXPR)
899 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
901 reassociate_stats.ops_eliminated += ops->length () - 1;
902 ops->truncate (0);
903 ops->quick_push (oe);
904 return true;
908 return false;
911 /* Use constant value that may be present in OPS to try to eliminate
912 operands. Note that this function is only really used when we've
913 eliminated ops for other reasons, or merged constants. Across
914 single statements, fold already does all of this, plus more. There
915 is little point in duplicating logic, so I've only included the
916 identities that I could ever construct testcases to trigger. */
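/* For example (illustrative): x & 0 drops every other operand, x & -1 drops
   the constant, x | -1 drops every other operand, and x | 0, x * 1, x + 0
   and x ^ 0 drop the constant; the floating-point cases are guarded by the
   HONOR_* and fold_real_zero_addition_p checks below.  */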
918 static void
919 eliminate_using_constants (enum tree_code opcode,
920 vec<operand_entry *> *ops)
922 operand_entry *oelast = ops->last ();
923 tree type = TREE_TYPE (oelast->op);
925 if (oelast->rank == 0
926 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
928 switch (opcode)
930 case BIT_AND_EXPR:
931 if (integer_zerop (oelast->op))
933 if (ops->length () != 1)
935 if (dump_file && (dump_flags & TDF_DETAILS))
936 fprintf (dump_file, "Found & 0, removing all other ops\n");
938 reassociate_stats.ops_eliminated += ops->length () - 1;
940 ops->truncate (0);
941 ops->quick_push (oelast);
942 return;
945 else if (integer_all_onesp (oelast->op))
947 if (ops->length () != 1)
949 if (dump_file && (dump_flags & TDF_DETAILS))
950 fprintf (dump_file, "Found & -1, removing\n");
951 ops->pop ();
952 reassociate_stats.ops_eliminated++;
955 break;
956 case BIT_IOR_EXPR:
957 if (integer_all_onesp (oelast->op))
959 if (ops->length () != 1)
961 if (dump_file && (dump_flags & TDF_DETAILS))
962 fprintf (dump_file, "Found | -1, removing all other ops\n");
964 reassociate_stats.ops_eliminated += ops->length () - 1;
966 ops->truncate (0);
967 ops->quick_push (oelast);
968 return;
971 else if (integer_zerop (oelast->op))
973 if (ops->length () != 1)
975 if (dump_file && (dump_flags & TDF_DETAILS))
976 fprintf (dump_file, "Found | 0, removing\n");
977 ops->pop ();
978 reassociate_stats.ops_eliminated++;
981 break;
982 case MULT_EXPR:
983 if (integer_zerop (oelast->op)
984 || (FLOAT_TYPE_P (type)
985 && !HONOR_NANS (type)
986 && !HONOR_SIGNED_ZEROS (type)
987 && real_zerop (oelast->op)))
989 if (ops->length () != 1)
991 if (dump_file && (dump_flags & TDF_DETAILS))
992 fprintf (dump_file, "Found * 0, removing all other ops\n");
994 reassociate_stats.ops_eliminated += ops->length () - 1;
995 ops->truncate (1);
996 ops->quick_push (oelast);
997 return;
1000 else if (integer_onep (oelast->op)
1001 || (FLOAT_TYPE_P (type)
1002 && !HONOR_SNANS (type)
1003 && real_onep (oelast->op)))
1005 if (ops->length () != 1)
1007 if (dump_file && (dump_flags & TDF_DETAILS))
1008 fprintf (dump_file, "Found * 1, removing\n");
1009 ops->pop ();
1010 reassociate_stats.ops_eliminated++;
1011 return;
1014 break;
1015 case BIT_XOR_EXPR:
1016 case PLUS_EXPR:
1017 case MINUS_EXPR:
1018 if (integer_zerop (oelast->op)
1019 || (FLOAT_TYPE_P (type)
1020 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1021 && fold_real_zero_addition_p (type, oelast->op,
1022 opcode == MINUS_EXPR)))
1024 if (ops->length () != 1)
1026 if (dump_file && (dump_flags & TDF_DETAILS))
1027 fprintf (dump_file, "Found [|^+] 0, removing\n");
1028 ops->pop ();
1029 reassociate_stats.ops_eliminated++;
1030 return;
1033 break;
1034 default:
1035 break;
1041 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1042 bool, bool);
1044 /* Structure for tracking and counting operands. */
1045 struct oecount {
1046 int cnt;
1047 int id;
1048 enum tree_code oecode;
1049 tree op;
1053 /* The heap for the oecount hashtable and the sorted list of operands. */
1054 static vec<oecount> cvec;
1057 /* Oecount hashtable helpers. */
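/* The hash table stores ints that are indices into CVEC biased by 42, so
   that the values 0 and 1 stay free for int_hash's empty and deleted
   markers; hence the "- 42" when looking entries up and the
   "cvec.length () + 41" when inserting.  */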
1059 struct oecount_hasher : int_hash <int, 0, 1>
1061 static inline hashval_t hash (int);
1062 static inline bool equal (int, int);
1065 /* Hash function for oecount. */
1067 inline hashval_t
1068 oecount_hasher::hash (int p)
1070 const oecount *c = &cvec[p - 42];
1071 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1074 /* Comparison function for oecount. */
1076 inline bool
1077 oecount_hasher::equal (int p1, int p2)
1079 const oecount *c1 = &cvec[p1 - 42];
1080 const oecount *c2 = &cvec[p2 - 42];
1081 return (c1->oecode == c2->oecode
1082 && c1->op == c2->op);
1085 /* Comparison function for qsort sorting oecount elements by count. */
1087 static int
1088 oecount_cmp (const void *p1, const void *p2)
1090 const oecount *c1 = (const oecount *)p1;
1091 const oecount *c2 = (const oecount *)p2;
1092 if (c1->cnt != c2->cnt)
1093 return c1->cnt - c2->cnt;
1094 else
1095 /* If counts are identical, use unique IDs to stabilize qsort. */
1096 return c1->id - c2->id;
1099 /* Return TRUE iff STMT represents a builtin call that raises OP
1100 to some exponent. */
1102 static bool
1103 stmt_is_power_of_op (gimple *stmt, tree op)
1105 if (!is_gimple_call (stmt))
1106 return false;
1108 switch (gimple_call_combined_fn (stmt))
1110 CASE_CFN_POW:
1111 CASE_CFN_POWI:
1112 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1114 default:
1115 return false;
1119 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1120 in place and return the result. Assumes that stmt_is_power_of_op
1121 was previously called for STMT and returned TRUE. */
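/* For example (illustrative): __builtin_powi (x, 5) is rewritten in place
   to __builtin_powi (x, 4) and 4 is returned; likewise pow (x, 3.0) becomes
   pow (x, 2.0), returning 2.  */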
1123 static HOST_WIDE_INT
1124 decrement_power (gimple *stmt)
1126 REAL_VALUE_TYPE c, cint;
1127 HOST_WIDE_INT power;
1128 tree arg1;
1130 switch (gimple_call_combined_fn (stmt))
1132 CASE_CFN_POW:
1133 arg1 = gimple_call_arg (stmt, 1);
1134 c = TREE_REAL_CST (arg1);
1135 power = real_to_integer (&c) - 1;
1136 real_from_integer (&cint, VOIDmode, power, SIGNED);
1137 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1138 return power;
1140 CASE_CFN_POWI:
1141 arg1 = gimple_call_arg (stmt, 1);
1142 power = TREE_INT_CST_LOW (arg1) - 1;
1143 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1144 return power;
1146 default:
1147 gcc_unreachable ();
1151 /* Find the single immediate use of STMT's LHS, and replace it
1152 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1153 replace *DEF with OP as well. */
1155 static void
1156 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1158 tree lhs;
1159 gimple *use_stmt;
1160 use_operand_p use;
1161 gimple_stmt_iterator gsi;
1163 if (is_gimple_call (stmt))
1164 lhs = gimple_call_lhs (stmt);
1165 else
1166 lhs = gimple_assign_lhs (stmt);
1168 gcc_assert (has_single_use (lhs));
1169 single_imm_use (lhs, &use, &use_stmt);
1170 if (lhs == *def)
1171 *def = op;
1172 SET_USE (use, op);
1173 if (TREE_CODE (op) != SSA_NAME)
1174 update_stmt (use_stmt);
1175 gsi = gsi_for_stmt (stmt);
1176 unlink_stmt_vdef (stmt);
1177 reassoc_remove_stmt (&gsi);
1178 release_defs (stmt);
1181 /* Walk the linear chain with result *DEF searching for an operation
1182 with operand OP and code OPCODE, and remove it from the chain. *DEF
1183 is updated if there is only one operand but no operation left. */
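/* For example (illustrative): with the chain t1 = a * b; *def = t1 * c;
   and OP == b, the multiplication by b is removed and the chain is reduced
   to the equivalent of *def = a * c.  */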
1185 static void
1186 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1188 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1192 tree name;
1194 if (opcode == MULT_EXPR)
1196 if (stmt_is_power_of_op (stmt, op))
1198 if (decrement_power (stmt) == 1)
1199 propagate_op_to_single_use (op, stmt, def);
1200 return;
1202 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1204 if (gimple_assign_rhs1 (stmt) == op)
1206 propagate_op_to_single_use (op, stmt, def);
1207 return;
1209 else if (integer_minus_onep (op)
1210 || real_minus_onep (op))
1212 gimple_assign_set_rhs_code
1213 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1214 return;
1219 name = gimple_assign_rhs1 (stmt);
1221 /* If this is the operation we look for and one of the operands
1222 is ours simply propagate the other operand into the stmts
1223 single use. */
1224 if (gimple_assign_rhs_code (stmt) == opcode
1225 && (name == op
1226 || gimple_assign_rhs2 (stmt) == op))
1228 if (name == op)
1229 name = gimple_assign_rhs2 (stmt);
1230 propagate_op_to_single_use (name, stmt, def);
1231 return;
1234 /* We might have a multiply of two __builtin_pow* calls, and
1235 the operand might be hiding in the rightmost one. Likewise
1236 this can happen for a negate. */
1237 if (opcode == MULT_EXPR
1238 && gimple_assign_rhs_code (stmt) == opcode
1239 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1240 && has_single_use (gimple_assign_rhs2 (stmt)))
1242 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1243 if (stmt_is_power_of_op (stmt2, op))
1245 if (decrement_power (stmt2) == 1)
1246 propagate_op_to_single_use (op, stmt2, def);
1247 return;
1249 else if (is_gimple_assign (stmt2)
1250 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1252 if (gimple_assign_rhs1 (stmt2) == op)
1254 propagate_op_to_single_use (op, stmt2, def);
1255 return;
1257 else if (integer_minus_onep (op)
1258 || real_minus_onep (op))
1260 gimple_assign_set_rhs_code
1261 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1262 return;
1267 /* Continue walking the chain. */
1268 gcc_assert (name != op
1269 && TREE_CODE (name) == SSA_NAME);
1270 stmt = SSA_NAME_DEF_STMT (name);
1272 while (1);
1275 /* Returns true if statement S1 dominates statement S2. Like
1276 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1278 static bool
1279 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1281 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1283 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of a default
1284 definition (D) SSA_NAME. Assume it lives at the beginning of the function and
1285 thus dominates everything. */
1286 if (!bb1 || s1 == s2)
1287 return true;
1289 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1290 if (!bb2)
1291 return false;
1293 if (bb1 == bb2)
1295 /* PHIs in the same basic block are assumed to be
1296 executed all in parallel; if only one stmt is a PHI,
1297 it dominates the other stmt in the same basic block. */
1298 if (gimple_code (s1) == GIMPLE_PHI)
1299 return true;
1301 if (gimple_code (s2) == GIMPLE_PHI)
1302 return false;
1304 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1306 if (gimple_uid (s1) < gimple_uid (s2))
1307 return true;
1309 if (gimple_uid (s1) > gimple_uid (s2))
1310 return false;
1312 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1313 unsigned int uid = gimple_uid (s1);
1314 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1316 gimple *s = gsi_stmt (gsi);
1317 if (gimple_uid (s) != uid)
1318 break;
1319 if (s == s2)
1320 return true;
1323 return false;
1326 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1329 /* Insert STMT after INSERT_POINT. */
1331 static void
1332 insert_stmt_after (gimple *stmt, gimple *insert_point)
1334 gimple_stmt_iterator gsi;
1335 basic_block bb;
1337 if (gimple_code (insert_point) == GIMPLE_PHI)
1338 bb = gimple_bb (insert_point);
1339 else if (!stmt_ends_bb_p (insert_point))
1341 gsi = gsi_for_stmt (insert_point);
1342 gimple_set_uid (stmt, gimple_uid (insert_point));
1343 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1344 return;
1346 else
1347 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1348 thus if it must end a basic block, it should be a call that can
1349 throw, or some assignment that can throw. If it throws, the LHS
1350 of it will not be initialized though, so only valid places using
1351 the SSA_NAME should be dominated by the fallthru edge. */
1352 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1353 gsi = gsi_after_labels (bb);
1354 if (gsi_end_p (gsi))
1356 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1357 gimple_set_uid (stmt,
1358 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1360 else
1361 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1362 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1365 /* Builds one statement performing OP1 OPCODE OP2, using a new SSA
1366 name of type TYPE for the result. Places the statement after the
1367 definition of either OP1 or OP2. Returns the new statement. */
1369 static gimple *
1370 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1372 gimple *op1def = NULL, *op2def = NULL;
1373 gimple_stmt_iterator gsi;
1374 tree op;
1375 gassign *sum;
1377 /* Create the addition statement. */
1378 op = make_ssa_name (type);
1379 sum = gimple_build_assign (op, opcode, op1, op2);
1381 /* Find an insertion place and insert. */
1382 if (TREE_CODE (op1) == SSA_NAME)
1383 op1def = SSA_NAME_DEF_STMT (op1);
1384 if (TREE_CODE (op2) == SSA_NAME)
1385 op2def = SSA_NAME_DEF_STMT (op2);
1386 if ((!op1def || gimple_nop_p (op1def))
1387 && (!op2def || gimple_nop_p (op2def)))
1389 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1390 if (gsi_end_p (gsi))
1392 gimple_stmt_iterator gsi2
1393 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1394 gimple_set_uid (sum,
1395 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1397 else
1398 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1399 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1401 else
1403 gimple *insert_point;
1404 if ((!op1def || gimple_nop_p (op1def))
1405 || (op2def && !gimple_nop_p (op2def)
1406 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1407 insert_point = op2def;
1408 else
1409 insert_point = op1def;
1410 insert_stmt_after (sum, insert_point);
1412 update_stmt (sum);
1414 return sum;
1417 /* Perform un-distribution of divisions and multiplications.
1418 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1419 to (A + B) / X for real X.
1421 The algorithm is organized as follows.
1423 - First we walk the addition chain *OPS looking for summands that
1424 are defined by a multiplication or a real division. This results
1425 in the candidates bitmap with relevant indices into *OPS.
1427 - Second we build the chains of multiplications or divisions for
1428 these candidates, counting the number of occurrences of (operand, code)
1429 pairs in all of the candidates chains.
1431 - Third we sort the (operand, code) pairs by number of occurrence and
1432 process them starting with the pair with the most uses.
1434 * For each such pair we walk the candidates again to build a
1435 second candidate bitmap noting all multiplication/division chains
1436 that have at least one occurrence of (operand, code).
1438 * We build an alternate addition chain only covering these
1439 candidates with one (operand, code) operation removed from their
1440 multiplication/division chain.
1442 * The first candidate gets replaced by the alternate addition chain
1443 multiplied/divided by the operand.
1445 * All candidate chains get disabled for further processing and
1446 processing of (operand, code) pairs continues.
1448 The alternate addition chains built are re-processed by the main
1449 reassociation algorithm which allows optimizing a * x * y + b * y * x
1450 to (a + b) * x * y in one invocation of the reassociation pass. */
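/* For example (illustrative): for the addition chain a * x + b * x + c * x
   the counting table records (x, *) with count 3 (and (a, *), (b, *) and
   (c, *) with count 1 each), so x is processed first, each candidate chain
   has its multiplication by x stripped, and the chain is rebuilt as
   (a + b + c) * x.  */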
1452 static bool
1453 undistribute_ops_list (enum tree_code opcode,
1454 vec<operand_entry *> *ops, struct loop *loop)
1456 unsigned int length = ops->length ();
1457 operand_entry *oe1;
1458 unsigned i, j;
1459 sbitmap candidates, candidates2;
1460 unsigned nr_candidates, nr_candidates2;
1461 sbitmap_iterator sbi0;
1462 vec<operand_entry *> *subops;
1463 bool changed = false;
1464 int next_oecount_id = 0;
1466 if (length <= 1
1467 || opcode != PLUS_EXPR)
1468 return false;
1470 /* Build a list of candidates to process. */
1471 candidates = sbitmap_alloc (length);
1472 bitmap_clear (candidates);
1473 nr_candidates = 0;
1474 FOR_EACH_VEC_ELT (*ops, i, oe1)
1476 enum tree_code dcode;
1477 gimple *oe1def;
1479 if (TREE_CODE (oe1->op) != SSA_NAME)
1480 continue;
1481 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1482 if (!is_gimple_assign (oe1def))
1483 continue;
1484 dcode = gimple_assign_rhs_code (oe1def);
1485 if ((dcode != MULT_EXPR
1486 && dcode != RDIV_EXPR)
1487 || !is_reassociable_op (oe1def, dcode, loop))
1488 continue;
1490 bitmap_set_bit (candidates, i);
1491 nr_candidates++;
1494 if (nr_candidates < 2)
1496 sbitmap_free (candidates);
1497 return false;
1500 if (dump_file && (dump_flags & TDF_DETAILS))
1502 fprintf (dump_file, "searching for un-distribute opportunities ");
1503 print_generic_expr (dump_file,
1504 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1505 fprintf (dump_file, " %d\n", nr_candidates);
1508 /* Build linearized sub-operand lists and the counting table. */
1509 cvec.create (0);
1511 hash_table<oecount_hasher> ctable (15);
1513 /* ??? Macro arguments cannot have multi-argument template types in
1514 them. This typedef is needed to work around that limitation. */
1515 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1516 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1517 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1519 gimple *oedef;
1520 enum tree_code oecode;
1521 unsigned j;
1523 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1524 oecode = gimple_assign_rhs_code (oedef);
1525 linearize_expr_tree (&subops[i], oedef,
1526 associative_tree_code (oecode), false);
1528 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1530 oecount c;
1531 int *slot;
1532 int idx;
1533 c.oecode = oecode;
1534 c.cnt = 1;
1535 c.id = next_oecount_id++;
1536 c.op = oe1->op;
1537 cvec.safe_push (c);
1538 idx = cvec.length () + 41;
1539 slot = ctable.find_slot (idx, INSERT);
1540 if (!*slot)
1542 *slot = idx;
1544 else
1546 cvec.pop ();
1547 cvec[*slot - 42].cnt++;
1552 /* Sort the counting table. */
1553 cvec.qsort (oecount_cmp);
1555 if (dump_file && (dump_flags & TDF_DETAILS))
1557 oecount *c;
1558 fprintf (dump_file, "Candidates:\n");
1559 FOR_EACH_VEC_ELT (cvec, j, c)
1561 fprintf (dump_file, " %u %s: ", c->cnt,
1562 c->oecode == MULT_EXPR
1563 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1564 print_generic_expr (dump_file, c->op, 0);
1565 fprintf (dump_file, "\n");
1569 /* Process the (operand, code) pairs in order of most occurrence. */
1570 candidates2 = sbitmap_alloc (length);
1571 while (!cvec.is_empty ())
1573 oecount *c = &cvec.last ();
1574 if (c->cnt < 2)
1575 break;
1577 /* Now collect the operands in the outer chain that contain
1578 the common operand in their inner chain. */
1579 bitmap_clear (candidates2);
1580 nr_candidates2 = 0;
1581 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1583 gimple *oedef;
1584 enum tree_code oecode;
1585 unsigned j;
1586 tree op = (*ops)[i]->op;
1588 /* If we undistributed in this chain already, this may be
1589 a constant. */
1590 if (TREE_CODE (op) != SSA_NAME)
1591 continue;
1593 oedef = SSA_NAME_DEF_STMT (op);
1594 oecode = gimple_assign_rhs_code (oedef);
1595 if (oecode != c->oecode)
1596 continue;
1598 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1600 if (oe1->op == c->op)
1602 bitmap_set_bit (candidates2, i);
1603 ++nr_candidates2;
1604 break;
1609 if (nr_candidates2 >= 2)
1611 operand_entry *oe1, *oe2;
1612 gimple *prod;
1613 int first = bitmap_first_set_bit (candidates2);
1615 /* Build the new addition chain. */
1616 oe1 = (*ops)[first];
1617 if (dump_file && (dump_flags & TDF_DETAILS))
1619 fprintf (dump_file, "Building (");
1620 print_generic_expr (dump_file, oe1->op, 0);
1622 zero_one_operation (&oe1->op, c->oecode, c->op);
1623 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1625 gimple *sum;
1626 oe2 = (*ops)[i];
1627 if (dump_file && (dump_flags & TDF_DETAILS))
1629 fprintf (dump_file, " + ");
1630 print_generic_expr (dump_file, oe2->op, 0);
1632 zero_one_operation (&oe2->op, c->oecode, c->op);
1633 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1634 oe1->op, oe2->op, opcode);
1635 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1636 oe2->rank = 0;
1637 oe1->op = gimple_get_lhs (sum);
1640 /* Apply the multiplication/division. */
1641 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1642 oe1->op, c->op, c->oecode);
1643 if (dump_file && (dump_flags & TDF_DETAILS))
1645 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1646 print_generic_expr (dump_file, c->op, 0);
1647 fprintf (dump_file, "\n");
1650 /* Record it in the addition chain and disable further
1651 undistribution with this op. */
1652 oe1->op = gimple_assign_lhs (prod);
1653 oe1->rank = get_rank (oe1->op);
1654 subops[first].release ();
1656 changed = true;
1659 cvec.pop ();
1662 for (i = 0; i < ops->length (); ++i)
1663 subops[i].release ();
1664 free (subops);
1665 cvec.release ();
1666 sbitmap_free (candidates);
1667 sbitmap_free (candidates2);
1669 return changed;
1672 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1673 expression, examine the other OPS to see if any of them are comparisons
1674 of the same values, which we may be able to combine or eliminate.
1675 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1677 static bool
1678 eliminate_redundant_comparison (enum tree_code opcode,
1679 vec<operand_entry *> *ops,
1680 unsigned int currindex,
1681 operand_entry *curr)
1683 tree op1, op2;
1684 enum tree_code lcode, rcode;
1685 gimple *def1, *def2;
1686 int i;
1687 operand_entry *oe;
1689 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1690 return false;
1692 /* Check that CURR is a comparison. */
1693 if (TREE_CODE (curr->op) != SSA_NAME)
1694 return false;
1695 def1 = SSA_NAME_DEF_STMT (curr->op);
1696 if (!is_gimple_assign (def1))
1697 return false;
1698 lcode = gimple_assign_rhs_code (def1);
1699 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1700 return false;
1701 op1 = gimple_assign_rhs1 (def1);
1702 op2 = gimple_assign_rhs2 (def1);
1704 /* Now look for a similar comparison in the remaining OPS. */
1705 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1707 tree t;
1709 if (TREE_CODE (oe->op) != SSA_NAME)
1710 continue;
1711 def2 = SSA_NAME_DEF_STMT (oe->op);
1712 if (!is_gimple_assign (def2))
1713 continue;
1714 rcode = gimple_assign_rhs_code (def2);
1715 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1716 continue;
1718 /* If we got here, we have a match. See if we can combine the
1719 two comparisons. */
1720 if (opcode == BIT_IOR_EXPR)
1721 t = maybe_fold_or_comparisons (lcode, op1, op2,
1722 rcode, gimple_assign_rhs1 (def2),
1723 gimple_assign_rhs2 (def2));
1724 else
1725 t = maybe_fold_and_comparisons (lcode, op1, op2,
1726 rcode, gimple_assign_rhs1 (def2),
1727 gimple_assign_rhs2 (def2));
1728 if (!t)
1729 continue;
1731 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1732 always give us a boolean_type_node value back. If the original
1733 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1734 we need to convert. */
1735 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1736 t = fold_convert (TREE_TYPE (curr->op), t);
1738 if (TREE_CODE (t) != INTEGER_CST
1739 && !operand_equal_p (t, curr->op, 0))
1741 enum tree_code subcode;
1742 tree newop1, newop2;
1743 if (!COMPARISON_CLASS_P (t))
1744 continue;
1745 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1746 STRIP_USELESS_TYPE_CONVERSION (newop1);
1747 STRIP_USELESS_TYPE_CONVERSION (newop2);
1748 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1749 continue;
1752 if (dump_file && (dump_flags & TDF_DETAILS))
1754 fprintf (dump_file, "Equivalence: ");
1755 print_generic_expr (dump_file, curr->op, 0);
1756 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1757 print_generic_expr (dump_file, oe->op, 0);
1758 fprintf (dump_file, " -> ");
1759 print_generic_expr (dump_file, t, 0);
1760 fprintf (dump_file, "\n");
1763 /* Now we can delete oe, as it has been subsumed by the new combined
1764 expression t. */
1765 ops->ordered_remove (i);
1766 reassociate_stats.ops_eliminated ++;
1768 /* If t is the same as curr->op, we're done. Otherwise we must
1769 replace curr->op with t. The special case is if we got a constant
1770 back, in which case we add it to the end instead of in place of
1771 the current entry. */
1772 if (TREE_CODE (t) == INTEGER_CST)
1774 ops->ordered_remove (currindex);
1775 add_to_ops_vec (ops, t);
1777 else if (!operand_equal_p (t, curr->op, 0))
1779 gimple *sum;
1780 enum tree_code subcode;
1781 tree newop1;
1782 tree newop2;
1783 gcc_assert (COMPARISON_CLASS_P (t));
1784 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1785 STRIP_USELESS_TYPE_CONVERSION (newop1);
1786 STRIP_USELESS_TYPE_CONVERSION (newop2);
1787 gcc_checking_assert (is_gimple_val (newop1)
1788 && is_gimple_val (newop2));
1789 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1790 curr->op = gimple_get_lhs (sum);
1792 return true;
1795 return false;
1799 /* Transform repeated addition of the same values into a multiply with
1800 a constant. */
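/* For example (illustrative): x + x + x + y becomes x * 3 + y.  For
   floating-point operands this is only done when
   flag_unsafe_math_optimizations is set, as checked below.  */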
1801 static bool
1802 transform_add_to_multiply (vec<operand_entry *> *ops)
1804 operand_entry *oe;
1805 tree op = NULL_TREE;
1806 int j;
1807 int i, start = -1, end = 0, count = 0;
1808 vec<std::pair <int, int> > indxs = vNULL;
1809 bool changed = false;
1811 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1812 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1813 || !flag_unsafe_math_optimizations))
1814 return false;
1816 /* Look for repeated operands. */
1817 FOR_EACH_VEC_ELT (*ops, i, oe)
1819 if (start == -1)
1821 count = 1;
1822 op = oe->op;
1823 start = i;
1825 else if (operand_equal_p (oe->op, op, 0))
1827 count++;
1828 end = i;
1830 else
1832 if (count > 1)
1833 indxs.safe_push (std::make_pair (start, end));
1834 count = 1;
1835 op = oe->op;
1836 start = i;
1840 if (count > 1)
1841 indxs.safe_push (std::make_pair (start, end));
1843 for (j = indxs.length () - 1; j >= 0; --j)
1845 /* Convert repeated operand addition to multiplication. */
1846 start = indxs[j].first;
1847 end = indxs[j].second;
1848 op = (*ops)[start]->op;
1849 count = end - start + 1;
1850 for (i = end; i >= start; --i)
1851 ops->unordered_remove (i);
1852 tree tmp = make_ssa_name (TREE_TYPE (op));
1853 tree cst = build_int_cst (integer_type_node, count);
1854 gassign *mul_stmt
1855 = gimple_build_assign (tmp, MULT_EXPR,
1856 op, fold_convert (TREE_TYPE (op), cst));
1857 gimple_set_visited (mul_stmt, true);
1858 add_to_ops_vec (ops, tmp, mul_stmt);
1859 changed = true;
1862 return changed;
1866 /* Perform various identities and other optimizations on the list of
1867 operand entries, stored in OPS. The tree code for the binary
1868 operation between all the operands is OPCODE. */
1870 static void
1871 optimize_ops_list (enum tree_code opcode,
1872 vec<operand_entry *> *ops)
1874 unsigned int length = ops->length ();
1875 unsigned int i;
1876 operand_entry *oe;
1877 operand_entry *oelast = NULL;
1878 bool iterate = false;
1880 if (length == 1)
1881 return;
1883 oelast = ops->last ();
1885 /* If the last two are constants, pop the constants off, merge them
1886 and try the next two. */
1887 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1889 operand_entry *oelm1 = (*ops)[length - 2];
1891 if (oelm1->rank == 0
1892 && is_gimple_min_invariant (oelm1->op)
1893 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1894 TREE_TYPE (oelast->op)))
1896 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1897 oelm1->op, oelast->op);
1899 if (folded && is_gimple_min_invariant (folded))
1901 if (dump_file && (dump_flags & TDF_DETAILS))
1902 fprintf (dump_file, "Merging constants\n");
1904 ops->pop ();
1905 ops->pop ();
1907 add_to_ops_vec (ops, folded);
1908 reassociate_stats.constants_eliminated++;
1910 optimize_ops_list (opcode, ops);
1911 return;
1916 eliminate_using_constants (opcode, ops);
1917 oelast = NULL;
1919 for (i = 0; ops->iterate (i, &oe);)
1921 bool done = false;
1923 if (eliminate_not_pairs (opcode, ops, i, oe))
1924 return;
1925 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1926 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1927 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1929 if (done)
1930 return;
1931 iterate = true;
1932 oelast = NULL;
1933 continue;
1935 oelast = oe;
1936 i++;
1939 length = ops->length ();
1940 oelast = ops->last ();
1942 if (iterate)
1943 optimize_ops_list (opcode, ops);
1946 /* The following functions are subroutines to optimize_range_tests and allow
1947 it to try to change a logical combination of comparisons into a range
1948 test.
1950 For example, both
1951 X == 2 || X == 5 || X == 3 || X == 4
and
1953 X >= 2 && X <= 5
1954 are converted to
1955 (unsigned) (X - 2) <= 3
1957 For more information see comments above fold_test_range in fold-const.c,
1958 this implementation is for GIMPLE. */
1960 struct range_entry
1962 tree exp;
1963 tree low;
1964 tree high;
1965 bool in_p;
1966 bool strict_overflow_p;
1967 unsigned int idx, next;
1970 /* This is similar to make_range in fold-const.c, but on top of
1971 GIMPLE instead of trees. If EXP is non-NULL, it should be
1972 an SSA_NAME and the STMT argument is ignored; otherwise STMT
1973 argument should be a GIMPLE_COND. */
1975 static void
1976 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
1978 int in_p;
1979 tree low, high;
1980 bool is_bool, strict_overflow_p;
1982 r->exp = NULL_TREE;
1983 r->in_p = false;
1984 r->strict_overflow_p = false;
1985 r->low = NULL_TREE;
1986 r->high = NULL_TREE;
1987 if (exp != NULL_TREE
1988 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1989 return;
1991 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1992 and see if we can refine the range. Some of the cases below may not
1993 happen, but it doesn't seem worth worrying about this. We "continue"
1994 the outer loop when we've changed something; otherwise we "break"
1995 the switch, which will "break" the while. */
1996 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1997 high = low;
1998 in_p = 0;
1999 strict_overflow_p = false;
2000 is_bool = false;
2001 if (exp == NULL_TREE)
2002 is_bool = true;
2003 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2005 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2006 is_bool = true;
2007 else
2008 return;
2010 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2011 is_bool = true;
2013 while (1)
2015 enum tree_code code;
2016 tree arg0, arg1, exp_type;
2017 tree nexp;
2018 location_t loc;
2020 if (exp != NULL_TREE)
2022 if (TREE_CODE (exp) != SSA_NAME
2023 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2024 break;
2026 stmt = SSA_NAME_DEF_STMT (exp);
2027 if (!is_gimple_assign (stmt))
2028 break;
2030 code = gimple_assign_rhs_code (stmt);
2031 arg0 = gimple_assign_rhs1 (stmt);
2032 arg1 = gimple_assign_rhs2 (stmt);
2033 exp_type = TREE_TYPE (exp);
2035 else
2037 code = gimple_cond_code (stmt);
2038 arg0 = gimple_cond_lhs (stmt);
2039 arg1 = gimple_cond_rhs (stmt);
2040 exp_type = boolean_type_node;
2043 if (TREE_CODE (arg0) != SSA_NAME)
2044 break;
2045 loc = gimple_location (stmt);
2046 switch (code)
2048 case BIT_NOT_EXPR:
2049 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2050 /* Ensure the range is either +[-,0], +[0,0],
2051 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2052 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2053 or similar expression of unconditional true or
2054 false, it should not be negated. */
2055 && ((high && integer_zerop (high))
2056 || (low && integer_onep (low))))
2058 in_p = !in_p;
2059 exp = arg0;
2060 continue;
2062 break;
2063 case SSA_NAME:
2064 exp = arg0;
2065 continue;
2066 CASE_CONVERT:
2067 if (is_bool)
2068 goto do_default;
2069 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2071 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2072 is_bool = true;
2073 else
2074 return;
2076 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2077 is_bool = true;
2078 goto do_default;
2079 case EQ_EXPR:
2080 case NE_EXPR:
2081 case LT_EXPR:
2082 case LE_EXPR:
2083 case GE_EXPR:
2084 case GT_EXPR:
2085 is_bool = true;
2086 /* FALLTHRU */
2087 default:
2088 if (!is_bool)
2089 return;
2090 do_default:
2091 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2092 &low, &high, &in_p,
2093 &strict_overflow_p);
2094 if (nexp != NULL_TREE)
2096 exp = nexp;
2097 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2098 continue;
2100 break;
2102 break;
2104 if (is_bool)
2106 r->exp = exp;
2107 r->in_p = in_p;
2108 r->low = low;
2109 r->high = high;
2110 r->strict_overflow_p = strict_overflow_p;
2114 /* Comparison function for qsort. Sort entries
2115 without SSA_NAME exp first, then with SSA_NAMEs sorted
2116 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2117 by increasing ->low and if ->low is the same, by increasing
2118 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2119 maximum. */
2121 static int
2122 range_entry_cmp (const void *a, const void *b)
2124 const struct range_entry *p = (const struct range_entry *) a;
2125 const struct range_entry *q = (const struct range_entry *) b;
2127 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2129 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2131 /* Group range_entries for the same SSA_NAME together. */
2132 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2133 return -1;
2134 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2135 return 1;
2136 /* If ->low is different, NULL low goes first, then by
2137 ascending low. */
2138 if (p->low != NULL_TREE)
2140 if (q->low != NULL_TREE)
2142 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2143 p->low, q->low);
2144 if (tem && integer_onep (tem))
2145 return -1;
2146 tem = fold_binary (GT_EXPR, boolean_type_node,
2147 p->low, q->low);
2148 if (tem && integer_onep (tem))
2149 return 1;
2151 else
2152 return 1;
2154 else if (q->low != NULL_TREE)
2155 return -1;
2156 /* If ->high is different, NULL high goes last, before that by
2157 ascending high. */
2158 if (p->high != NULL_TREE)
2160 if (q->high != NULL_TREE)
2162 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2163 p->high, q->high);
2164 if (tem && integer_onep (tem))
2165 return -1;
2166 tem = fold_binary (GT_EXPR, boolean_type_node,
2167 p->high, q->high);
2168 if (tem && integer_onep (tem))
2169 return 1;
2171 else
2172 return -1;
2174 else if (q->high != NULL_TREE)
2175 return 1;
2176 /* If both ranges are the same, sort below by ascending idx. */
2178 else
2179 return 1;
2181 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2182 return -1;
2184 if (p->idx < q->idx)
2185 return -1;
2186 else
2188 gcc_checking_assert (p->idx > q->idx);
2189 return 1;
2193 /* Helper routine of optimize_range_tests.
2194 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2195 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2196 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2197 is NULL, OTHERRANGEP should not be NULL; it then points to
2198 an array of COUNT pointers to other ranges. Return
2199 true if the range merge has been successful.
2200 If OPCODE is ERROR_MARK, this is called from within
2201 maybe_optimize_range_tests and is performing inter-bb range optimization.
2202 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2203 oe->rank. */
2205 static bool
2206 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2207 struct range_entry **otherrangep,
2208 unsigned int count, enum tree_code opcode,
2209 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2210 bool in_p, tree low, tree high, bool strict_overflow_p)
2212 operand_entry *oe = (*ops)[range->idx];
2213 tree op = oe->op;
2214 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2215 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2216 location_t loc = gimple_location (stmt);
2217 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2218 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2219 in_p, low, high);
2220 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2221 gimple_stmt_iterator gsi;
2222 unsigned int i, uid;
2224 if (tem == NULL_TREE)
2225 return false;
2227 /* If op is default def SSA_NAME, there is no place to insert the
2228 new comparison. Give up, unless we can use OP itself as the
2229 range test. */
2230 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2232 if (op == range->exp
2233 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2234 || TREE_CODE (optype) == BOOLEAN_TYPE)
2235 && (op == tem
2236 || (TREE_CODE (tem) == EQ_EXPR
2237 && TREE_OPERAND (tem, 0) == op
2238 && integer_onep (TREE_OPERAND (tem, 1))))
2239 && opcode != BIT_IOR_EXPR
2240 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2242 stmt = NULL;
2243 tem = op;
2245 else
2246 return false;
2249 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2250 warning_at (loc, OPT_Wstrict_overflow,
2251 "assuming signed overflow does not occur "
2252 "when simplifying range test");
2254 if (dump_file && (dump_flags & TDF_DETAILS))
2256 struct range_entry *r;
2257 fprintf (dump_file, "Optimizing range tests ");
2258 print_generic_expr (dump_file, range->exp, 0);
2259 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2260 print_generic_expr (dump_file, range->low, 0);
2261 fprintf (dump_file, ", ");
2262 print_generic_expr (dump_file, range->high, 0);
2263 fprintf (dump_file, "]");
2264 for (i = 0; i < count; i++)
2266 if (otherrange)
2267 r = otherrange + i;
2268 else
2269 r = otherrangep[i];
2270 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2271 print_generic_expr (dump_file, r->low, 0);
2272 fprintf (dump_file, ", ");
2273 print_generic_expr (dump_file, r->high, 0);
2274 fprintf (dump_file, "]");
2276 fprintf (dump_file, "\n into ");
2277 print_generic_expr (dump_file, tem, 0);
2278 fprintf (dump_file, "\n");
2281 if (opcode == BIT_IOR_EXPR
2282 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2283 tem = invert_truthvalue_loc (loc, tem);
2285 tem = fold_convert_loc (loc, optype, tem);
2286 if (stmt)
2288 gsi = gsi_for_stmt (stmt);
2289 uid = gimple_uid (stmt);
2291 else
2293 gsi = gsi_none ();
2294 uid = 0;
2296 if (stmt == NULL)
2297 gcc_checking_assert (tem == op);
2298 /* In rare cases range->exp can be equal to lhs of stmt.
2299 In that case we have to insert after the stmt rather than before
2300 it. If stmt is a PHI, insert it at the start of the basic block. */
2301 else if (op != range->exp)
2303 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2304 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2305 GSI_SAME_STMT);
2306 gsi_prev (&gsi);
2308 else if (gimple_code (stmt) != GIMPLE_PHI)
2310 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2311 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2312 GSI_CONTINUE_LINKING);
2314 else
2316 gsi = gsi_after_labels (gimple_bb (stmt));
2317 if (!gsi_end_p (gsi))
2318 uid = gimple_uid (gsi_stmt (gsi));
2319 else
2321 gsi = gsi_start_bb (gimple_bb (stmt));
2322 uid = 1;
2323 while (!gsi_end_p (gsi))
2325 uid = gimple_uid (gsi_stmt (gsi));
2326 gsi_next (&gsi);
2329 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2330 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2331 GSI_SAME_STMT);
2332 if (gsi_end_p (gsi))
2333 gsi = gsi_last_bb (gimple_bb (stmt));
2334 else
2335 gsi_prev (&gsi);
2337 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2338 if (gimple_uid (gsi_stmt (gsi)))
2339 break;
2340 else
2341 gimple_set_uid (gsi_stmt (gsi), uid);
2343 oe->op = tem;
2344 range->exp = exp;
2345 range->low = low;
2346 range->high = high;
2347 range->in_p = in_p;
2348 range->strict_overflow_p = false;
2350 for (i = 0; i < count; i++)
2352 if (otherrange)
2353 range = otherrange + i;
2354 else
2355 range = otherrangep[i];
2356 oe = (*ops)[range->idx];
2357 /* Now change all the other range test immediate uses, so that
2358 those tests will be optimized away. */
2359 if (opcode == ERROR_MARK)
2361 if (oe->op)
2362 oe->op = build_int_cst (TREE_TYPE (oe->op),
2363 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2364 else
2365 oe->op = (oe->rank == BIT_IOR_EXPR
2366 ? boolean_false_node : boolean_true_node);
2368 else
2369 oe->op = error_mark_node;
2370 range->exp = NULL_TREE;
2372 return true;
2375 /* Optimize X == CST1 || X == CST2
2376 if popcount (CST1 ^ CST2) == 1 into
2377 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2378 Similarly for ranges. E.g.
2379 X != 2 && X != 3 && X != 10 && X != 11
2380 will be transformed by the previous optimization into
2381 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2382 and this loop can transform that into
2383 !(((X & ~8) - 2U) <= 1U). */
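/* Illustrative sketch, not part of the pass: a concrete instance of the
   transformation above with CST1 == 2 and CST2 == 3, so CST1 ^ CST2 == 1
   and popcount (1) == 1.  The function names are made up; the two bodies
   are assumed to be equivalent.  */
static int
xor_trick_before (unsigned int x)
{
  return x == 2 || x == 3;
}
static int
xor_trick_after (unsigned int x)
{
  return (x & ~1u) == 2;
}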
2385 static bool
2386 optimize_range_tests_xor (enum tree_code opcode, tree type,
2387 tree lowi, tree lowj, tree highi, tree highj,
2388 vec<operand_entry *> *ops,
2389 struct range_entry *rangei,
2390 struct range_entry *rangej)
2392 tree lowxor, highxor, tem, exp;
2393 /* Check lowi ^ lowj == highi ^ highj and
2394 popcount (lowi ^ lowj) == 1. */
2395 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2396 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2397 return false;
2398 if (!integer_pow2p (lowxor))
2399 return false;
2400 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2401 if (!tree_int_cst_equal (lowxor, highxor))
2402 return false;
2404 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2405 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2406 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2407 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2408 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2409 NULL, rangei->in_p, lowj, highj,
2410 rangei->strict_overflow_p
2411 || rangej->strict_overflow_p))
2412 return true;
2413 return false;
2416 /* Optimize X == CST1 || X == CST2
2417 if popcount (CST2 - CST1) == 1 into
2418 ((X - CST1) & ~(CST2 - CST1)) == 0.
2419 Similarly for ranges. E.g.
2420 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2421 || X == 75 || X == 45
2422 will be transformed by the previous optimization into
2423 (X - 43U) <= 3U || (X - 75U) <= 3U
2424 and this loop can transform that into
2425 ((X - 43U) & ~(75U - 43U)) <= 3U. */
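/* Illustrative sketch, not part of the pass: a concrete instance of the
   transformation above with CST1 == 43 and CST2 == 75, so CST2 - CST1 == 32
   and popcount (32) == 1.  The function names are made up; the two bodies
   are assumed to be equivalent.  */
static int
diff_trick_before (unsigned int x)
{
  return x == 43 || x == 75;
}
static int
diff_trick_after (unsigned int x)
{
  return ((x - 43u) & ~32u) == 0;
}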
2426 static bool
2427 optimize_range_tests_diff (enum tree_code opcode, tree type,
2428 tree lowi, tree lowj, tree highi, tree highj,
2429 vec<operand_entry *> *ops,
2430 struct range_entry *rangei,
2431 struct range_entry *rangej)
2433 tree tem1, tem2, mask;
2434 /* Check highi - lowi == highj - lowj. */
2435 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2436 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2437 return false;
2438 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2439 if (!tree_int_cst_equal (tem1, tem2))
2440 return false;
2441 /* Check popcount (lowj - lowi) == 1. */
2442 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2443 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2444 return false;
2445 if (!integer_pow2p (tem1))
2446 return false;
2448 type = unsigned_type_for (type);
2449 tem1 = fold_convert (type, tem1);
2450 tem2 = fold_convert (type, tem2);
2451 lowi = fold_convert (type, lowi);
2452 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2453 tem1 = fold_binary (MINUS_EXPR, type,
2454 fold_convert (type, rangei->exp), lowi);
2455 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2456 lowj = build_int_cst (type, 0);
2457 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2458 NULL, rangei->in_p, lowj, tem2,
2459 rangei->strict_overflow_p
2460 || rangej->strict_overflow_p))
2461 return true;
2462 return false;
2465 /* Perform the checks common to optimize_range_tests_xor and
2466 optimize_range_tests_diff.
2467 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor.
2468 Else call optimize_range_tests_diff. */
2470 static bool
2471 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2472 bool optimize_xor, vec<operand_entry *> *ops,
2473 struct range_entry *ranges)
2475 int i, j;
2476 bool any_changes = false;
2477 for (i = first; i < length; i++)
2479 tree lowi, highi, lowj, highj, type, tem;
2481 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2482 continue;
2483 type = TREE_TYPE (ranges[i].exp);
2484 if (!INTEGRAL_TYPE_P (type))
2485 continue;
2486 lowi = ranges[i].low;
2487 if (lowi == NULL_TREE)
2488 lowi = TYPE_MIN_VALUE (type);
2489 highi = ranges[i].high;
2490 if (highi == NULL_TREE)
2491 continue;
2492 for (j = i + 1; j < length && j < i + 64; j++)
2494 bool changes;
2495 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2496 continue;
2497 lowj = ranges[j].low;
2498 if (lowj == NULL_TREE)
2499 continue;
2500 highj = ranges[j].high;
2501 if (highj == NULL_TREE)
2502 highj = TYPE_MAX_VALUE (type);
2503 /* Check lowj > highi. */
2504 tem = fold_binary (GT_EXPR, boolean_type_node,
2505 lowj, highi);
2506 if (tem == NULL_TREE || !integer_onep (tem))
2507 continue;
2508 if (optimize_xor)
2509 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2510 highi, highj, ops,
2511 ranges + i, ranges + j);
2512 else
2513 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2514 highi, highj, ops,
2515 ranges + i, ranges + j);
2516 if (changes)
2518 any_changes = true;
2519 break;
2523 return any_changes;
2526 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2527 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2528 EXP on success, NULL otherwise. */
2530 static tree
2531 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2532 wide_int *mask, tree *totallowp)
2534 tree tem = int_const_binop (MINUS_EXPR, high, low);
2535 if (tem == NULL_TREE
2536 || TREE_CODE (tem) != INTEGER_CST
2537 || TREE_OVERFLOW (tem)
2538 || tree_int_cst_sgn (tem) == -1
2539 || compare_tree_int (tem, prec) != -1)
2540 return NULL_TREE;
2542 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2543 *mask = wi::shifted_mask (0, max, false, prec);
2544 if (TREE_CODE (exp) == BIT_AND_EXPR
2545 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2547 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2548 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2549 if (wi::popcount (msk) == 1
2550 && wi::ltu_p (msk, prec - max))
2552 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2553 max += msk.to_uhwi ();
2554 exp = TREE_OPERAND (exp, 0);
2555 if (integer_zerop (low)
2556 && TREE_CODE (exp) == PLUS_EXPR
2557 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2559 tree ret = TREE_OPERAND (exp, 0);
2560 STRIP_NOPS (ret);
2561 widest_int bias
2562 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2563 TYPE_PRECISION (TREE_TYPE (low))));
2564 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2565 if (totallowp)
2567 *totallowp = tbias;
2568 return ret;
2570 else if (!tree_int_cst_lt (totallow, tbias))
2571 return NULL_TREE;
2572 bias = wi::to_widest (tbias);
2573 bias -= wi::to_widest (totallow);
2574 if (bias >= 0 && bias < prec - max)
2576 *mask = wi::lshift (*mask, bias);
2577 return ret;
2582 if (totallowp)
2583 return exp;
2584 if (!tree_int_cst_lt (totallow, low))
2585 return exp;
2586 tem = int_const_binop (MINUS_EXPR, low, totallow);
2587 if (tem == NULL_TREE
2588 || TREE_CODE (tem) != INTEGER_CST
2589 || TREE_OVERFLOW (tem)
2590 || compare_tree_int (tem, prec - max) == 1)
2591 return NULL_TREE;
2593 *mask = wi::lshift (*mask, wi::to_widest (tem));
2594 return exp;
2597 /* Attempt to optimize small range tests using bit test.
2598 E.g.
2599 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2600 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2601 has been optimized by the earlier range test optimizations into:
2602 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2603 As the whole 43 through 82 range spans fewer than 64 values,
2604 for 64-bit word targets optimize that into:
2605 (X - 43U) > 40U || ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
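/* Illustrative sketch, not part of the pass: a smaller bit-test instance
   for the forbidden values {43, 44, 46, 49}.  The offsets from 43 are
   {0, 1, 3, 6}, giving the mask 0x4b; the short-circuit || keeps the shift
   count below the word size.  Function names are made up; the two bodies
   are assumed to be equivalent.  */
static int
bit_test_before (unsigned int x)
{
  return x != 43 && x != 44 && x != 46 && x != 49;
}
static int
bit_test_after (unsigned int x)
{
  return x - 43u > 6u || ((1u << (x - 43u)) & 0x4bu) == 0;
}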
2607 static bool
2608 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2609 vec<operand_entry *> *ops,
2610 struct range_entry *ranges)
2612 int i, j;
2613 bool any_changes = false;
2614 int prec = GET_MODE_BITSIZE (word_mode);
2615 auto_vec<struct range_entry *, 64> candidates;
2617 for (i = first; i < length - 2; i++)
2619 tree lowi, highi, lowj, highj, type;
2621 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2622 continue;
2623 type = TREE_TYPE (ranges[i].exp);
2624 if (!INTEGRAL_TYPE_P (type))
2625 continue;
2626 lowi = ranges[i].low;
2627 if (lowi == NULL_TREE)
2628 lowi = TYPE_MIN_VALUE (type);
2629 highi = ranges[i].high;
2630 if (highi == NULL_TREE)
2631 continue;
2632 wide_int mask;
2633 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2634 highi, &mask, &lowi);
2635 if (exp == NULL_TREE)
2636 continue;
2637 bool strict_overflow_p = ranges[i].strict_overflow_p;
2638 candidates.truncate (0);
2639 int end = MIN (i + 64, length);
2640 for (j = i + 1; j < end; j++)
2642 tree exp2;
2643 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2644 continue;
2645 if (ranges[j].exp == exp)
2647 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2649 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2650 if (exp2 == exp)
2652 else if (TREE_CODE (exp2) == PLUS_EXPR)
2654 exp2 = TREE_OPERAND (exp2, 0);
2655 STRIP_NOPS (exp2);
2656 if (exp2 != exp)
2657 continue;
2659 else
2660 continue;
2662 else
2663 continue;
2664 lowj = ranges[j].low;
2665 if (lowj == NULL_TREE)
2666 continue;
2667 highj = ranges[j].high;
2668 if (highj == NULL_TREE)
2669 highj = TYPE_MAX_VALUE (type);
2670 wide_int mask2;
2671 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2672 highj, &mask2, NULL);
2673 if (exp2 != exp)
2674 continue;
2675 mask |= mask2;
2676 strict_overflow_p |= ranges[j].strict_overflow_p;
2677 candidates.safe_push (&ranges[j]);
2680 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2681 if (candidates.length () >= 2)
2683 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2684 wi::to_widest (lowi)
2685 + prec - 1 - wi::clz (mask));
2686 operand_entry *oe = (*ops)[ranges[i].idx];
2687 tree op = oe->op;
2688 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2689 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2690 location_t loc = gimple_location (stmt);
2691 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2693 /* See if it isn't cheaper to pretend the minimum value of the
2694 range is 0, if the maximum value is small enough.
2695 We can then avoid the subtraction of the minimum value, but the
2696 mask constant could perhaps be more expensive. */
2697 if (compare_tree_int (lowi, 0) > 0
2698 && compare_tree_int (high, prec) < 0)
2700 int cost_diff;
2701 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2702 rtx reg = gen_raw_REG (word_mode, 10000);
2703 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2704 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2705 GEN_INT (-m)), speed_p);
2706 rtx r = immed_wide_int_const (mask, word_mode);
2707 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2708 word_mode, speed_p);
2709 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2710 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2711 word_mode, speed_p);
2712 if (cost_diff > 0)
2714 mask = wi::lshift (mask, m);
2715 lowi = build_zero_cst (TREE_TYPE (lowi));
2719 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2720 false, lowi, high);
2721 if (tem == NULL_TREE || is_gimple_val (tem))
2722 continue;
2723 tree etype = unsigned_type_for (TREE_TYPE (exp));
2724 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2725 fold_convert_loc (loc, etype, exp),
2726 fold_convert_loc (loc, etype, lowi));
2727 exp = fold_convert_loc (loc, integer_type_node, exp);
2728 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2729 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2730 build_int_cst (word_type, 1), exp);
2731 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2732 wide_int_to_tree (word_type, mask));
2733 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2734 build_zero_cst (word_type));
2735 if (is_gimple_val (exp))
2736 continue;
2738 /* The shift might have undefined behavior if TEM is true,
2739 but reassociate_bb isn't prepared to have basic blocks
2740 split when it is running. So, temporarily emit code
2741 with BIT_IOR_EXPR instead of &&, and fix it up in
2742 branch_fixup. */
2743 gimple_seq seq;
2744 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2745 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2746 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2747 gimple_seq seq2;
2748 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2749 gimple_seq_add_seq_without_update (&seq, seq2);
2750 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2751 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2752 gimple *g = gimple_build_assign (make_ssa_name (optype),
2753 BIT_IOR_EXPR, tem, exp);
2754 gimple_set_location (g, loc);
2755 gimple_seq_add_stmt_without_update (&seq, g);
2756 exp = gimple_assign_lhs (g);
2757 tree val = build_zero_cst (optype);
2758 if (update_range_test (&ranges[i], NULL, candidates.address (),
2759 candidates.length (), opcode, ops, exp,
2760 seq, false, val, val, strict_overflow_p))
2762 any_changes = true;
2763 reassoc_branch_fixups.safe_push (tem);
2765 else
2766 gimple_seq_discard (seq);
2769 return any_changes;
2772 /* Optimize range tests, similarly to how fold_range_test optimizes
2773 them on trees. The tree code for the binary
2774 operation between all the operands is OPCODE.
2775 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2776 maybe_optimize_range_tests for inter-bb range optimization.
2777 In that case, if oe->op is NULL, oe->id is the index of the bb whose
2778 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2779 the actual opcode. */
2781 static bool
2782 optimize_range_tests (enum tree_code opcode,
2783 vec<operand_entry *> *ops)
2785 unsigned int length = ops->length (), i, j, first;
2786 operand_entry *oe;
2787 struct range_entry *ranges;
2788 bool any_changes = false;
2790 if (length == 1)
2791 return false;
2793 ranges = XNEWVEC (struct range_entry, length);
2794 for (i = 0; i < length; i++)
2796 oe = (*ops)[i];
2797 ranges[i].idx = i;
2798 init_range_entry (ranges + i, oe->op,
2799 oe->op
2800 ? NULL
2801 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2802 /* For | invert it now, we will invert it again before emitting
2803 the optimized expression. */
2804 if (opcode == BIT_IOR_EXPR
2805 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2806 ranges[i].in_p = !ranges[i].in_p;
2809 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2810 for (i = 0; i < length; i++)
2811 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2812 break;
2814 /* Try to merge ranges. */
2815 for (first = i; i < length; i++)
2817 tree low = ranges[i].low;
2818 tree high = ranges[i].high;
2819 int in_p = ranges[i].in_p;
2820 bool strict_overflow_p = ranges[i].strict_overflow_p;
2821 int update_fail_count = 0;
2823 for (j = i + 1; j < length; j++)
2825 if (ranges[i].exp != ranges[j].exp)
2826 break;
2827 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2828 ranges[j].in_p, ranges[j].low, ranges[j].high))
2829 break;
2830 strict_overflow_p |= ranges[j].strict_overflow_p;
2833 if (j == i + 1)
2834 continue;
2836 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2837 opcode, ops, ranges[i].exp, NULL, in_p,
2838 low, high, strict_overflow_p))
2840 i = j - 1;
2841 any_changes = true;
2843 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2844 while update_range_test would fail. */
2845 else if (update_fail_count == 64)
2846 i = j - 1;
2847 else
2848 ++update_fail_count;
2851 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2852 ops, ranges);
2854 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2855 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2856 ops, ranges);
2857 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2858 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2859 ops, ranges);
2861 if (any_changes && opcode != ERROR_MARK)
2863 j = 0;
2864 FOR_EACH_VEC_ELT (*ops, i, oe)
2866 if (oe->op == error_mark_node)
2867 continue;
2868 else if (i != j)
2869 (*ops)[j] = oe;
2870 j++;
2872 ops->truncate (j);
2875 XDELETEVEC (ranges);
2876 return any_changes;
2879 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
2880 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
2881 otherwise the comparison code. */
2883 static tree_code
2884 ovce_extract_ops (tree var, gassign **rets, bool *reti)
2886 if (TREE_CODE (var) != SSA_NAME)
2887 return ERROR_MARK;
2889 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
2890 if (stmt == NULL)
2891 return ERROR_MARK;
2893 /* ??? If we start creating more COND_EXPR, we could perform
2894 this same optimization with them. For now, simplify. */
2895 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
2896 return ERROR_MARK;
2898 tree cond = gimple_assign_rhs1 (stmt);
2899 tree_code cmp = TREE_CODE (cond);
2900 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
2901 return ERROR_MARK;
2903 /* ??? For now, allow only canonical true and false result vectors.
2904 We could expand this to other constants should the need arise,
2905 but at the moment we don't create them. */
2906 tree t = gimple_assign_rhs2 (stmt);
2907 tree f = gimple_assign_rhs3 (stmt);
2908 bool inv;
2909 if (integer_all_onesp (t))
2910 inv = false;
2911 else if (integer_all_onesp (f))
2913 cmp = invert_tree_comparison (cmp, false);
2914 inv = true;
2916 else
2917 return ERROR_MARK;
2918 if (!integer_zerop (f))
2919 return ERROR_MARK;
2921 /* Success! */
2922 if (rets)
2923 *rets = stmt;
2924 if (reti)
2925 *reti = inv;
2926 return cmp;
2929 /* Optimize the condition of VEC_COND_EXPRs which have been combined
2930 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
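/* Illustrative sketch, not part of the pass: the kind of source code the
   optimization above targets, written with GCC's generic vector extension.
   Each comparison becomes a VEC_COND_EXPR with all-ones/zero lanes, and the
   two conditions combined with & are candidates for
   maybe_fold_and_comparisons.  The typedef and function name are made up.  */
typedef int v4si_example __attribute__ ((vector_size (16)));
static v4si_example
vec_cond_and_example (v4si_example a, v4si_example b)
{
  return (a < b) & (a != b);
}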
2932 static bool
2933 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
2935 unsigned int length = ops->length (), i, j;
2936 bool any_changes = false;
2938 if (length == 1)
2939 return false;
2941 for (i = 0; i < length; ++i)
2943 tree elt0 = (*ops)[i]->op;
2945 gassign *stmt0;
2946 bool invert;
2947 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
2948 if (cmp0 == ERROR_MARK)
2949 continue;
2951 for (j = i + 1; j < length; ++j)
2953 tree &elt1 = (*ops)[j]->op;
2955 gassign *stmt1;
2956 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
2957 if (cmp1 == ERROR_MARK)
2958 continue;
2960 tree cond0 = gimple_assign_rhs1 (stmt0);
2961 tree x0 = TREE_OPERAND (cond0, 0);
2962 tree y0 = TREE_OPERAND (cond0, 1);
2964 tree cond1 = gimple_assign_rhs1 (stmt1);
2965 tree x1 = TREE_OPERAND (cond1, 0);
2966 tree y1 = TREE_OPERAND (cond1, 1);
2968 tree comb;
2969 if (opcode == BIT_AND_EXPR)
2970 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
2971 else if (opcode == BIT_IOR_EXPR)
2972 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
2973 else
2974 gcc_unreachable ();
2975 if (comb == NULL)
2976 continue;
2978 /* Success! */
2979 if (dump_file && (dump_flags & TDF_DETAILS))
2981 fprintf (dump_file, "Transforming ");
2982 print_generic_expr (dump_file, cond0, 0);
2983 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
2984 print_generic_expr (dump_file, cond1, 0);
2985 fprintf (dump_file, " into ");
2986 print_generic_expr (dump_file, comb, 0);
2987 fputc ('\n', dump_file);
2990 gimple_assign_set_rhs1 (stmt0, comb);
2991 if (invert)
2992 std::swap (*gimple_assign_rhs2_ptr (stmt0),
2993 *gimple_assign_rhs3_ptr (stmt0));
2994 update_stmt (stmt0);
2996 elt1 = error_mark_node;
2997 any_changes = true;
3001 if (any_changes)
3003 operand_entry *oe;
3004 j = 0;
3005 FOR_EACH_VEC_ELT (*ops, i, oe)
3007 if (oe->op == error_mark_node)
3008 continue;
3009 else if (i != j)
3010 (*ops)[j] = oe;
3011 j++;
3013 ops->truncate (j);
3016 return any_changes;
3019 /* Return true if STMT is a cast like:
3020 <bb N>:
3022 _123 = (int) _234;
3024 <bb M>:
3025 # _345 = PHI <_123(N), 1(...), 1(...)>
3026 where _234 has bool type, _123 has single use and
3027 bb N has a single successor M. This is commonly used in
3028 the last block of a range test. */
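/* Illustrative sketch, not part of the pass: source code whose last
   range-test block typically ends in the bool-to-int cast matched by
   final_range_test_p -- the result of the final comparison is converted to
   int and fed into a PHI together with the constant 1 coming from the
   short-circuited path.  The function name is made up.  */
static int
final_range_test_example (int x, int y)
{
  return x == 3 || y == 5;
}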
3030 static bool
3031 final_range_test_p (gimple *stmt)
3033 basic_block bb, rhs_bb;
3034 edge e;
3035 tree lhs, rhs;
3036 use_operand_p use_p;
3037 gimple *use_stmt;
3039 if (!gimple_assign_cast_p (stmt))
3040 return false;
3041 bb = gimple_bb (stmt);
3042 if (!single_succ_p (bb))
3043 return false;
3044 e = single_succ_edge (bb);
3045 if (e->flags & EDGE_COMPLEX)
3046 return false;
3048 lhs = gimple_assign_lhs (stmt);
3049 rhs = gimple_assign_rhs1 (stmt);
3050 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3051 || TREE_CODE (rhs) != SSA_NAME
3052 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
3053 return false;
3055 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3056 if (!single_imm_use (lhs, &use_p, &use_stmt))
3057 return false;
3059 if (gimple_code (use_stmt) != GIMPLE_PHI
3060 || gimple_bb (use_stmt) != e->dest)
3061 return false;
3063 /* And that the rhs is defined in the same loop. */
3064 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
3065 if (rhs_bb == NULL
3066 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3067 return false;
3069 return true;
3072 /* Return true if BB is suitable basic block for inter-bb range test
3073 optimization. If BACKWARD is true, BB should be the only predecessor
3074 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
3075 or compared against to find a common basic block to which all conditions
3076 branch if true resp. false. If BACKWARD is false, TEST_BB should
3077 be the only predecessor of BB. */
3079 static bool
3080 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3081 bool backward)
3083 edge_iterator ei, ei2;
3084 edge e, e2;
3085 gimple *stmt;
3086 gphi_iterator gsi;
3087 bool other_edge_seen = false;
3088 bool is_cond;
3090 if (test_bb == bb)
3091 return false;
3092 /* Check last stmt first. */
3093 stmt = last_stmt (bb);
3094 if (stmt == NULL
3095 || (gimple_code (stmt) != GIMPLE_COND
3096 && (backward || !final_range_test_p (stmt)))
3097 || gimple_visited_p (stmt)
3098 || stmt_could_throw_p (stmt)
3099 || *other_bb == bb)
3100 return false;
3101 is_cond = gimple_code (stmt) == GIMPLE_COND;
3102 if (is_cond)
3104 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3105 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3106 to *OTHER_BB (if not set yet, try to find it out). */
3107 if (EDGE_COUNT (bb->succs) != 2)
3108 return false;
3109 FOR_EACH_EDGE (e, ei, bb->succs)
3111 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3112 return false;
3113 if (e->dest == test_bb)
3115 if (backward)
3116 continue;
3117 else
3118 return false;
3120 if (e->dest == bb)
3121 return false;
3122 if (*other_bb == NULL)
3124 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3125 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3126 return false;
3127 else if (e->dest == e2->dest)
3128 *other_bb = e->dest;
3129 if (*other_bb == NULL)
3130 return false;
3132 if (e->dest == *other_bb)
3133 other_edge_seen = true;
3134 else if (backward)
3135 return false;
3137 if (*other_bb == NULL || !other_edge_seen)
3138 return false;
3140 else if (single_succ (bb) != *other_bb)
3141 return false;
3143 /* Now check all PHIs of *OTHER_BB. */
3144 e = find_edge (bb, *other_bb);
3145 e2 = find_edge (test_bb, *other_bb);
3146 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3148 gphi *phi = gsi.phi ();
3149 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3150 corresponding to BB and TEST_BB predecessor must be the same. */
3151 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3152 gimple_phi_arg_def (phi, e2->dest_idx), 0))
3154 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3155 one of the PHIs should have the lhs of the last stmt in
3156 that block as PHI arg and that PHI should have 0 or 1
3157 corresponding to it in all other range test basic blocks
3158 considered. */
3159 if (!is_cond)
3161 if (gimple_phi_arg_def (phi, e->dest_idx)
3162 == gimple_assign_lhs (stmt)
3163 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3164 || integer_onep (gimple_phi_arg_def (phi,
3165 e2->dest_idx))))
3166 continue;
3168 else
3170 gimple *test_last = last_stmt (test_bb);
3171 if (gimple_code (test_last) != GIMPLE_COND
3172 && gimple_phi_arg_def (phi, e2->dest_idx)
3173 == gimple_assign_lhs (test_last)
3174 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3175 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3176 continue;
3179 return false;
3182 return true;
3185 /* Return true if BB doesn't have side-effects that would disallow
3186 range test optimization, all SSA_NAMEs set in the bb are consumed
3187 in the bb and there are no PHIs. */
3189 static bool
3190 no_side_effect_bb (basic_block bb)
3192 gimple_stmt_iterator gsi;
3193 gimple *last;
3195 if (!gimple_seq_empty_p (phi_nodes (bb)))
3196 return false;
3197 last = last_stmt (bb);
3198 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3200 gimple *stmt = gsi_stmt (gsi);
3201 tree lhs;
3202 imm_use_iterator imm_iter;
3203 use_operand_p use_p;
3205 if (is_gimple_debug (stmt))
3206 continue;
3207 if (gimple_has_side_effects (stmt))
3208 return false;
3209 if (stmt == last)
3210 return true;
3211 if (!is_gimple_assign (stmt))
3212 return false;
3213 lhs = gimple_assign_lhs (stmt);
3214 if (TREE_CODE (lhs) != SSA_NAME)
3215 return false;
3216 if (gimple_assign_rhs_could_trap_p (stmt))
3217 return false;
3218 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3220 gimple *use_stmt = USE_STMT (use_p);
3221 if (is_gimple_debug (use_stmt))
3222 continue;
3223 if (gimple_bb (use_stmt) != bb)
3224 return false;
3227 return false;
3230 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3231 return true and fill in *OPS recursively. */
3233 static bool
3234 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3235 struct loop *loop)
3237 gimple *stmt = SSA_NAME_DEF_STMT (var);
3238 tree rhs[2];
3239 int i;
3241 if (!is_reassociable_op (stmt, code, loop))
3242 return false;
3244 rhs[0] = gimple_assign_rhs1 (stmt);
3245 rhs[1] = gimple_assign_rhs2 (stmt);
3246 gimple_set_visited (stmt, true);
3247 for (i = 0; i < 2; i++)
3248 if (TREE_CODE (rhs[i]) == SSA_NAME
3249 && !get_ops (rhs[i], code, ops, loop)
3250 && has_single_use (rhs[i]))
3252 operand_entry *oe = operand_entry_pool.allocate ();
3254 oe->op = rhs[i];
3255 oe->rank = code;
3256 oe->id = 0;
3257 oe->count = 1;
3258 oe->stmt_to_insert = NULL;
3259 ops->safe_push (oe);
3261 return true;
3264 /* Find the ops that were added by get_ops starting from VAR, see if
3265 they were changed during update_range_test and if yes, create new
3266 stmts. */
3268 static tree
3269 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3270 unsigned int *pidx, struct loop *loop)
3272 gimple *stmt = SSA_NAME_DEF_STMT (var);
3273 tree rhs[4];
3274 int i;
3276 if (!is_reassociable_op (stmt, code, loop))
3277 return NULL;
3279 rhs[0] = gimple_assign_rhs1 (stmt);
3280 rhs[1] = gimple_assign_rhs2 (stmt);
3281 rhs[2] = rhs[0];
3282 rhs[3] = rhs[1];
3283 for (i = 0; i < 2; i++)
3284 if (TREE_CODE (rhs[i]) == SSA_NAME)
3286 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3287 if (rhs[2 + i] == NULL_TREE)
3289 if (has_single_use (rhs[i]))
3290 rhs[2 + i] = ops[(*pidx)++]->op;
3291 else
3292 rhs[2 + i] = rhs[i];
3295 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3296 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3298 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3299 var = make_ssa_name (TREE_TYPE (var));
3300 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3301 rhs[2], rhs[3]);
3302 gimple_set_uid (g, gimple_uid (stmt));
3303 gimple_set_visited (g, true);
3304 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3306 return var;
3309 /* Structure to track the initial value passed to get_ops and
3310 the range in the ops vector for each basic block. */
3312 struct inter_bb_range_test_entry
3314 tree op;
3315 unsigned int first_idx, last_idx;
3318 /* Inter-bb range test optimization.
3320 Returns TRUE if a GIMPLE conditional is optimized to a constant true/false,
3321 otherwise returns FALSE.
3323 This indicates to the caller that it should run a CFG cleanup pass
3324 once reassociation is completed. */
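/* Illustrative sketch, not part of the pass: the kind of control flow the
   inter-bb range test optimization targets.  Each comparison below ends up
   in its own basic block terminated by a GIMPLE_COND, yet together they
   form the single range test (unsigned) (x - 2) <= 3.  The function name
   is made up.  */
static int
inter_bb_range_test_example (int x)
{
  if (x == 2)
    return 1;
  if (x == 3)
    return 1;
  if (x == 4)
    return 1;
  if (x == 5)
    return 1;
  return 0;
}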
3326 static bool
3327 maybe_optimize_range_tests (gimple *stmt)
3329 basic_block first_bb = gimple_bb (stmt);
3330 basic_block last_bb = first_bb;
3331 basic_block other_bb = NULL;
3332 basic_block bb;
3333 edge_iterator ei;
3334 edge e;
3335 auto_vec<operand_entry *> ops;
3336 auto_vec<inter_bb_range_test_entry> bbinfo;
3337 bool any_changes = false;
3338 bool cfg_cleanup_needed = false;
3340 /* Consider only basic blocks that end with GIMPLE_COND or
3341 a cast statement satisfying final_range_test_p. All
3342 but the last bb in the first_bb .. last_bb range
3343 should end with GIMPLE_COND. */
3344 if (gimple_code (stmt) == GIMPLE_COND)
3346 if (EDGE_COUNT (first_bb->succs) != 2)
3347 return cfg_cleanup_needed;
3349 else if (final_range_test_p (stmt))
3350 other_bb = single_succ (first_bb);
3351 else
3352 return cfg_cleanup_needed;
3354 if (stmt_could_throw_p (stmt))
3355 return cfg_cleanup_needed;
3357 /* As relative ordering of post-dominator sons isn't fixed,
3358 maybe_optimize_range_tests can be called first on any
3359 bb in the range we want to optimize. So, start searching
3360 backwards, if first_bb can be set to a predecessor. */
3361 while (single_pred_p (first_bb))
3363 basic_block pred_bb = single_pred (first_bb);
3364 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3365 break;
3366 if (!no_side_effect_bb (first_bb))
3367 break;
3368 first_bb = pred_bb;
3370 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3371 Before starting forward search in last_bb successors, find
3372 out the other_bb. */
3373 if (first_bb == last_bb)
3375 other_bb = NULL;
3376 /* As non-GIMPLE_COND last stmt always terminates the range,
3377 if forward search didn't discover anything, just give up. */
3378 if (gimple_code (stmt) != GIMPLE_COND)
3379 return cfg_cleanup_needed;
3380 /* Look at both successors. Either it ends with a GIMPLE_COND
3381 and satisfies suitable_cond_bb, or ends with a cast and
3382 other_bb is that cast's successor. */
3383 FOR_EACH_EDGE (e, ei, first_bb->succs)
3384 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3385 || e->dest == first_bb)
3386 return cfg_cleanup_needed;
3387 else if (single_pred_p (e->dest))
3389 stmt = last_stmt (e->dest);
3390 if (stmt
3391 && gimple_code (stmt) == GIMPLE_COND
3392 && EDGE_COUNT (e->dest->succs) == 2)
3394 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3395 break;
3396 else
3397 other_bb = NULL;
3399 else if (stmt
3400 && final_range_test_p (stmt)
3401 && find_edge (first_bb, single_succ (e->dest)))
3403 other_bb = single_succ (e->dest);
3404 if (other_bb == first_bb)
3405 other_bb = NULL;
3408 if (other_bb == NULL)
3409 return cfg_cleanup_needed;
3411 /* Now do the forward search, moving last_bb to successor bbs
3412 that aren't other_bb. */
3413 while (EDGE_COUNT (last_bb->succs) == 2)
3415 FOR_EACH_EDGE (e, ei, last_bb->succs)
3416 if (e->dest != other_bb)
3417 break;
3418 if (e == NULL)
3419 break;
3420 if (!single_pred_p (e->dest))
3421 break;
3422 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3423 break;
3424 if (!no_side_effect_bb (e->dest))
3425 break;
3426 last_bb = e->dest;
3428 if (first_bb == last_bb)
3429 return cfg_cleanup_needed;
3430 /* Here basic blocks first_bb through last_bb's predecessor
3431 end with GIMPLE_COND, all of them have one of the edges to
3432 other_bb and another to another block in the range,
3433 all blocks except first_bb don't have side-effects and
3434 last_bb ends with either GIMPLE_COND, or cast satisfying
3435 final_range_test_p. */
3436 for (bb = last_bb; ; bb = single_pred (bb))
3438 enum tree_code code;
3439 tree lhs, rhs;
3440 inter_bb_range_test_entry bb_ent;
3442 bb_ent.op = NULL_TREE;
3443 bb_ent.first_idx = ops.length ();
3444 bb_ent.last_idx = bb_ent.first_idx;
3445 e = find_edge (bb, other_bb);
3446 stmt = last_stmt (bb);
3447 gimple_set_visited (stmt, true);
3448 if (gimple_code (stmt) != GIMPLE_COND)
3450 use_operand_p use_p;
3451 gimple *phi;
3452 edge e2;
3453 unsigned int d;
3455 lhs = gimple_assign_lhs (stmt);
3456 rhs = gimple_assign_rhs1 (stmt);
3457 gcc_assert (bb == last_bb);
3459 /* stmt is
3460 _123 = (int) _234;
3462 followed by:
3463 <bb M>:
3464 # _345 = PHI <_123(N), 1(...), 1(...)>
3466 or 0 instead of 1. If it is 0, the _234
3467 range test is anded together with all the
3468 other range tests, if it is 1, it is ored with
3469 them. */
3470 single_imm_use (lhs, &use_p, &phi);
3471 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3472 e2 = find_edge (first_bb, other_bb);
3473 d = e2->dest_idx;
3474 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3475 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3476 code = BIT_AND_EXPR;
3477 else
3479 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3480 code = BIT_IOR_EXPR;
3483 /* If _234 SSA_NAME_DEF_STMT is
3484 _234 = _567 | _789;
3485 (or &, corresponding to 1/0 in the phi arguments,
3486 push into ops the individual range test arguments
3487 of the bitwise or resp. and, recursively. */
3488 if (!get_ops (rhs, code, &ops,
3489 loop_containing_stmt (stmt))
3490 && has_single_use (rhs))
3492 /* Otherwise, push the _234 range test itself. */
3493 operand_entry *oe = operand_entry_pool.allocate ();
3495 oe->op = rhs;
3496 oe->rank = code;
3497 oe->id = 0;
3498 oe->count = 1;
3499 oe->stmt_to_insert = NULL;
3500 ops.safe_push (oe);
3501 bb_ent.last_idx++;
3503 else
3504 bb_ent.last_idx = ops.length ();
3505 bb_ent.op = rhs;
3506 bbinfo.safe_push (bb_ent);
3507 continue;
3509 /* Otherwise stmt is GIMPLE_COND. */
3510 code = gimple_cond_code (stmt);
3511 lhs = gimple_cond_lhs (stmt);
3512 rhs = gimple_cond_rhs (stmt);
3513 if (TREE_CODE (lhs) == SSA_NAME
3514 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3515 && ((code != EQ_EXPR && code != NE_EXPR)
3516 || rhs != boolean_false_node
3517 /* Either push into ops the individual bitwise
3518 or resp. and operands, depending on which
3519 edge is other_bb. */
3520 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3521 ^ (code == EQ_EXPR))
3522 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3523 loop_containing_stmt (stmt))))
3525 /* Or push the GIMPLE_COND stmt itself. */
3526 operand_entry *oe = operand_entry_pool.allocate ();
3528 oe->op = NULL;
3529 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3530 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3531 /* oe->op = NULL signals that there is no SSA_NAME
3532 for the range test, and oe->id instead is the
3533 basic block number at whose end the GIMPLE_COND
3534 is. */
3535 oe->id = bb->index;
3536 oe->count = 1;
3537 oe->stmt_to_insert = NULL;
3538 ops.safe_push (oe);
3539 bb_ent.op = NULL;
3540 bb_ent.last_idx++;
3542 else if (ops.length () > bb_ent.first_idx)
3544 bb_ent.op = lhs;
3545 bb_ent.last_idx = ops.length ();
3547 bbinfo.safe_push (bb_ent);
3548 if (bb == first_bb)
3549 break;
3551 if (ops.length () > 1)
3552 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3553 if (any_changes)
3555 unsigned int idx, max_idx = 0;
3556 /* update_ops relies on has_single_use predicates returning the
3557 same values as they did during get_ops earlier. Additionally it
3558 never removes statements, only adds new ones and it should walk
3559 from the single imm use and check the predicate already before
3560 making those changes.
3561 On the other hand, the handling of GIMPLE_COND directly can turn
3562 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3563 it needs to be done in a separate loop afterwards. */
3564 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3566 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3567 && bbinfo[idx].op != NULL_TREE)
3569 tree new_op;
3571 max_idx = idx;
3572 stmt = last_stmt (bb);
3573 new_op = update_ops (bbinfo[idx].op,
3574 (enum tree_code)
3575 ops[bbinfo[idx].first_idx]->rank,
3576 ops, &bbinfo[idx].first_idx,
3577 loop_containing_stmt (stmt));
3578 if (new_op == NULL_TREE)
3580 gcc_assert (bb == last_bb);
3581 new_op = ops[bbinfo[idx].first_idx++]->op;
3583 if (bbinfo[idx].op != new_op)
3585 imm_use_iterator iter;
3586 use_operand_p use_p;
3587 gimple *use_stmt, *cast_stmt = NULL;
3589 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3590 if (is_gimple_debug (use_stmt))
3591 continue;
3592 else if (gimple_code (use_stmt) == GIMPLE_COND
3593 || gimple_code (use_stmt) == GIMPLE_PHI)
3594 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3595 SET_USE (use_p, new_op);
3596 else if (gimple_assign_cast_p (use_stmt))
3597 cast_stmt = use_stmt;
3598 else
3599 gcc_unreachable ();
3600 if (cast_stmt)
3602 gcc_assert (bb == last_bb);
3603 tree lhs = gimple_assign_lhs (cast_stmt);
3604 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3605 enum tree_code rhs_code
3606 = gimple_assign_rhs_code (cast_stmt);
3607 gassign *g;
3608 if (is_gimple_min_invariant (new_op))
3610 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3611 g = gimple_build_assign (new_lhs, new_op);
3613 else
3614 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3615 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3616 gimple_set_uid (g, gimple_uid (cast_stmt));
3617 gimple_set_visited (g, true);
3618 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3619 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3620 if (is_gimple_debug (use_stmt))
3621 continue;
3622 else if (gimple_code (use_stmt) == GIMPLE_COND
3623 || gimple_code (use_stmt) == GIMPLE_PHI)
3624 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3625 SET_USE (use_p, new_lhs);
3626 else
3627 gcc_unreachable ();
3631 if (bb == first_bb)
3632 break;
3634 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3636 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3637 && bbinfo[idx].op == NULL_TREE
3638 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3640 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3642 if (idx > max_idx)
3643 max_idx = idx;
3645 /* If we collapse the conditional to a true/false
3646 condition, then bubble that knowledge up to our caller. */
3647 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3649 gimple_cond_make_false (cond_stmt);
3650 cfg_cleanup_needed = true;
3652 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3654 gimple_cond_make_true (cond_stmt);
3655 cfg_cleanup_needed = true;
3657 else
3659 gimple_cond_set_code (cond_stmt, NE_EXPR);
3660 gimple_cond_set_lhs (cond_stmt,
3661 ops[bbinfo[idx].first_idx]->op);
3662 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3664 update_stmt (cond_stmt);
3666 if (bb == first_bb)
3667 break;
3670 /* The above changes could result in basic blocks after the first
3671 modified one, up to and including last_bb, being executed even if
3672 they would not be in the original program. If the value ranges of
3673 assignment lhs' in those bbs were dependent on the conditions
3674 guarding those basic blocks which now can change, the VRs might
3675 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
3676 are only used within the same bb, it should not be a big deal if
3677 we just reset all the VRs in those bbs. See PR68671. */
3678 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
3679 reset_flow_sensitive_info_in_bb (bb);
3681 return cfg_cleanup_needed;
3684 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3685 of STMT in its operands. This is also known as a "destructive
3686 update" operation. */
3688 static bool
3689 is_phi_for_stmt (gimple *stmt, tree operand)
3691 gimple *def_stmt;
3692 gphi *def_phi;
3693 tree lhs;
3694 use_operand_p arg_p;
3695 ssa_op_iter i;
3697 if (TREE_CODE (operand) != SSA_NAME)
3698 return false;
3700 lhs = gimple_assign_lhs (stmt);
3702 def_stmt = SSA_NAME_DEF_STMT (operand);
3703 def_phi = dyn_cast <gphi *> (def_stmt);
3704 if (!def_phi)
3705 return false;
3707 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3708 if (lhs == USE_FROM_PTR (arg_p))
3709 return true;
3710 return false;
3713 /* Remove def stmt of VAR if VAR has zero uses and recurse
3714 on rhs1 operand if so. */
3716 static void
3717 remove_visited_stmt_chain (tree var)
3719 gimple *stmt;
3720 gimple_stmt_iterator gsi;
3722 while (1)
3724 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3725 return;
3726 stmt = SSA_NAME_DEF_STMT (var);
3727 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3729 var = gimple_assign_rhs1 (stmt);
3730 gsi = gsi_for_stmt (stmt);
3731 reassoc_remove_stmt (&gsi);
3732 release_defs (stmt);
3734 else
3735 return;
3739 /* This function checks three consecutive operands in
3740 the passed operands vector OPS starting from OPINDEX and
3741 swaps two operands if it is profitable for the binary operation
3742 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3744 We pair ops with the same rank if possible.
3746 The alternative we try is to see if STMT is a destructive
3747 update style statement, which is like:
3748 b = phi (a, ...)
3749 a = c + b;
3750 In that case, we want to use the destructive update form to
3751 expose the possible vectorizer sum reduction opportunity.
3752 In that case, the third operand will be the phi node. This
3753 check is not performed if STMT is null.
3755 We could, of course, try to be better as noted above, and do a
3756 lot of work to try to find these opportunities in >3 operand
3757 cases, but it is unlikely to be worth it. */
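/* Illustrative sketch, not part of the pass: a source-level loop whose
   GIMPLE contains the destructive update pattern described above -- the
   accumulator is defined by a PHI node and redefined from it on every
   iteration.  Keeping the PHI result as the last operand helps expose the
   sum reduction to the vectorizer.  The function name is made up.  */
static int
destructive_update_example (const int *c, int n)
{
  int a = 0;
  int i;
  for (i = 0; i < n; i++)
    /* In SSA form roughly: a_2 = PHI <0, a_1>;  a_1 = c[i] + a_2;  */
    a = c[i] + a;
  return a;
}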
3759 static void
3760 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
3761 unsigned int opindex, gimple *stmt)
3763 operand_entry *oe1, *oe2, *oe3;
3765 oe1 = ops[opindex];
3766 oe2 = ops[opindex + 1];
3767 oe3 = ops[opindex + 2];
3769 if ((oe1->rank == oe2->rank
3770 && oe2->rank != oe3->rank)
3771 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3772 && !is_phi_for_stmt (stmt, oe1->op)
3773 && !is_phi_for_stmt (stmt, oe2->op)))
3774 std::swap (*oe1, *oe3);
3775 else if ((oe1->rank == oe3->rank
3776 && oe2->rank != oe3->rank)
3777 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3778 && !is_phi_for_stmt (stmt, oe1->op)
3779 && !is_phi_for_stmt (stmt, oe3->op)))
3780 std::swap (*oe1, *oe2);
3783 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3784 two definitions, otherwise return STMT. */
3786 static inline gimple *
3787 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
3789 if (TREE_CODE (rhs1) == SSA_NAME
3790 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3791 stmt = SSA_NAME_DEF_STMT (rhs1);
3792 if (TREE_CODE (rhs2) == SSA_NAME
3793 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3794 stmt = SSA_NAME_DEF_STMT (rhs2);
3795 return stmt;
3798 /* If the stmt that defines operand has to be inserted, insert it
3799 before the use. */
3800 static void
3801 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
3803 gcc_assert (is_gimple_assign (stmt_to_insert));
3804 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
3805 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
3806 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
3807 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
3808 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
3810 /* If the insert point is not stmt, then insert_point would be
3811 the point where operand rhs1 or rhs2 is defined. In this case,
3812 stmt_to_insert has to be inserted afterwards. This would
3813 only happen when the stmt insertion point is flexible. */
3814 if (stmt == insert_point)
3815 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
3816 else
3817 insert_stmt_after (stmt_to_insert, insert_point);
3821 /* Recursively rewrite our linearized statements so that the operators
3822 match those in OPS[OPINDEX], putting the computation in rank
3823 order. Return new lhs. */
3825 static tree
3826 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
3827 vec<operand_entry *> ops, bool changed)
3829 tree rhs1 = gimple_assign_rhs1 (stmt);
3830 tree rhs2 = gimple_assign_rhs2 (stmt);
3831 tree lhs = gimple_assign_lhs (stmt);
3832 operand_entry *oe;
3834 /* The final recursion case for this function is that you have
3835 exactly two operations left.
3836 If we had exactly one op in the entire list to start with, we
3837 would have never called this function, and the tail recursion
3838 rewrites them one at a time. */
3839 if (opindex + 2 == ops.length ())
3841 operand_entry *oe1, *oe2;
3843 oe1 = ops[opindex];
3844 oe2 = ops[opindex + 1];
3846 if (rhs1 != oe1->op || rhs2 != oe2->op)
3848 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3849 unsigned int uid = gimple_uid (stmt);
3851 if (dump_file && (dump_flags & TDF_DETAILS))
3853 fprintf (dump_file, "Transforming ");
3854 print_gimple_stmt (dump_file, stmt, 0, 0);
3857 /* If the stmt that defines operand has to be inserted, insert it
3858 before the use. */
3859 if (oe1->stmt_to_insert)
3860 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
3861 if (oe2->stmt_to_insert)
3862 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
3863 /* Even when changed is false, reassociation could have e.g. removed
3864 some redundant operations, so unless we are just swapping the
3865 arguments or unless there is no change at all (then we just
3866 return lhs), force creation of a new SSA_NAME. */
3867 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3869 gimple *insert_point
3870 = find_insert_point (stmt, oe1->op, oe2->op);
3871 lhs = make_ssa_name (TREE_TYPE (lhs));
3872 stmt
3873 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3874 oe1->op, oe2->op);
3875 gimple_set_uid (stmt, uid);
3876 gimple_set_visited (stmt, true);
3877 if (insert_point == gsi_stmt (gsi))
3878 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3879 else
3880 insert_stmt_after (stmt, insert_point);
3882 else
3884 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3885 == stmt);
3886 gimple_assign_set_rhs1 (stmt, oe1->op);
3887 gimple_assign_set_rhs2 (stmt, oe2->op);
3888 update_stmt (stmt);
3891 if (rhs1 != oe1->op && rhs1 != oe2->op)
3892 remove_visited_stmt_chain (rhs1);
3894 if (dump_file && (dump_flags & TDF_DETAILS))
3896 fprintf (dump_file, " into ");
3897 print_gimple_stmt (dump_file, stmt, 0, 0);
3900 return lhs;
3903 /* If we hit here, we should have 3 or more ops left. */
3904 gcc_assert (opindex + 2 < ops.length ());
3906 /* Rewrite the next operator. */
3907 oe = ops[opindex];
3909 /* If the stmt that defines operand has to be inserted, insert it
3910 before the use. */
3911 if (oe->stmt_to_insert)
3912 insert_stmt_before_use (stmt, oe->stmt_to_insert);
3914 /* Recurse on the LHS of the binary operator, which is guaranteed to
3915 be the non-leaf side. */
3916 tree new_rhs1
3917 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3918 changed || oe->op != rhs2);
3920 if (oe->op != rhs2 || new_rhs1 != rhs1)
3922 if (dump_file && (dump_flags & TDF_DETAILS))
3924 fprintf (dump_file, "Transforming ");
3925 print_gimple_stmt (dump_file, stmt, 0, 0);
3928 /* If changed is false, this is either opindex == 0
3929 or all outer rhs2's were equal to corresponding oe->op,
3930 and powi_result is NULL.
3931 That means lhs is equivalent before and after reassociation.
3932 Otherwise ensure the old lhs SSA_NAME is not reused and
3933 create a new stmt as well, so that any debug stmts will be
3934 properly adjusted. */
3935 if (changed)
3937 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3938 unsigned int uid = gimple_uid (stmt);
3939 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3941 lhs = make_ssa_name (TREE_TYPE (lhs));
3942 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3943 new_rhs1, oe->op);
3944 gimple_set_uid (stmt, uid);
3945 gimple_set_visited (stmt, true);
3946 if (insert_point == gsi_stmt (gsi))
3947 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3948 else
3949 insert_stmt_after (stmt, insert_point);
3951 else
3953 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3954 == stmt);
3955 gimple_assign_set_rhs1 (stmt, new_rhs1);
3956 gimple_assign_set_rhs2 (stmt, oe->op);
3957 update_stmt (stmt);
3960 if (dump_file && (dump_flags & TDF_DETAILS))
3962 fprintf (dump_file, " into ");
3963 print_gimple_stmt (dump_file, stmt, 0, 0);
3966 return lhs;
3969 /* Find out how many cycles we need to compute the statement chain.
3970 OPS_NUM holds the number of statements in the chain.  CPU_WIDTH is the
3971 maximum number of independent statements we may execute per cycle. */
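/* A worked example of the formula below: with ops_num == 8 and
   cpu_width == 2 the first phase takes res == 8 / (2 * 2) == 2 cycles,
   each reducing the operand count by cpu_width; the remaining
   rest == 8 - 2 * 2 == 4 operands are then combined pairwise in
   exact_log2 (4) == 2 further cycles, for 4 cycles in total.  */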
3973 static int
3974 get_required_cycles (int ops_num, int cpu_width)
3976 int res;
3977 int elog;
3978 unsigned int rest;
3980 /* While we have more than 2 * cpu_width operands
3981 we may reduce the number of operands by cpu_width
3982 per cycle. */
3983 res = ops_num / (2 * cpu_width);
3985 /* The remaining operand count can then be halved each cycle
3986 until we have only one operand. */
3987 rest = (unsigned)(ops_num - res * cpu_width);
3988 elog = exact_log2 (rest);
3989 if (elog >= 0)
3990 res += elog;
3991 else
3992 res += floor_log2 (rest) + 1;
3994 return res;
3997 /* Returns an optimal number of registers to use for computation of
3998 given statements. */
4000 static int
4001 get_reassociation_width (int ops_num, enum tree_code opc,
4002 machine_mode mode)
4004 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
4005 int width;
4006 int width_min;
4007 int cycles_best;
4009 if (param_width > 0)
4010 width = param_width;
4011 else
4012 width = targetm.sched.reassociation_width (opc, mode);
4014 if (width == 1)
4015 return width;
4017 /* Get the minimal time required for sequence computation. */
4018 cycles_best = get_required_cycles (ops_num, width);
4020 /* Check whether we can use a smaller width and still compute the
4021 sequence in the same time.  That lets us reduce register usage.
4022 get_required_cycles is monotonically increasing with lower width
4023 so we can perform a binary search for the minimal width that still
4024 results in the optimal cycle count. */
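/* For instance, with ops_num == 6 and a target width of 4,
   get_required_cycles (6, 4) and get_required_cycles (6, 2) both
   return 3 cycles, so the loop below settles on width 2 and frees
   the extra registers.  */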
4025 width_min = 1;
4026 while (width > width_min)
4028 int width_mid = (width + width_min) / 2;
4030 if (get_required_cycles (ops_num, width_mid) == cycles_best)
4031 width = width_mid;
4032 else if (width_min < width_mid)
4033 width_min = width_mid;
4034 else
4035 break;
4038 return width;
4041 /* Recursively rewrite our linearized statements so that the operators
4042 match those in OPS[OPINDEX], putting the computation in rank
4043 order and trying to allow operations to be executed in
4044 parallel. */
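/* For example, a chain a + b + c + d rewritten with width 2 becomes
   roughly t1 = a + b; t2 = c + d; lhs = t1 + t2, i.e. two dependence
   levels instead of the three required by ((a + b) + c) + d.  */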
4046 static void
4047 rewrite_expr_tree_parallel (gassign *stmt, int width,
4048 vec<operand_entry *> ops)
4050 enum tree_code opcode = gimple_assign_rhs_code (stmt);
4051 int op_num = ops.length ();
4052 int stmt_num = op_num - 1;
4053 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
4054 int op_index = op_num - 1;
4055 int stmt_index = 0;
4056 int ready_stmts_end = 0;
4057 int i = 0;
4058 gimple *stmt1 = NULL, *stmt2 = NULL;
4059 tree last_rhs1 = gimple_assign_rhs1 (stmt);
4061 /* We start expression rewriting from the top statements.
4062 So, in this loop we create a full list of statements
4063 we will work with. */
4064 stmts[stmt_num - 1] = stmt;
4065 for (i = stmt_num - 2; i >= 0; i--)
4066 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
4068 for (i = 0; i < stmt_num; i++)
4070 tree op1, op2;
4072 /* Determine whether we should use results of
4073 already handled statements or not. */
4074 if (ready_stmts_end == 0
4075 && (i - stmt_index >= width || op_index < 1))
4076 ready_stmts_end = i;
4078 /* Now we choose operands for the next statement.  A nonzero
4079 value in ready_stmts_end means that we should use the results
4080 of already generated statements as the new operands. */
4081 if (ready_stmts_end > 0)
4083 op1 = gimple_assign_lhs (stmts[stmt_index++]);
4084 if (ready_stmts_end > stmt_index)
4085 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4086 else if (op_index >= 0)
4088 operand_entry *oe = ops[op_index--];
4089 stmt2 = oe->stmt_to_insert;
4090 op2 = oe->op;
4092 else
4094 gcc_assert (stmt_index < i);
4095 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4098 if (stmt_index >= ready_stmts_end)
4099 ready_stmts_end = 0;
4101 else
4103 if (op_index > 1)
4104 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
4105 operand_entry *oe2 = ops[op_index--];
4106 operand_entry *oe1 = ops[op_index--];
4107 op2 = oe2->op;
4108 stmt2 = oe2->stmt_to_insert;
4109 op1 = oe1->op;
4110 stmt1 = oe1->stmt_to_insert;
4113 /* If we are emitting the last statement then put the remaining
4114 operands into it.  Setting I to the last index here also
4115 terminates the loop. */
4116 if (op_index < 0 && stmt_index == i)
4117 i = stmt_num - 1;
4119 if (dump_file && (dump_flags & TDF_DETAILS))
4121 fprintf (dump_file, "Transforming ");
4122 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4125 /* If the stmt that defines operand has to be inserted, insert it
4126 before the use. */
4127 if (stmt1)
4128 insert_stmt_before_use (stmts[i], stmt1);
4129 if (stmt2)
4130 insert_stmt_before_use (stmts[i], stmt2);
4131 stmt1 = stmt2 = NULL;
4133 /* We keep the original statement only for the last one.  All
4134 others are recreated. */
4135 if (i == stmt_num - 1)
4137 gimple_assign_set_rhs1 (stmts[i], op1);
4138 gimple_assign_set_rhs2 (stmts[i], op2);
4139 update_stmt (stmts[i]);
4141 else
4143 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
4145 if (dump_file && (dump_flags & TDF_DETAILS))
4147 fprintf (dump_file, " into ");
4148 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4152 remove_visited_stmt_chain (last_rhs1);
4155 /* Transform STMT, which is really (A + B) + (C + D), into the
4156 left-linear form ((A + B) + C) + D.
4157 Recurse on D if necessary. */
4159 static void
4160 linearize_expr (gimple *stmt)
4162 gimple_stmt_iterator gsi;
4163 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4164 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4165 gimple *oldbinrhs = binrhs;
4166 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4167 gimple *newbinrhs = NULL;
4168 struct loop *loop = loop_containing_stmt (stmt);
4169 tree lhs = gimple_assign_lhs (stmt);
4171 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4172 && is_reassociable_op (binrhs, rhscode, loop));
4174 gsi = gsi_for_stmt (stmt);
4176 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4177 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4178 gimple_assign_rhs_code (binrhs),
4179 gimple_assign_lhs (binlhs),
4180 gimple_assign_rhs2 (binrhs));
4181 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4182 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4183 gimple_set_uid (binrhs, gimple_uid (stmt));
4185 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4186 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4188 if (dump_file && (dump_flags & TDF_DETAILS))
4190 fprintf (dump_file, "Linearized: ");
4191 print_gimple_stmt (dump_file, stmt, 0, 0);
4194 reassociate_stats.linearized++;
4195 update_stmt (stmt);
4197 gsi = gsi_for_stmt (oldbinrhs);
4198 reassoc_remove_stmt (&gsi);
4199 release_defs (oldbinrhs);
4201 gimple_set_visited (stmt, true);
4202 gimple_set_visited (binlhs, true);
4203 gimple_set_visited (binrhs, true);
4205 /* Tail recurse on the new rhs if it still needs reassociation. */
4206 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4207 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4208 want to change the algorithm while converting to tuples. */
4209 linearize_expr (stmt);
4212 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4213 it. Otherwise, return NULL. */
4215 static gimple *
4216 get_single_immediate_use (tree lhs)
4218 use_operand_p immuse;
4219 gimple *immusestmt;
4221 if (TREE_CODE (lhs) == SSA_NAME
4222 && single_imm_use (lhs, &immuse, &immusestmt)
4223 && is_gimple_assign (immusestmt))
4224 return immusestmt;
4226 return NULL;
4229 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4230 representing the negated value. Insertions of any necessary
4231 instructions go before GSI.
4232 This function is recursive in that, if you hand it "a_5" as the
4233 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4234 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
4236 static tree
4237 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4239 gimple *negatedefstmt = NULL;
4240 tree resultofnegate;
4241 gimple_stmt_iterator gsi;
4242 unsigned int uid;
4244 /* If we are trying to negate a name, defined by an add, negate the
4245 add operands instead. */
4246 if (TREE_CODE (tonegate) == SSA_NAME)
4247 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4248 if (TREE_CODE (tonegate) == SSA_NAME
4249 && is_gimple_assign (negatedefstmt)
4250 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4251 && has_single_use (gimple_assign_lhs (negatedefstmt))
4252 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4254 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4255 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4256 tree lhs = gimple_assign_lhs (negatedefstmt);
4257 gimple *g;
4259 gsi = gsi_for_stmt (negatedefstmt);
4260 rhs1 = negate_value (rhs1, &gsi);
4262 gsi = gsi_for_stmt (negatedefstmt);
4263 rhs2 = negate_value (rhs2, &gsi);
4265 gsi = gsi_for_stmt (negatedefstmt);
4266 lhs = make_ssa_name (TREE_TYPE (lhs));
4267 gimple_set_visited (negatedefstmt, true);
4268 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4269 gimple_set_uid (g, gimple_uid (negatedefstmt));
4270 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4271 return lhs;
4274 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4275 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4276 NULL_TREE, true, GSI_SAME_STMT);
4277 gsi = *gsip;
4278 uid = gimple_uid (gsi_stmt (gsi));
4279 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4281 gimple *stmt = gsi_stmt (gsi);
4282 if (gimple_uid (stmt) != 0)
4283 break;
4284 gimple_set_uid (stmt, uid);
4286 return resultofnegate;
4289 /* Return true if we should break up the subtract in STMT into an add
4290 with negate.  This is true when the subtract operands are really
4291 adds, or the subtract itself is used in an add expression. In
4292 either case, breaking up the subtract into an add with negate
4293 exposes the adds to reassociation. */
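/* E.g. given
     a = b + c
     d = a - e
     f = d + g
   the subtract both consumes and feeds an addition, so rewriting it
   as d = a + -e exposes the whole chain to reassociation.  */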
4295 static bool
4296 should_break_up_subtract (gimple *stmt)
4298 tree lhs = gimple_assign_lhs (stmt);
4299 tree binlhs = gimple_assign_rhs1 (stmt);
4300 tree binrhs = gimple_assign_rhs2 (stmt);
4301 gimple *immusestmt;
4302 struct loop *loop = loop_containing_stmt (stmt);
4304 if (TREE_CODE (binlhs) == SSA_NAME
4305 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4306 return true;
4308 if (TREE_CODE (binrhs) == SSA_NAME
4309 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4310 return true;
4312 if (TREE_CODE (lhs) == SSA_NAME
4313 && (immusestmt = get_single_immediate_use (lhs))
4314 && is_gimple_assign (immusestmt)
4315 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4316 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4317 return true;
4318 return false;
4321 /* Transform STMT from A - B into A + -B. */
4323 static void
4324 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4326 tree rhs1 = gimple_assign_rhs1 (stmt);
4327 tree rhs2 = gimple_assign_rhs2 (stmt);
4329 if (dump_file && (dump_flags & TDF_DETAILS))
4331 fprintf (dump_file, "Breaking up subtract ");
4332 print_gimple_stmt (dump_file, stmt, 0, 0);
4335 rhs2 = negate_value (rhs2, gsip);
4336 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4337 update_stmt (stmt);
4340 /* Determine whether STMT is a builtin call that raises an SSA name
4341 to an integer power and has only one use. If so, and this is early
4342 reassociation and unsafe math optimizations are permitted, place
4343 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4344 If any of these conditions does not hold, return FALSE. */
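/* For example, with -fno-math-errno a call x = pow (y, 3.0) yields
   *BASE == y and *EXPONENT == 3, while pow (y, 2.5) is rejected
   because the exponent is not an exact integer.  */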
4346 static bool
4347 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
4349 tree arg1;
4350 REAL_VALUE_TYPE c, cint;
4352 switch (gimple_call_combined_fn (stmt))
4354 CASE_CFN_POW:
4355 if (flag_errno_math)
4356 return false;
4358 *base = gimple_call_arg (stmt, 0);
4359 arg1 = gimple_call_arg (stmt, 1);
4361 if (TREE_CODE (arg1) != REAL_CST)
4362 return false;
4364 c = TREE_REAL_CST (arg1);
4366 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4367 return false;
4369 *exponent = real_to_integer (&c);
4370 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4371 if (!real_identical (&c, &cint))
4372 return false;
4374 break;
4376 CASE_CFN_POWI:
4377 *base = gimple_call_arg (stmt, 0);
4378 arg1 = gimple_call_arg (stmt, 1);
4380 if (!tree_fits_shwi_p (arg1))
4381 return false;
4383 *exponent = tree_to_shwi (arg1);
4384 break;
4386 default:
4387 return false;
4390 /* Expanding negative exponents is generally unproductive, so we don't
4391 complicate matters with those. Exponents of zero and one should
4392 have been handled by expression folding. */
4393 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4394 return false;
4396 return true;
4399 /* Try to derive and add operand entry for OP to *OPS. Return false if
4400 unsuccessful. */
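/* For instance, in a MULT_EXPR chain a single-use operand defined by
   t = -x is entered as the two operands x and -1, and a single-use
   pow/powi call with an integral exponent is recorded as a repeated
   factor, so the later powi and negate handling can see through both.  */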
4402 static bool
4403 try_special_add_to_ops (vec<operand_entry *> *ops,
4404 enum tree_code code,
4405 tree op, gimple* def_stmt)
4407 tree base = NULL_TREE;
4408 HOST_WIDE_INT exponent = 0;
4410 if (TREE_CODE (op) != SSA_NAME
4411 || ! has_single_use (op))
4412 return false;
4414 if (code == MULT_EXPR
4415 && reassoc_insert_powi_p
4416 && flag_unsafe_math_optimizations
4417 && is_gimple_call (def_stmt)
4418 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
4420 add_repeat_to_ops_vec (ops, base, exponent);
4421 gimple_set_visited (def_stmt, true);
4422 return true;
4424 else if (code == MULT_EXPR
4425 && is_gimple_assign (def_stmt)
4426 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
4427 && !HONOR_SNANS (TREE_TYPE (op))
4428 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
4429 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
4431 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4432 tree cst = build_minus_one_cst (TREE_TYPE (op));
4433 add_to_ops_vec (ops, rhs1);
4434 add_to_ops_vec (ops, cst);
4435 gimple_set_visited (def_stmt, true);
4436 return true;
4439 return false;
4442 /* Recursively linearize a binary expression that is the RHS of STMT.
4443 Place the operands of the expression tree in the vector named OPS. */
4445 static void
4446 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
4447 bool is_associative, bool set_visited)
4449 tree binlhs = gimple_assign_rhs1 (stmt);
4450 tree binrhs = gimple_assign_rhs2 (stmt);
4451 gimple *binlhsdef = NULL, *binrhsdef = NULL;
4452 bool binlhsisreassoc = false;
4453 bool binrhsisreassoc = false;
4454 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4455 struct loop *loop = loop_containing_stmt (stmt);
4457 if (set_visited)
4458 gimple_set_visited (stmt, true);
4460 if (TREE_CODE (binlhs) == SSA_NAME)
4462 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4463 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4464 && !stmt_could_throw_p (binlhsdef));
4467 if (TREE_CODE (binrhs) == SSA_NAME)
4469 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4470 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4471 && !stmt_could_throw_p (binrhsdef));
4474 /* If the LHS is not reassociable, but the RHS is, we need to swap
4475 them. If neither is reassociable, there is nothing we can do, so
4476 just put them in the ops vector. If the LHS is reassociable,
4477 linearize it. If both are reassociable, then linearize the RHS
4478 and the LHS. */
4480 if (!binlhsisreassoc)
4482 /* If this is not an associative operation (e.g. division), give up. */
4483 if (!is_associative)
4485 add_to_ops_vec (ops, binrhs);
4486 return;
4489 if (!binrhsisreassoc)
4491 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4492 add_to_ops_vec (ops, binrhs);
4494 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
4495 add_to_ops_vec (ops, binlhs);
4497 return;
4500 if (dump_file && (dump_flags & TDF_DETAILS))
4502 fprintf (dump_file, "swapping operands of ");
4503 print_gimple_stmt (dump_file, stmt, 0, 0);
4506 swap_ssa_operands (stmt,
4507 gimple_assign_rhs1_ptr (stmt),
4508 gimple_assign_rhs2_ptr (stmt));
4509 update_stmt (stmt);
4511 if (dump_file && (dump_flags & TDF_DETAILS))
4513 fprintf (dump_file, " is now ");
4514 print_gimple_stmt (dump_file, stmt, 0, 0);
4517 /* We want to make it so the lhs is always the reassociative op,
4518 so swap. */
4519 std::swap (binlhs, binrhs);
4521 else if (binrhsisreassoc)
4523 linearize_expr (stmt);
4524 binlhs = gimple_assign_rhs1 (stmt);
4525 binrhs = gimple_assign_rhs2 (stmt);
4528 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4529 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4530 rhscode, loop));
4531 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4532 is_associative, set_visited);
4534 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4535 add_to_ops_vec (ops, binrhs);
4538 /* Repropagate the negates back into subtracts, since no other pass
4539 currently does it. */
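/* E.g. given
     t = -b
     c = a + t
   the PLUS_EXPR below is rewritten as c = a - b.  */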
4541 static void
4542 repropagate_negates (void)
4544 unsigned int i = 0;
4545 tree negate;
4547 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4549 gimple *user = get_single_immediate_use (negate);
4551 if (!user || !is_gimple_assign (user))
4552 continue;
4554 /* The negate operand can be either operand of a PLUS_EXPR
4555 (it can be the LHS if the RHS is a constant for example).
4557 Force the negate operand to the RHS of the PLUS_EXPR, then
4558 transform the PLUS_EXPR into a MINUS_EXPR. */
4559 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4561 /* If the negated operand appears on the LHS of the
4562 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4563 to force the negated operand to the RHS of the PLUS_EXPR. */
4564 if (gimple_assign_rhs1 (user) == negate)
4566 swap_ssa_operands (user,
4567 gimple_assign_rhs1_ptr (user),
4568 gimple_assign_rhs2_ptr (user));
4571 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4572 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4573 if (gimple_assign_rhs2 (user) == negate)
4575 tree rhs1 = gimple_assign_rhs1 (user);
4576 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
4577 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4578 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4579 update_stmt (user);
4582 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4584 if (gimple_assign_rhs1 (user) == negate)
4586 /* We have
4587 x = -a
4588 y = x - b
4589 which we transform into
4590 x = a + b
4591 y = -x .
4592 This pushes down the negate which we possibly can merge
4593 into some other operation, hence insert it into the
4594 plus_negates vector. */
4595 gimple *feed = SSA_NAME_DEF_STMT (negate);
4596 tree a = gimple_assign_rhs1 (feed);
4597 tree b = gimple_assign_rhs2 (user);
4598 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4599 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4600 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4601 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
4602 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4603 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4604 user = gsi_stmt (gsi2);
4605 update_stmt (user);
4606 reassoc_remove_stmt (&gsi);
4607 release_defs (feed);
4608 plus_negates.safe_push (gimple_assign_lhs (user));
4610 else
4612 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4613 rid of one operation. */
4614 gimple *feed = SSA_NAME_DEF_STMT (negate);
4615 tree a = gimple_assign_rhs1 (feed);
4616 tree rhs1 = gimple_assign_rhs1 (user);
4617 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4618 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4619 update_stmt (gsi_stmt (gsi));
4625 /* Returns true if OP is of a type for which we can do reassociation.
4626 That is for integral or non-saturating fixed-point types, and for
4627 floating-point types when -fassociative-math is enabled. */
4629 static bool
4630 can_reassociate_p (tree op)
4632 tree type = TREE_TYPE (op);
4633 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4634 || NON_SAT_FIXED_POINT_TYPE_P (type)
4635 || (flag_associative_math && FLOAT_TYPE_P (type)))
4636 return true;
4637 return false;
4640 /* Break up subtract operations in block BB.
4642 We do this top down because we don't know whether the subtract is
4643 part of a possible chain of reassociation except at the top.
4645 IE given
4646 d = f + g
4647 c = a + e
4648 b = c - d
4649 q = b - r
4650 k = t - q
4652 we want to break up k = t - q, but we won't until we've transformed q
4653 = b - r, which won't be broken up until we transform b = c - d.
4655 En passant, clear the GIMPLE visited flag on every statement
4656 and set UIDs within each basic block. */
4658 static void
4659 break_up_subtract_bb (basic_block bb)
4661 gimple_stmt_iterator gsi;
4662 basic_block son;
4663 unsigned int uid = 1;
4665 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4667 gimple *stmt = gsi_stmt (gsi);
4668 gimple_set_visited (stmt, false);
4669 gimple_set_uid (stmt, uid++);
4671 if (!is_gimple_assign (stmt)
4672 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4673 continue;
4675 /* Look for simple gimple subtract operations. */
4676 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4678 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4679 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4680 continue;
4682 /* Check for a subtract used only in an addition. If this
4683 is the case, transform it into add of a negate for better
4684 reassociation. IE transform C = A-B into C = A + -B if C
4685 is only used in an addition. */
4686 if (should_break_up_subtract (stmt))
4687 break_up_subtract (stmt, &gsi);
4689 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4690 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4691 plus_negates.safe_push (gimple_assign_lhs (stmt));
4693 for (son = first_dom_son (CDI_DOMINATORS, bb);
4694 son;
4695 son = next_dom_son (CDI_DOMINATORS, son))
4696 break_up_subtract_bb (son);
4699 /* Used for repeated factor analysis. */
4700 struct repeat_factor
4702 /* An SSA name that occurs in a multiply chain. */
4703 tree factor;
4705 /* Cached rank of the factor. */
4706 unsigned rank;
4708 /* Number of occurrences of the factor in the chain. */
4709 HOST_WIDE_INT count;
4711 /* An SSA name representing the product of this factor and
4712 all factors appearing later in the repeated factor vector. */
4713 tree repr;
4717 static vec<repeat_factor> repeat_factor_vec;
4719 /* Used for sorting the repeat factor vector. Sort primarily by
4720 ascending occurrence count, secondarily by descending rank. */
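/* E.g. factors with counts x:2, y:4, z:4 sort as x first (lowest
   count), followed by y and z with the higher-ranked of the two
   coming first.  */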
4722 static int
4723 compare_repeat_factors (const void *x1, const void *x2)
4725 const repeat_factor *rf1 = (const repeat_factor *) x1;
4726 const repeat_factor *rf2 = (const repeat_factor *) x2;
4728 if (rf1->count != rf2->count)
4729 return rf1->count - rf2->count;
4731 return rf2->rank - rf1->rank;
4734 /* Look for repeated operands in OPS in the multiply tree rooted at
4735 STMT. Replace them with an optimal sequence of multiplies and powi
4736 builtin calls, and remove the used operands from OPS. Return an
4737 SSA name representing the value of the replacement sequence. */
4739 static tree
4740 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
4742 unsigned i, j, vec_len;
4743 int ii;
4744 operand_entry *oe;
4745 repeat_factor *rf1, *rf2;
4746 repeat_factor rfnew;
4747 tree result = NULL_TREE;
4748 tree target_ssa, iter_result;
4749 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4750 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4751 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4752 gimple *mul_stmt, *pow_stmt;
4754 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4755 target. */
4756 if (!powi_fndecl)
4757 return NULL_TREE;
4759 /* Allocate the repeated factor vector. */
4760 repeat_factor_vec.create (10);
4762 /* Scan the OPS vector for all SSA names in the product and build
4763 up a vector of occurrence counts for each factor. */
4764 FOR_EACH_VEC_ELT (*ops, i, oe)
4766 if (TREE_CODE (oe->op) == SSA_NAME)
4768 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4770 if (rf1->factor == oe->op)
4772 rf1->count += oe->count;
4773 break;
4777 if (j >= repeat_factor_vec.length ())
4779 rfnew.factor = oe->op;
4780 rfnew.rank = oe->rank;
4781 rfnew.count = oe->count;
4782 rfnew.repr = NULL_TREE;
4783 repeat_factor_vec.safe_push (rfnew);
4788 /* Sort the repeated factor vector by (a) increasing occurrence count,
4789 and (b) decreasing rank. */
4790 repeat_factor_vec.qsort (compare_repeat_factors);
4792 /* It is generally best to combine as many base factors as possible
4793 into a product before applying __builtin_powi to the result.
4794 However, the sort order chosen for the repeated factor vector
4795 allows us to cache partial results for the product of the base
4796 factors for subsequent use. When we already have a cached partial
4797 result from a previous iteration, it is best to make use of it
4798 before looking for another __builtin_pow opportunity.
4800 As an example, consider x * x * y * y * y * z * z * z * z.
4801 We want to first compose the product x * y * z, raise it to the
4802 second power, then multiply this by y * z, and finally multiply
4803 by z. This can be done in 5 multiplies provided we cache y * z
4804 for use in both expressions:
4806 t1 = y * z
4807 t2 = t1 * x
4808 t3 = t2 * t2
4809 t4 = t1 * t3
4810 result = t4 * z
4812 If we instead ignored the cached y * z and first multiplied by
4813 the __builtin_pow opportunity z * z, we would get the inferior:
4815 t1 = y * z
4816 t2 = t1 * x
4817 t3 = t2 * t2
4818 t4 = z * z
4819 t5 = t3 * t4
4820 result = t5 * y */
4822 vec_len = repeat_factor_vec.length ();
4824 /* Repeatedly look for opportunities to create a builtin_powi call. */
4825 while (true)
4827 HOST_WIDE_INT power;
4829 /* First look for the largest cached product of factors from
4830 preceding iterations. If found, create a builtin_powi for
4831 it if the minimum occurrence count for its factors is at
4832 least 2, or just use this cached product as our next
4833 multiplicand if the minimum occurrence count is 1. */
4834 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4836 if (rf1->repr && rf1->count > 0)
4837 break;
4840 if (j < vec_len)
4842 power = rf1->count;
4844 if (power == 1)
4846 iter_result = rf1->repr;
4848 if (dump_file && (dump_flags & TDF_DETAILS))
4850 unsigned elt;
4851 repeat_factor *rf;
4852 fputs ("Multiplying by cached product ", dump_file);
4853 for (elt = j; elt < vec_len; elt++)
4855 rf = &repeat_factor_vec[elt];
4856 print_generic_expr (dump_file, rf->factor, 0);
4857 if (elt < vec_len - 1)
4858 fputs (" * ", dump_file);
4860 fputs ("\n", dump_file);
4863 else
4865 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4866 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4867 build_int_cst (integer_type_node,
4868 power));
4869 gimple_call_set_lhs (pow_stmt, iter_result);
4870 gimple_set_location (pow_stmt, gimple_location (stmt));
4871 gimple_set_uid (pow_stmt, gimple_uid (stmt));
4872 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4874 if (dump_file && (dump_flags & TDF_DETAILS))
4876 unsigned elt;
4877 repeat_factor *rf;
4878 fputs ("Building __builtin_pow call for cached product (",
4879 dump_file);
4880 for (elt = j; elt < vec_len; elt++)
4882 rf = &repeat_factor_vec[elt];
4883 print_generic_expr (dump_file, rf->factor, 0);
4884 if (elt < vec_len - 1)
4885 fputs (" * ", dump_file);
4887 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
4888 power);
4892 else
4894 /* Otherwise, find the first factor in the repeated factor
4895 vector whose occurrence count is at least 2. If no such
4896 factor exists, there are no builtin_powi opportunities
4897 remaining. */
4898 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4900 if (rf1->count >= 2)
4901 break;
4904 if (j >= vec_len)
4905 break;
4907 power = rf1->count;
4909 if (dump_file && (dump_flags & TDF_DETAILS))
4911 unsigned elt;
4912 repeat_factor *rf;
4913 fputs ("Building __builtin_pow call for (", dump_file);
4914 for (elt = j; elt < vec_len; elt++)
4916 rf = &repeat_factor_vec[elt];
4917 print_generic_expr (dump_file, rf->factor, 0);
4918 if (elt < vec_len - 1)
4919 fputs (" * ", dump_file);
4921 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
4924 reassociate_stats.pows_created++;
4926 /* Visit each element of the vector in reverse order (so that
4927 high-occurrence elements are visited first, and within the
4928 same occurrence count, lower-ranked elements are visited
4929 first). Form a linear product of all elements in this order
4930 whose occurrence count is at least that of element J.
4931 Record the SSA name representing the product of each element
4932 with all subsequent elements in the vector. */
4933 if (j == vec_len - 1)
4934 rf1->repr = rf1->factor;
4935 else
4937 for (ii = vec_len - 2; ii >= (int)j; ii--)
4939 tree op1, op2;
4941 rf1 = &repeat_factor_vec[ii];
4942 rf2 = &repeat_factor_vec[ii + 1];
4944 /* Init the last factor's representative to be itself. */
4945 if (!rf2->repr)
4946 rf2->repr = rf2->factor;
4948 op1 = rf1->factor;
4949 op2 = rf2->repr;
4951 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4952 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4953 op1, op2);
4954 gimple_set_location (mul_stmt, gimple_location (stmt));
4955 gimple_set_uid (mul_stmt, gimple_uid (stmt));
4956 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4957 rf1->repr = target_ssa;
4959 /* Don't reprocess the multiply we just introduced. */
4960 gimple_set_visited (mul_stmt, true);
4964 /* Form a call to __builtin_powi for the maximum product
4965 just formed, raised to the power obtained earlier. */
4966 rf1 = &repeat_factor_vec[j];
4967 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4968 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4969 build_int_cst (integer_type_node,
4970 power));
4971 gimple_call_set_lhs (pow_stmt, iter_result);
4972 gimple_set_location (pow_stmt, gimple_location (stmt));
4973 gimple_set_uid (pow_stmt, gimple_uid (stmt));
4974 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4977 /* If we previously formed at least one other builtin_powi call,
4978 form the product of this one and those others. */
4979 if (result)
4981 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4982 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4983 result, iter_result);
4984 gimple_set_location (mul_stmt, gimple_location (stmt));
4985 gimple_set_uid (mul_stmt, gimple_uid (stmt));
4986 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4987 gimple_set_visited (mul_stmt, true);
4988 result = new_result;
4990 else
4991 result = iter_result;
4993 /* Decrement the occurrence count of each element in the product
4994 by the count found above, and remove this many copies of each
4995 factor from OPS. */
4996 for (i = j; i < vec_len; i++)
4998 unsigned k = power;
4999 unsigned n;
5001 rf1 = &repeat_factor_vec[i];
5002 rf1->count -= power;
5004 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
5006 if (oe->op == rf1->factor)
5008 if (oe->count <= k)
5010 ops->ordered_remove (n);
5011 k -= oe->count;
5013 if (k == 0)
5014 break;
5016 else
5018 oe->count -= k;
5019 break;
5026 /* At this point all elements in the repeated factor vector have a
5027 remaining occurrence count of 0 or 1, and those with a count of 1
5028 don't have cached representatives. Re-sort the ops vector and
5029 clean up. */
5030 ops->qsort (sort_by_operand_rank);
5031 repeat_factor_vec.release ();
5033 /* Return the final product computed herein. Note that there may
5034 still be some elements with single occurrence count left in OPS;
5035 those will be handled by the normal reassociation logic. */
5036 return result;
5039 /* Attempt to optimize
5040 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5041 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
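/* A concrete instance: -2.0 * copysign (3.0, y) is rewritten as a new
   call copysign (-6.0, y) followed by a negation of its result, which
   matches the CST1 < 0 form above.  */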
5043 static void
5044 attempt_builtin_copysign (vec<operand_entry *> *ops)
5046 operand_entry *oe;
5047 unsigned int i;
5048 unsigned int length = ops->length ();
5049 tree cst = ops->last ()->op;
5051 if (length == 1 || TREE_CODE (cst) != REAL_CST)
5052 return;
5054 FOR_EACH_VEC_ELT (*ops, i, oe)
5056 if (TREE_CODE (oe->op) == SSA_NAME
5057 && has_single_use (oe->op))
5059 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5060 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
5062 tree arg0, arg1;
5063 switch (gimple_call_combined_fn (old_call))
5065 CASE_CFN_COPYSIGN:
5066 arg0 = gimple_call_arg (old_call, 0);
5067 arg1 = gimple_call_arg (old_call, 1);
5068 /* The first argument of copysign must be a constant,
5069 otherwise there's nothing to do. */
5070 if (TREE_CODE (arg0) == REAL_CST)
5072 tree type = TREE_TYPE (arg0);
5073 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
5074 /* If we couldn't fold to a single constant, skip it.
5075 That happens e.g. for inexact multiplication when
5076 -frounding-math. */
5077 if (mul == NULL_TREE)
5078 break;
5079 /* Instead of adjusting OLD_CALL, let's build a new
5080 call to not leak the LHS and prevent keeping bogus
5081 debug statements. DCE will clean up the old call. */
5082 gcall *new_call;
5083 if (gimple_call_internal_p (old_call))
5084 new_call = gimple_build_call_internal
5085 (IFN_COPYSIGN, 2, mul, arg1);
5086 else
5087 new_call = gimple_build_call
5088 (gimple_call_fndecl (old_call), 2, mul, arg1);
5089 tree lhs = make_ssa_name (type);
5090 gimple_call_set_lhs (new_call, lhs);
5091 gimple_set_location (new_call,
5092 gimple_location (old_call));
5093 insert_stmt_after (new_call, old_call);
5094 /* We've used the constant, get rid of it. */
5095 ops->pop ();
5096 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
5097 /* Handle the CST1 < 0 case by negating the result. */
5098 if (cst1_neg)
5100 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
5101 gimple *negate_stmt
5102 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
5103 insert_stmt_after (negate_stmt, new_call);
5104 oe->op = negrhs;
5106 else
5107 oe->op = lhs;
5108 if (dump_file && (dump_flags & TDF_DETAILS))
5110 fprintf (dump_file, "Optimizing copysign: ");
5111 print_generic_expr (dump_file, cst, 0);
5112 fprintf (dump_file, " * COPYSIGN (");
5113 print_generic_expr (dump_file, arg0, 0);
5114 fprintf (dump_file, ", ");
5115 print_generic_expr (dump_file, arg1, 0);
5116 fprintf (dump_file, ") into %sCOPYSIGN (",
5117 cst1_neg ? "-" : "");
5118 print_generic_expr (dump_file, mul, 0);
5119 fprintf (dump_file, ", ");
5120 print_generic_expr (dump_file, arg1, 0);
5121 fprintf (dump_file, "\n");
5123 return;
5125 break;
5126 default:
5127 break;
5134 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
5136 static void
5137 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
5139 tree rhs1;
5141 if (dump_file && (dump_flags & TDF_DETAILS))
5143 fprintf (dump_file, "Transforming ");
5144 print_gimple_stmt (dump_file, stmt, 0, 0);
5147 rhs1 = gimple_assign_rhs1 (stmt);
5148 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
5149 update_stmt (stmt);
5150 remove_visited_stmt_chain (rhs1);
5152 if (dump_file && (dump_flags & TDF_DETAILS))
5154 fprintf (dump_file, " into ");
5155 print_gimple_stmt (dump_file, stmt, 0, 0);
5159 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
5161 static void
5162 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
5163 tree rhs1, tree rhs2)
5165 if (dump_file && (dump_flags & TDF_DETAILS))
5167 fprintf (dump_file, "Transforming ");
5168 print_gimple_stmt (dump_file, stmt, 0, 0);
5171 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5172 update_stmt (gsi_stmt (*gsi));
5173 remove_visited_stmt_chain (rhs1);
5175 if (dump_file && (dump_flags & TDF_DETAILS))
5177 fprintf (dump_file, " into ");
5178 print_gimple_stmt (dump_file, stmt, 0, 0);
5182 /* Reassociate expressions in basic block BB and its children in the
5183 post-dominator tree.
5185 Bubble up return status from maybe_optimize_range_tests. */
5187 static bool
5188 reassociate_bb (basic_block bb)
5190 gimple_stmt_iterator gsi;
5191 basic_block son;
5192 gimple *stmt = last_stmt (bb);
5193 bool cfg_cleanup_needed = false;
5195 if (stmt && !gimple_visited_p (stmt))
5196 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5198 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5200 stmt = gsi_stmt (gsi);
5202 if (is_gimple_assign (stmt)
5203 && !stmt_could_throw_p (stmt))
5205 tree lhs, rhs1, rhs2;
5206 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5208 /* If this is not a gimple binary expression, there is
5209 nothing for us to do with it. */
5210 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5211 continue;
5213 /* If this was part of an already processed statement,
5214 we don't need to touch it again. */
5215 if (gimple_visited_p (stmt))
5217 /* This statement might have become dead because of previous
5218 reassociations. */
5219 if (has_zero_uses (gimple_get_lhs (stmt)))
5221 reassoc_remove_stmt (&gsi);
5222 release_defs (stmt);
5223 /* We might have removed the last stmt above, which
5224 leaves the iterator at the end of the sequence.
5225 Reset it to the last stmt of the block in that
5226 case; if the block has become empty as well,
5227 we removed the only statement of the sequence,
5228 and we need to bail out. */
5229 if (gsi_end_p (gsi))
5231 gsi = gsi_last_bb (bb);
5232 if (gsi_end_p (gsi))
5233 break;
5236 continue;
5239 lhs = gimple_assign_lhs (stmt);
5240 rhs1 = gimple_assign_rhs1 (stmt);
5241 rhs2 = gimple_assign_rhs2 (stmt);
5243 /* For non-bit or min/max operations we can't associate
5244 all types. Verify that here. */
5245 if (rhs_code != BIT_IOR_EXPR
5246 && rhs_code != BIT_AND_EXPR
5247 && rhs_code != BIT_XOR_EXPR
5248 && rhs_code != MIN_EXPR
5249 && rhs_code != MAX_EXPR
5250 && (!can_reassociate_p (lhs)
5251 || !can_reassociate_p (rhs1)
5252 || !can_reassociate_p (rhs2)))
5253 continue;
5255 if (associative_tree_code (rhs_code))
5257 auto_vec<operand_entry *> ops;
5258 tree powi_result = NULL_TREE;
5259 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5261 /* There may be no immediate uses left by the time we
5262 get here because we may have eliminated them all. */
5263 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5264 continue;
5266 gimple_set_visited (stmt, true);
5267 linearize_expr_tree (&ops, stmt, true, true);
5268 ops.qsort (sort_by_operand_rank);
5269 optimize_ops_list (rhs_code, &ops);
5270 if (undistribute_ops_list (rhs_code, &ops,
5271 loop_containing_stmt (stmt)))
5273 ops.qsort (sort_by_operand_rank);
5274 optimize_ops_list (rhs_code, &ops);
5277 if (rhs_code == PLUS_EXPR
5278 && transform_add_to_multiply (&ops))
5279 ops.qsort (sort_by_operand_rank);
5281 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5283 if (is_vector)
5284 optimize_vec_cond_expr (rhs_code, &ops);
5285 else
5286 optimize_range_tests (rhs_code, &ops);
5289 if (rhs_code == MULT_EXPR && !is_vector)
5291 attempt_builtin_copysign (&ops);
5293 if (reassoc_insert_powi_p
5294 && flag_unsafe_math_optimizations)
5295 powi_result = attempt_builtin_powi (stmt, &ops);
5298 operand_entry *last;
5299 bool negate_result = false;
5300 if (ops.length () > 1
5301 && rhs_code == MULT_EXPR)
5303 last = ops.last ();
5304 if (((TREE_CODE (last->op) == INTEGER_CST
5305 && integer_minus_onep (last->op))
5306 || real_minus_onep (last->op))
5307 && !HONOR_SNANS (TREE_TYPE (lhs))
5308 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
5309 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
5311 ops.pop ();
5312 negate_result = true;
5316 /* If the operand vector is now empty, all operands were
5317 consumed by the __builtin_powi optimization. */
5318 if (ops.length () == 0)
5319 transform_stmt_to_copy (&gsi, stmt, powi_result);
5320 else if (ops.length () == 1)
5322 tree last_op = ops.last ()->op;
5324 /* If the stmt that defines operand has to be inserted, insert it
5325 before the use. */
5326 if (ops.last ()->stmt_to_insert)
5327 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
5328 if (powi_result)
5329 transform_stmt_to_multiply (&gsi, stmt, last_op,
5330 powi_result);
5331 else
5332 transform_stmt_to_copy (&gsi, stmt, last_op);
5334 else
5336 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5337 int ops_num = ops.length ();
5338 int width = get_reassociation_width (ops_num, rhs_code, mode);
5339 tree new_lhs = lhs;
5341 if (dump_file && (dump_flags & TDF_DETAILS))
5342 fprintf (dump_file,
5343 "Width = %d was chosen for reassociation\n", width);
5345 if (width > 1
5346 && ops.length () > 3)
5347 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
5348 width, ops);
5349 else
5351 /* When there are three operands left, we want
5352 to make sure the ones that get the double
5353 binary op are chosen wisely. */
5354 int len = ops.length ();
5355 if (len >= 3)
5356 swap_ops_for_binary_stmt (ops, len - 3, stmt);
5358 new_lhs = rewrite_expr_tree (stmt, 0, ops,
5359 powi_result != NULL);
5362 /* If we combined some repeated factors into a
5363 __builtin_powi call, multiply that result by the
5364 reassociated operands. */
5365 if (powi_result)
5367 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
5368 tree type = TREE_TYPE (lhs);
5369 tree target_ssa = make_temp_ssa_name (type, NULL,
5370 "reassocpow");
5371 gimple_set_lhs (lhs_stmt, target_ssa);
5372 update_stmt (lhs_stmt);
5373 if (lhs != new_lhs)
5374 target_ssa = new_lhs;
5375 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
5376 powi_result, target_ssa);
5377 gimple_set_location (mul_stmt, gimple_location (stmt));
5378 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5379 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
5383 if (negate_result)
5385 stmt = SSA_NAME_DEF_STMT (lhs);
5386 tree tmp = make_ssa_name (TREE_TYPE (lhs));
5387 gimple_set_lhs (stmt, tmp);
5388 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
5389 tmp);
5390 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5391 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
5392 update_stmt (stmt);
5397 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
5398 son;
5399 son = next_dom_son (CDI_POST_DOMINATORS, son))
5400 cfg_cleanup_needed |= reassociate_bb (son);
5402 return cfg_cleanup_needed;
5405 /* Add jumps around shifts for range tests turned into bit tests.
5406 For each SSA_NAME VAR we have code like:
5407 VAR = ...; // final stmt of range comparison
5408 // bit test here...;
5409 OTHERVAR = ...; // final stmt of the bit test sequence
5410 RES = VAR | OTHERVAR;
5411 Turn the above into:
5412 VAR = ...;
5413 if (VAR != 0)
5414 goto <l3>;
5415 else
5416 goto <l2>;
5417 <l2>:
5418 // bit test here...;
5419 OTHERVAR = ...;
5420 <l3>:
5421 # RES = PHI<1(l1), OTHERVAR(l2)>; */
5423 static void
5424 branch_fixup (void)
5426 tree var;
5427 unsigned int i;
5429 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
5431 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
5432 gimple *use_stmt;
5433 use_operand_p use;
5434 bool ok = single_imm_use (var, &use, &use_stmt);
5435 gcc_assert (ok
5436 && is_gimple_assign (use_stmt)
5437 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
5438 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
5440 basic_block cond_bb = gimple_bb (def_stmt);
5441 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
5442 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
5444 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5445 gimple *g = gimple_build_cond (NE_EXPR, var,
5446 build_zero_cst (TREE_TYPE (var)),
5447 NULL_TREE, NULL_TREE);
5448 location_t loc = gimple_location (use_stmt);
5449 gimple_set_location (g, loc);
5450 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
5452 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
5453 etrue->probability = REG_BR_PROB_BASE / 2;
5454 etrue->count = cond_bb->count / 2;
5455 edge efalse = find_edge (cond_bb, then_bb);
5456 efalse->flags = EDGE_FALSE_VALUE;
5457 efalse->probability -= etrue->probability;
5458 efalse->count -= etrue->count;
5459 then_bb->count -= etrue->count;
5461 tree othervar = NULL_TREE;
5462 if (gimple_assign_rhs1 (use_stmt) == var)
5463 othervar = gimple_assign_rhs2 (use_stmt);
5464 else if (gimple_assign_rhs2 (use_stmt) == var)
5465 othervar = gimple_assign_rhs1 (use_stmt);
5466 else
5467 gcc_unreachable ();
5468 tree lhs = gimple_assign_lhs (use_stmt);
5469 gphi *phi = create_phi_node (lhs, merge_bb);
5470 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
5471 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
5472 gsi = gsi_for_stmt (use_stmt);
5473 gsi_remove (&gsi, true);
5475 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
5476 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
5478 reassoc_branch_fixups.release ();
5481 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
5482 void debug_ops_vector (vec<operand_entry *> ops);
5484 /* Dump the operand entry vector OPS to FILE. */
5486 void
5487 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
5489 operand_entry *oe;
5490 unsigned int i;
5492 FOR_EACH_VEC_ELT (ops, i, oe)
5494 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
5495 print_generic_expr (file, oe->op, 0);
5496 fprintf (file, "\n");
5500 /* Dump the operand entry vector OPS to STDERR. */
5502 DEBUG_FUNCTION void
5503 debug_ops_vector (vec<operand_entry *> ops)
5505 dump_ops_vector (stderr, ops);
5508 /* Bubble up return status from reassociate_bb. */
5510 static bool
5511 do_reassoc (void)
5513 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5514 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
5517 /* Initialize the reassociation pass. */
5519 static void
5520 init_reassoc (void)
5522 int i;
5523 long rank = 2;
5524 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
5526 /* Find the loops, so that we can prevent moving calculations in
5527 them. */
5528 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5530 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
5532 next_operand_entry_id = 0;
5534 /* Reverse RPO (Reverse Post Order) will give us something where
5535 deeper loops come later. */
5536 pre_and_rev_post_order_compute (NULL, bbs, false);
5537 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5538 operand_rank = new hash_map<tree, long>;
5540 /* Give each default definition a distinct rank. This includes
5541 parameters and the static chain. Walk backwards over all
5542 SSA names so that we get proper rank ordering according
5543 to tree_swap_operands_p. */
5544 for (i = num_ssa_names - 1; i > 0; --i)
5546 tree name = ssa_name (i);
5547 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5548 insert_operand_rank (name, ++rank);
5551 /* Set up rank for each BB */
5552 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5553 bb_rank[bbs[i]] = ++rank << 16;
5555 free (bbs);
5556 calculate_dominance_info (CDI_POST_DOMINATORS);
5557 plus_negates = vNULL;
5560 /* Cleanup after the reassociation pass, and print stats if
5561 requested. */
5563 static void
5564 fini_reassoc (void)
5566 statistics_counter_event (cfun, "Linearized",
5567 reassociate_stats.linearized);
5568 statistics_counter_event (cfun, "Constants eliminated",
5569 reassociate_stats.constants_eliminated);
5570 statistics_counter_event (cfun, "Ops eliminated",
5571 reassociate_stats.ops_eliminated);
5572 statistics_counter_event (cfun, "Statements rewritten",
5573 reassociate_stats.rewritten);
5574 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5575 reassociate_stats.pows_encountered);
5576 statistics_counter_event (cfun, "Built-in powi calls created",
5577 reassociate_stats.pows_created);
5579 delete operand_rank;
5580 operand_entry_pool.release ();
5581 free (bb_rank);
5582 plus_negates.release ();
5583 free_dominance_info (CDI_POST_DOMINATORS);
5584 loop_optimizer_finalize ();
5587 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
5588 insertion of __builtin_powi calls.
5590 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
5591 optimization of a gimple conditional. Otherwise returns zero. */
5593 static unsigned int
5594 execute_reassoc (bool insert_powi_p)
5596 reassoc_insert_powi_p = insert_powi_p;
5598 init_reassoc ();
5600 bool cfg_cleanup_needed;
5601 cfg_cleanup_needed = do_reassoc ();
5602 repropagate_negates ();
5603 branch_fixup ();
5605 fini_reassoc ();
5606 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
5609 namespace {
5611 const pass_data pass_data_reassoc =
5613 GIMPLE_PASS, /* type */
5614 "reassoc", /* name */
5615 OPTGROUP_NONE, /* optinfo_flags */
5616 TV_TREE_REASSOC, /* tv_id */
5617 ( PROP_cfg | PROP_ssa ), /* properties_required */
5618 0, /* properties_provided */
5619 0, /* properties_destroyed */
5620 0, /* todo_flags_start */
5621 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5624 class pass_reassoc : public gimple_opt_pass
5626 public:
5627 pass_reassoc (gcc::context *ctxt)
5628 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
5631 /* opt_pass methods: */
5632 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5633 void set_pass_param (unsigned int n, bool param)
5635 gcc_assert (n == 0);
5636 insert_powi_p = param;
5638 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5639 virtual unsigned int execute (function *)
5640 { return execute_reassoc (insert_powi_p); }
5642 private:
5643 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
5644 point 3a in the pass header comment. */
5645 bool insert_powi_p;
5646 }; // class pass_reassoc
5648 } // anon namespace
5650 gimple_opt_pass *
5651 make_pass_reassoc (gcc::context *ctxt)
5653 return new pass_reassoc (ctxt);