gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2016 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "tm_p.h"
33 #include "ssa.h"
34 #include "optabs-tree.h"
35 #include "gimple-pretty-print.h"
36 #include "diagnostic-core.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "cfganal.h"
40 #include "gimple-fold.h"
41 #include "tree-eh.h"
42 #include "gimple-iterator.h"
43 #include "gimplify-me.h"
44 #include "tree-cfg.h"
45 #include "tree-ssa-loop.h"
46 #include "flags.h"
47 #include "tree-ssa.h"
48 #include "langhooks.h"
49 #include "cfgloop.h"
50 #include "params.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
55 /* This is a simple global reassociation pass. It is, in part, based
56    on the LLVM pass of the same name (they do some things more/less
57    than we do, in different orders, etc.).
59 It consists of five steps:
61 1. Breaking up subtract operations into addition + negate, where
62 it would promote the reassociation of adds.
64 2. Left linearization of the expression trees, so that (A+B)+(C+D)
65 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
66 During linearization, we place the operands of the binary
67 expressions into a vector of operand_entry_*
69 3. Optimization of the operand lists, eliminating things like a +
70 -a, a & a, etc.
72 3a. Combine repeated factors with the same occurrence counts
73 into a __builtin_powi call that will later be optimized into
74 an optimal number of multiplies.
76 4. Rewrite the expression trees we linearized and optimized so
77 they are in proper rank order.
79    5. Repropagate negates, as nothing else will clean them up ATM.
81 A bit of theory on #4, since nobody seems to write anything down
82 about why it makes sense to do it the way they do it:
84 We could do this much nicer theoretically, but don't (for reasons
85 explained after how to do it theoretically nice :P).
87 In order to promote the most redundancy elimination, you want
88 binary expressions whose operands are the same rank (or
89 preferably, the same value) exposed to the redundancy eliminator,
90 for possible elimination.
92 So the way to do this if we really cared, is to build the new op
93 tree from the leaves to the roots, merging as you go, and putting the
94 new op on the end of the worklist, until you are left with one
95 thing on the worklist.
97 IE if you have to rewrite the following set of operands (listed with
98 rank in parentheses), with opcode PLUS_EXPR:
100 a (1), b (1), c (1), d (2), e (2)
103 We start with our merge worklist empty, and the ops list with all of
104 those on it.
106 You want to first merge all leaves of the same rank, as much as
107 possible.
109 So first build a binary op of
111 mergetmp = a + b, and put "mergetmp" on the merge worklist.
113 Because there is no three operand form of PLUS_EXPR, c is not going to
114 be exposed to redundancy elimination as a rank 1 operand.
116 So you might as well throw it on the merge worklist (you could also
117 consider it to now be a rank two operand, and merge it with d and e,
118 but in this case, you then have evicted e from a binary op. So at
119 least in this situation, you can't win.)
121 Then build a binary op of d + e
122 mergetmp2 = d + e
124 and put mergetmp2 on the merge worklist.
126 so merge worklist = {mergetmp, c, mergetmp2}
128 Continue building binary ops of these operations until you have only
129 one operation left on the worklist.
131 So we have
133 build binary op
134 mergetmp3 = mergetmp + c
136 worklist = {mergetmp2, mergetmp3}
138 mergetmp4 = mergetmp2 + mergetmp3
140 worklist = {mergetmp4}
142 because we have one operation left, we can now just set the original
143 statement equal to the result of that operation.
145 This will at least expose a + b and d + e to redundancy elimination
146 as binary operations.
148 For extra points, you can reuse the old statements to build the
149 mergetmps, since you shouldn't run out.
151 So why don't we do this?
153 Because it's expensive, and rarely will help. Most trees we are
154    reassociating have 3 or fewer ops.  If they have 2 ops, they already
155 will be written into a nice single binary op. If you have 3 ops, a
156 single simple check suffices to tell you whether the first two are of the
157 same rank. If so, you know to order it
159 mergetmp = op1 + op2
160 newstmt = mergetmp + op3
162 instead of
163 mergetmp = op2 + op3
164 newstmt = mergetmp + op1
166 If all three are of the same rank, you can't expose them all in a
167 single binary operator anyway, so the above is *still* the best you
168 can do.
170 Thus, this is what we do. When we have three ops left, we check to see
171 what order to put them in, and call it a day. As a nod to vector sum
172 reduction, we check if any of the ops are really a phi node that is a
173 destructive update for the associating op, and keep the destructive
174 update together for vector sum reduction recognition. */
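/* As a small hand-worked illustration of the steps above (the names and
   statements here are made up, not taken from any testcase), consider:

     t1 = a - b
     t2 = t1 + c
     r  = t2 + b

   Step 1 rewrites the subtraction as an addition of a negate, step 2
   linearizes the chain into the operand list {a, -b, c, b}, step 3
   cancels -b against b, and step 4 rewrites what is left in rank order,
   so the whole chain reduces to r = a + c.  */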
176 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
177 point 3a in the pass header comment. */
178 static bool reassoc_insert_powi_p;
180 /* Statistics */
181 static struct
183 int linearized;
184 int constants_eliminated;
185 int ops_eliminated;
186 int rewritten;
187 int pows_encountered;
188 int pows_created;
189 } reassociate_stats;
191 /* Operator, rank pair. */
192 struct operand_entry
194 unsigned int rank;
195 int id;
196 tree op;
197 unsigned int count;
198 gimple *stmt_to_insert;
201 static object_allocator<operand_entry> operand_entry_pool
202 ("operand entry pool");
204 /* This is used to assign a unique ID to each struct operand_entry
205 so that qsort results are identical on different hosts. */
206 static int next_operand_entry_id;
208 /* Starting rank number for a given basic block, so that we can rank
209 operations using unmovable instructions in that BB based on the bb
210 depth. */
211 static long *bb_rank;
213 /* Operand->rank hashtable. */
214 static hash_map<tree, long> *operand_rank;
216 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
217    all basic blocks, the CFG should be adjusted - the basic block
218    split right after that SSA_NAME's definition statement and before
219    its only use, which must be a bit ior.  */
220 static vec<tree> reassoc_branch_fixups;
222 /* Forward decls. */
223 static long get_rank (tree);
224 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
226 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
227 possibly added by gsi_remove. */
229 bool
230 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
232 gimple *stmt = gsi_stmt (*gsi);
234 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
235 return gsi_remove (gsi, true);
237 gimple_stmt_iterator prev = *gsi;
238 gsi_prev (&prev);
239 unsigned uid = gimple_uid (stmt);
240 basic_block bb = gimple_bb (stmt);
241 bool ret = gsi_remove (gsi, true);
242 if (!gsi_end_p (prev))
243 gsi_next (&prev);
244 else
245 prev = gsi_start_bb (bb);
246 gimple *end_stmt = gsi_stmt (*gsi);
247 while ((stmt = gsi_stmt (prev)) != end_stmt)
249 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
250 gimple_set_uid (stmt, uid);
251 gsi_next (&prev);
253 return ret;
256 /* Bias amount for loop-carried phis. We want this to be larger than
257 the depth of any reassociation tree we can see, but not larger than
258 the rank difference between two blocks. */
259 #define PHI_LOOP_BIAS (1 << 15)
261 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
262 an innermost loop, and the phi has only a single use which is inside
263 the loop, then the rank is the block rank of the loop latch plus an
264 extra bias for the loop-carried dependence. This causes expressions
265 calculated into an accumulator variable to be independent for each
266 iteration of the loop. If STMT is some other phi, the rank is the
267 block rank of its containing block. */
268 static long
269 phi_rank (gimple *stmt)
271 basic_block bb = gimple_bb (stmt);
272 struct loop *father = bb->loop_father;
273 tree res;
274 unsigned i;
275 use_operand_p use;
276 gimple *use_stmt;
278 /* We only care about real loops (those with a latch). */
279 if (!father->latch)
280 return bb_rank[bb->index];
282 /* Interesting phis must be in headers of innermost loops. */
283 if (bb != father->header
284 || father->inner)
285 return bb_rank[bb->index];
287 /* Ignore virtual SSA_NAMEs. */
288 res = gimple_phi_result (stmt);
289 if (virtual_operand_p (res))
290 return bb_rank[bb->index];
292 /* The phi definition must have a single use, and that use must be
293 within the loop. Otherwise this isn't an accumulator pattern. */
294 if (!single_imm_use (res, &use, &use_stmt)
295 || gimple_bb (use_stmt)->loop_father != father)
296 return bb_rank[bb->index];
298 /* Look for phi arguments from within the loop. If found, bias this phi. */
299 for (i = 0; i < gimple_phi_num_args (stmt); i++)
301 tree arg = gimple_phi_arg_def (stmt, i);
302 if (TREE_CODE (arg) == SSA_NAME
303 && !SSA_NAME_IS_DEFAULT_DEF (arg))
305 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
306 if (gimple_bb (def_stmt)->loop_father == father)
307 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
311 /* Must be an uninteresting phi. */
312 return bb_rank[bb->index];
315 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
316 loop-carried dependence of an innermost loop, return TRUE; else
317 return FALSE. */
318 static bool
319 loop_carried_phi (tree exp)
321 gimple *phi_stmt;
322 long block_rank;
324 if (TREE_CODE (exp) != SSA_NAME
325 || SSA_NAME_IS_DEFAULT_DEF (exp))
326 return false;
328 phi_stmt = SSA_NAME_DEF_STMT (exp);
330 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
331 return false;
333 /* Non-loop-carried phis have block rank. Loop-carried phis have
334 an additional bias added in. If this phi doesn't have block rank,
335 it's biased and should not be propagated. */
336 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
338 if (phi_rank (phi_stmt) != block_rank)
339 return true;
341 return false;
344 /* Return the maximum of RANK and the rank that should be propagated
345 from expression OP. For most operands, this is just the rank of OP.
346 For loop-carried phis, the value is zero to avoid undoing the bias
347 in favor of the phi. */
348 static long
349 propagate_rank (long rank, tree op)
351 long op_rank;
353 if (loop_carried_phi (op))
354 return rank;
356 op_rank = get_rank (op);
358 return MAX (rank, op_rank);
361 /* Look up the operand rank structure for expression E. */
363 static inline long
364 find_operand_rank (tree e)
366 long *slot = operand_rank->get (e);
367 return slot ? *slot : -1;
370 /* Insert {E,RANK} into the operand rank hashtable. */
372 static inline void
373 insert_operand_rank (tree e, long rank)
375 gcc_assert (rank > 0);
376 gcc_assert (!operand_rank->put (e, rank));
379 /* Given an expression E, return the rank of the expression. */
381 static long
382 get_rank (tree e)
384   /* SSA_NAME's have the rank of the expression they are the result of.
386 For globals and uninitialized values, the rank is 0.
387 For function arguments, use the pre-setup rank.
388 For PHI nodes, stores, asm statements, etc, we use the rank of
389 the BB.
390 For simple operations, the rank is the maximum rank of any of
391 its operands, or the bb_rank, whichever is less.
392 I make no claims that this is optimal, however, it gives good
393 results. */
395 /* We make an exception to the normal ranking system to break
396 dependences of accumulator variables in loops. Suppose we
397 have a simple one-block loop containing:
399 x_1 = phi(x_0, x_2)
400 b = a + x_1
401 c = b + d
402 x_2 = c + e
404 As shown, each iteration of the calculation into x is fully
405 dependent upon the iteration before it. We would prefer to
406 see this in the form:
408 x_1 = phi(x_0, x_2)
409 b = a + d
410 c = b + e
411 x_2 = c + x_1
413 If the loop is unrolled, the calculations of b and c from
414 different iterations can be interleaved.
416 To obtain this result during reassociation, we bias the rank
417 of the phi definition x_1 upward, when it is recognized as an
418 accumulator pattern. The artificial rank causes it to be
419 added last, providing the desired independence. */
421 if (TREE_CODE (e) == SSA_NAME)
423 ssa_op_iter iter;
424 gimple *stmt;
425 long rank;
426 tree op;
428 if (SSA_NAME_IS_DEFAULT_DEF (e))
429 return find_operand_rank (e);
431 stmt = SSA_NAME_DEF_STMT (e);
432 if (gimple_code (stmt) == GIMPLE_PHI)
433 return phi_rank (stmt);
435 if (!is_gimple_assign (stmt))
436 return bb_rank[gimple_bb (stmt)->index];
438 /* If we already have a rank for this expression, use that. */
439 rank = find_operand_rank (e);
440 if (rank != -1)
441 return rank;
443 /* Otherwise, find the maximum rank for the operands. As an
444 exception, remove the bias from loop-carried phis when propagating
445 the rank so that dependent operations are not also biased. */
446       /* Simply walk over all SSA uses - this takes advantage of the
447          fact that non-SSA operands are is_gimple_min_invariant and
448 thus have rank 0. */
449 rank = 0;
450 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
451 rank = propagate_rank (rank, op);
453 if (dump_file && (dump_flags & TDF_DETAILS))
455 fprintf (dump_file, "Rank for ");
456 print_generic_expr (dump_file, e, 0);
457 fprintf (dump_file, " is %ld\n", (rank + 1));
460 /* Note the rank in the hashtable so we don't recompute it. */
461 insert_operand_rank (e, (rank + 1));
462 return (rank + 1);
465 /* Constants, globals, etc., are rank 0 */
466 return 0;
470 /* We want integer ones to end up last no matter what, since they are
471 the ones we can do the most with. */
472 #define INTEGER_CONST_TYPE 1 << 3
473 #define FLOAT_CONST_TYPE 1 << 2
474 #define OTHER_CONST_TYPE 1 << 1
476 /* Classify an invariant tree into integer, float, or other, so that
477 we can sort them to be near other constants of the same type. */
478 static inline int
479 constant_type (tree t)
481 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
482 return INTEGER_CONST_TYPE;
483 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
484 return FLOAT_CONST_TYPE;
485 else
486 return OTHER_CONST_TYPE;
489 /* qsort comparison function to sort operand entries PA and PB by rank
490 so that the sorted array is ordered by rank in decreasing order. */
491 static int
492 sort_by_operand_rank (const void *pa, const void *pb)
494 const operand_entry *oea = *(const operand_entry *const *)pa;
495 const operand_entry *oeb = *(const operand_entry *const *)pb;
497 /* It's nicer for optimize_expression if constants that are likely
498      to fold when added/multiplied/whatever are put next to each
499 other. Since all constants have rank 0, order them by type. */
500 if (oeb->rank == 0 && oea->rank == 0)
502 if (constant_type (oeb->op) != constant_type (oea->op))
503 return constant_type (oeb->op) - constant_type (oea->op);
504 else
505 /* To make sorting result stable, we use unique IDs to determine
506 order. */
507 return oeb->id - oea->id;
510 /* Lastly, make sure the versions that are the same go next to each
511 other. */
512 if ((oeb->rank - oea->rank == 0)
513 && TREE_CODE (oea->op) == SSA_NAME
514 && TREE_CODE (oeb->op) == SSA_NAME)
516       /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
517          versions of removed SSA_NAMEs, so if possible, prefer to sort
518          based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
519          See PR60418.  */
520 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
521 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
522 && !oea->stmt_to_insert
523 && !oeb->stmt_to_insert
524 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
526 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
527 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
528 basic_block bba = gimple_bb (stmta);
529 basic_block bbb = gimple_bb (stmtb);
530 if (bbb != bba)
532 if (bb_rank[bbb->index] != bb_rank[bba->index])
533 return bb_rank[bbb->index] - bb_rank[bba->index];
535 else
537 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
538 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
539 if (da != db)
540 return da ? 1 : -1;
544 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
545 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
546 else
547 return oeb->id - oea->id;
550 if (oeb->rank != oea->rank)
551 return oeb->rank - oea->rank;
552 else
553 return oeb->id - oea->id;
556 /* Add an operand entry to *OPS for the tree operand OP. */
558 static void
559 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
561 operand_entry *oe = operand_entry_pool.allocate ();
563 oe->op = op;
564 oe->rank = get_rank (op);
565 oe->id = next_operand_entry_id++;
566 oe->count = 1;
567 oe->stmt_to_insert = stmt_to_insert;
568 ops->safe_push (oe);
571 /* Add an operand entry to *OPS for the tree operand OP with repeat
572 count REPEAT. */
574 static void
575 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
576 HOST_WIDE_INT repeat)
578 operand_entry *oe = operand_entry_pool.allocate ();
580 oe->op = op;
581 oe->rank = get_rank (op);
582 oe->id = next_operand_entry_id++;
583 oe->count = repeat;
584 oe->stmt_to_insert = NULL;
585 ops->safe_push (oe);
587 reassociate_stats.pows_encountered++;
590 /* Return true if STMT is a reassociable operation containing a binary
591    operation with tree code CODE, and is inside LOOP.  */
593 static bool
594 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
596 basic_block bb = gimple_bb (stmt);
598 if (gimple_bb (stmt) == NULL)
599 return false;
601 if (!flow_bb_inside_loop_p (loop, bb))
602 return false;
604 if (is_gimple_assign (stmt)
605 && gimple_assign_rhs_code (stmt) == code
606 && has_single_use (gimple_assign_lhs (stmt)))
607 return true;
609 return false;
613 /* Return true if STMT is a nop-conversion. */
615 static bool
616 gimple_nop_conversion_p (gimple *stmt)
618 if (gassign *ass = dyn_cast <gassign *> (stmt))
620 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
621 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
622 TREE_TYPE (gimple_assign_rhs1 (ass))))
623 return true;
625 return false;
628 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
629 operand of the negate operation. Otherwise, return NULL. */
631 static tree
632 get_unary_op (tree name, enum tree_code opcode)
634 gimple *stmt = SSA_NAME_DEF_STMT (name);
636 /* Look through nop conversions (sign changes). */
637 if (gimple_nop_conversion_p (stmt)
638 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
639 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
641 if (!is_gimple_assign (stmt))
642 return NULL_TREE;
644 if (gimple_assign_rhs_code (stmt) == opcode)
645 return gimple_assign_rhs1 (stmt);
646 return NULL_TREE;
649 /* Return true if OP1 and OP2 have the same value when cast to either type.  */
651 static bool
652 ops_equal_values_p (tree op1, tree op2)
654 if (op1 == op2)
655 return true;
657 tree orig_op1 = op1;
658 if (TREE_CODE (op1) == SSA_NAME)
660 gimple *stmt = SSA_NAME_DEF_STMT (op1);
661 if (gimple_nop_conversion_p (stmt))
663 op1 = gimple_assign_rhs1 (stmt);
664 if (op1 == op2)
665 return true;
669 if (TREE_CODE (op2) == SSA_NAME)
671 gimple *stmt = SSA_NAME_DEF_STMT (op2);
672 if (gimple_nop_conversion_p (stmt))
674 op2 = gimple_assign_rhs1 (stmt);
675 if (op1 == op2
676 || orig_op1 == op2)
677 return true;
681 return false;
685 /* If CURR and LAST are a pair of ops that OPCODE allows us to
686 eliminate through equivalences, do so, remove them from OPS, and
687 return true. Otherwise, return false. */
689 static bool
690 eliminate_duplicate_pair (enum tree_code opcode,
691 vec<operand_entry *> *ops,
692 bool *all_done,
693 unsigned int i,
694 operand_entry *curr,
695 operand_entry *last)
698   /* If we have two of the same op, and the opcode is &, |, min, or max,
699 we can eliminate one of them.
700 If we have two of the same op, and the opcode is ^, we can
701 eliminate both of them. */
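  /* E.g. (illustrative only) for OPS {a_1, a_1, b_2} with BIT_AND_EXPR one
     copy of a_1 is dropped, while with BIT_XOR_EXPR both copies are dropped
     since a_1 ^ a_1 == 0.  */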
703 if (last && last->op == curr->op)
705 switch (opcode)
707 case MAX_EXPR:
708 case MIN_EXPR:
709 case BIT_IOR_EXPR:
710 case BIT_AND_EXPR:
711 if (dump_file && (dump_flags & TDF_DETAILS))
713 fprintf (dump_file, "Equivalence: ");
714 print_generic_expr (dump_file, curr->op, 0);
715 fprintf (dump_file, " [&|minmax] ");
716 print_generic_expr (dump_file, last->op, 0);
717 fprintf (dump_file, " -> ");
718 print_generic_stmt (dump_file, last->op, 0);
721 ops->ordered_remove (i);
722 reassociate_stats.ops_eliminated ++;
724 return true;
726 case BIT_XOR_EXPR:
727 if (dump_file && (dump_flags & TDF_DETAILS))
729 fprintf (dump_file, "Equivalence: ");
730 print_generic_expr (dump_file, curr->op, 0);
731 fprintf (dump_file, " ^ ");
732 print_generic_expr (dump_file, last->op, 0);
733 fprintf (dump_file, " -> nothing\n");
736 reassociate_stats.ops_eliminated += 2;
738 if (ops->length () == 2)
740 ops->truncate (0);
741 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
742 *all_done = true;
744 else
746 ops->ordered_remove (i-1);
747 ops->ordered_remove (i-1);
750 return true;
752 default:
753 break;
756 return false;
759 static vec<tree> plus_negates;
761 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
762 expression, look in OPS for a corresponding positive operation to cancel
763 it out. If we find one, remove the other from OPS, replace
764 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
765 return false. */
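/* For instance (an illustrative sketch, SSA names made up): in a PLUS_EXPR
   chain with OPS {t_5, b_2, c_3} where t_5 = -b_2, the pair cancels and the
   list becomes {c_3, 0}; with t_5 = ~b_2 it becomes {c_3, -1} instead.  */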
767 static bool
768 eliminate_plus_minus_pair (enum tree_code opcode,
769 vec<operand_entry *> *ops,
770 unsigned int currindex,
771 operand_entry *curr)
773 tree negateop;
774 tree notop;
775 unsigned int i;
776 operand_entry *oe;
778 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
779 return false;
781 negateop = get_unary_op (curr->op, NEGATE_EXPR);
782 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
783 if (negateop == NULL_TREE && notop == NULL_TREE)
784 return false;
786 /* Any non-negated version will have a rank that is one less than
787 the current rank. So once we hit those ranks, if we don't find
788 one, we can stop. */
790 for (i = currindex + 1;
791 ops->iterate (i, &oe)
792 && oe->rank >= curr->rank - 1 ;
793 i++)
795 if (negateop
796 && ops_equal_values_p (oe->op, negateop))
798 if (dump_file && (dump_flags & TDF_DETAILS))
800 fprintf (dump_file, "Equivalence: ");
801 print_generic_expr (dump_file, negateop, 0);
802 fprintf (dump_file, " + -");
803 print_generic_expr (dump_file, oe->op, 0);
804 fprintf (dump_file, " -> 0\n");
807 ops->ordered_remove (i);
808 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
809 ops->ordered_remove (currindex);
810 reassociate_stats.ops_eliminated ++;
812 return true;
814 else if (notop
815 && ops_equal_values_p (oe->op, notop))
817 tree op_type = TREE_TYPE (oe->op);
819 if (dump_file && (dump_flags & TDF_DETAILS))
821 fprintf (dump_file, "Equivalence: ");
822 print_generic_expr (dump_file, notop, 0);
823 fprintf (dump_file, " + ~");
824 print_generic_expr (dump_file, oe->op, 0);
825 fprintf (dump_file, " -> -1\n");
828 ops->ordered_remove (i);
829 add_to_ops_vec (ops, build_all_ones_cst (op_type));
830 ops->ordered_remove (currindex);
831 reassociate_stats.ops_eliminated ++;
833 return true;
837 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
838 save it for later inspection in repropagate_negates(). */
839 if (negateop != NULL_TREE
840 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
841 plus_negates.safe_push (curr->op);
843 return false;
846 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
847    bitwise not expression, look in OPS for a corresponding operand to
848 cancel it out. If we find one, remove the other from OPS, replace
849 OPS[CURRINDEX] with 0, and return true. Otherwise, return
850 false. */
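/* Concretely, once x_1 and ~x_1 are both found in a BIT_AND_EXPR chain the
   whole operand list collapses to 0, and for a BIT_IOR_EXPR chain it
   collapses to all-ones (-1), as implemented below.  */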
852 static bool
853 eliminate_not_pairs (enum tree_code opcode,
854 vec<operand_entry *> *ops,
855 unsigned int currindex,
856 operand_entry *curr)
858 tree notop;
859 unsigned int i;
860 operand_entry *oe;
862 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
863 || TREE_CODE (curr->op) != SSA_NAME)
864 return false;
866 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
867 if (notop == NULL_TREE)
868 return false;
870 /* Any non-not version will have a rank that is one less than
871 the current rank. So once we hit those ranks, if we don't find
872 one, we can stop. */
874 for (i = currindex + 1;
875 ops->iterate (i, &oe)
876 && oe->rank >= curr->rank - 1;
877 i++)
879 if (oe->op == notop)
881 if (dump_file && (dump_flags & TDF_DETAILS))
883 fprintf (dump_file, "Equivalence: ");
884 print_generic_expr (dump_file, notop, 0);
885 if (opcode == BIT_AND_EXPR)
886 fprintf (dump_file, " & ~");
887 else if (opcode == BIT_IOR_EXPR)
888 fprintf (dump_file, " | ~");
889 print_generic_expr (dump_file, oe->op, 0);
890 if (opcode == BIT_AND_EXPR)
891 fprintf (dump_file, " -> 0\n");
892 else if (opcode == BIT_IOR_EXPR)
893 fprintf (dump_file, " -> -1\n");
896 if (opcode == BIT_AND_EXPR)
897 oe->op = build_zero_cst (TREE_TYPE (oe->op));
898 else if (opcode == BIT_IOR_EXPR)
899 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
901 reassociate_stats.ops_eliminated += ops->length () - 1;
902 ops->truncate (0);
903 ops->quick_push (oe);
904 return true;
908 return false;
911 /* Use constant value that may be present in OPS to try to eliminate
912 operands. Note that this function is only really used when we've
913 eliminated ops for other reasons, or merged constants. Across
914 single statements, fold already does all of this, plus more. There
915 is little point in duplicating logic, so I've only included the
916 identities that I could ever construct testcases to trigger. */
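/* The identities handled below are of the form x & 0 -> 0, x & -1 -> x,
   x | -1 -> -1, x | 0 -> x, x * 0 -> 0, x * 1 -> x and x [+-^] 0 -> x,
   with the floating point variants guarded by the HONOR_* and
   fold_real_zero_addition_p checks.  */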
918 static void
919 eliminate_using_constants (enum tree_code opcode,
920 vec<operand_entry *> *ops)
922 operand_entry *oelast = ops->last ();
923 tree type = TREE_TYPE (oelast->op);
925 if (oelast->rank == 0
926 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
928 switch (opcode)
930 case BIT_AND_EXPR:
931 if (integer_zerop (oelast->op))
933 if (ops->length () != 1)
935 if (dump_file && (dump_flags & TDF_DETAILS))
936 fprintf (dump_file, "Found & 0, removing all other ops\n");
938 reassociate_stats.ops_eliminated += ops->length () - 1;
940 ops->truncate (0);
941 ops->quick_push (oelast);
942 return;
945 else if (integer_all_onesp (oelast->op))
947 if (ops->length () != 1)
949 if (dump_file && (dump_flags & TDF_DETAILS))
950 fprintf (dump_file, "Found & -1, removing\n");
951 ops->pop ();
952 reassociate_stats.ops_eliminated++;
955 break;
956 case BIT_IOR_EXPR:
957 if (integer_all_onesp (oelast->op))
959 if (ops->length () != 1)
961 if (dump_file && (dump_flags & TDF_DETAILS))
962 fprintf (dump_file, "Found | -1, removing all other ops\n");
964 reassociate_stats.ops_eliminated += ops->length () - 1;
966 ops->truncate (0);
967 ops->quick_push (oelast);
968 return;
971 else if (integer_zerop (oelast->op))
973 if (ops->length () != 1)
975 if (dump_file && (dump_flags & TDF_DETAILS))
976 fprintf (dump_file, "Found | 0, removing\n");
977 ops->pop ();
978 reassociate_stats.ops_eliminated++;
981 break;
982 case MULT_EXPR:
983 if (integer_zerop (oelast->op)
984 || (FLOAT_TYPE_P (type)
985 && !HONOR_NANS (type)
986 && !HONOR_SIGNED_ZEROS (type)
987 && real_zerop (oelast->op)))
989 if (ops->length () != 1)
991 if (dump_file && (dump_flags & TDF_DETAILS))
992 fprintf (dump_file, "Found * 0, removing all other ops\n");
994 reassociate_stats.ops_eliminated += ops->length () - 1;
995 ops->truncate (1);
996 ops->quick_push (oelast);
997 return;
1000 else if (integer_onep (oelast->op)
1001 || (FLOAT_TYPE_P (type)
1002 && !HONOR_SNANS (type)
1003 && real_onep (oelast->op)))
1005 if (ops->length () != 1)
1007 if (dump_file && (dump_flags & TDF_DETAILS))
1008 fprintf (dump_file, "Found * 1, removing\n");
1009 ops->pop ();
1010 reassociate_stats.ops_eliminated++;
1011 return;
1014 break;
1015 case BIT_XOR_EXPR:
1016 case PLUS_EXPR:
1017 case MINUS_EXPR:
1018 if (integer_zerop (oelast->op)
1019 || (FLOAT_TYPE_P (type)
1020 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1021 && fold_real_zero_addition_p (type, oelast->op,
1022 opcode == MINUS_EXPR)))
1024 if (ops->length () != 1)
1026 if (dump_file && (dump_flags & TDF_DETAILS))
1027 fprintf (dump_file, "Found [|^+] 0, removing\n");
1028 ops->pop ();
1029 reassociate_stats.ops_eliminated++;
1030 return;
1033 break;
1034 default:
1035 break;
1041 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1042 bool, bool);
1044 /* Structure for tracking and counting operands. */
1045 struct oecount {
1046 int cnt;
1047 int id;
1048 enum tree_code oecode;
1049 tree op;
1053 /* The heap for the oecount hashtable and the sorted list of operands. */
1054 static vec<oecount> cvec;
1057 /* Oecount hashtable helpers. */
1059 struct oecount_hasher : int_hash <int, 0, 1>
1061 static inline hashval_t hash (int);
1062 static inline bool equal (int, int);
1065 /* Hash function for oecount. */
1067 inline hashval_t
1068 oecount_hasher::hash (int p)
1070 const oecount *c = &cvec[p - 42];
1071 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1074 /* Comparison function for oecount. */
1076 inline bool
1077 oecount_hasher::equal (int p1, int p2)
1079 const oecount *c1 = &cvec[p1 - 42];
1080 const oecount *c2 = &cvec[p2 - 42];
1081 return (c1->oecode == c2->oecode
1082 && c1->op == c2->op);
1085 /* Comparison function for qsort sorting oecount elements by count. */
1087 static int
1088 oecount_cmp (const void *p1, const void *p2)
1090 const oecount *c1 = (const oecount *)p1;
1091 const oecount *c2 = (const oecount *)p2;
1092 if (c1->cnt != c2->cnt)
1093 return c1->cnt - c2->cnt;
1094 else
1095 /* If counts are identical, use unique IDs to stabilize qsort. */
1096 return c1->id - c2->id;
1099 /* Return TRUE iff STMT represents a builtin call that raises OP
1100 to some exponent. */
1102 static bool
1103 stmt_is_power_of_op (gimple *stmt, tree op)
1105 if (!is_gimple_call (stmt))
1106 return false;
1108 switch (gimple_call_combined_fn (stmt))
1110 CASE_CFN_POW:
1111 CASE_CFN_POWI:
1112 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1114 default:
1115 return false;
1119 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1120 in place and return the result. Assumes that stmt_is_power_of_op
1121 was previously called for STMT and returned TRUE. */
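/* E.g. a __builtin_powi (x, 5) call is rewritten in place to
   __builtin_powi (x, 4) and 4 is returned.  */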
1123 static HOST_WIDE_INT
1124 decrement_power (gimple *stmt)
1126 REAL_VALUE_TYPE c, cint;
1127 HOST_WIDE_INT power;
1128 tree arg1;
1130 switch (gimple_call_combined_fn (stmt))
1132 CASE_CFN_POW:
1133 arg1 = gimple_call_arg (stmt, 1);
1134 c = TREE_REAL_CST (arg1);
1135 power = real_to_integer (&c) - 1;
1136 real_from_integer (&cint, VOIDmode, power, SIGNED);
1137 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1138 return power;
1140 CASE_CFN_POWI:
1141 arg1 = gimple_call_arg (stmt, 1);
1142 power = TREE_INT_CST_LOW (arg1) - 1;
1143 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1144 return power;
1146 default:
1147 gcc_unreachable ();
1151 /* Find the single immediate use of STMT's LHS, and replace it
1152 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1153 replace *DEF with OP as well. */
1155 static void
1156 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1158 tree lhs;
1159 gimple *use_stmt;
1160 use_operand_p use;
1161 gimple_stmt_iterator gsi;
1163 if (is_gimple_call (stmt))
1164 lhs = gimple_call_lhs (stmt);
1165 else
1166 lhs = gimple_assign_lhs (stmt);
1168 gcc_assert (has_single_use (lhs));
1169 single_imm_use (lhs, &use, &use_stmt);
1170 if (lhs == *def)
1171 *def = op;
1172 SET_USE (use, op);
1173 if (TREE_CODE (op) != SSA_NAME)
1174 update_stmt (use_stmt);
1175 gsi = gsi_for_stmt (stmt);
1176 unlink_stmt_vdef (stmt);
1177 reassoc_remove_stmt (&gsi);
1178 release_defs (stmt);
1181 /* Walks the linear chain with result *DEF searching for an operation
1182    with operand OP and code OPCODE, removing it from the chain.  *DEF
1183    is updated if there is only one operand but no operation left.  */
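/* An illustrative walk (SSA names made up): if *DEF is t_4 from

     t_3 = a_1 * b_2;
     t_4 = t_3 * c_5;

   and OP is b_2 with OPCODE MULT_EXPR, the multiply defining t_3 is removed
   and its other operand a_1 is propagated, leaving t_4 = a_1 * c_5.  */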
1185 static void
1186 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1188 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1192 tree name;
1194 if (opcode == MULT_EXPR)
1196 if (stmt_is_power_of_op (stmt, op))
1198 if (decrement_power (stmt) == 1)
1199 propagate_op_to_single_use (op, stmt, def);
1200 return;
1202 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
1203 && gimple_assign_rhs1 (stmt) == op)
1205 propagate_op_to_single_use (op, stmt, def);
1206 return;
1210 name = gimple_assign_rhs1 (stmt);
1212       /* If this is the operation we look for and one of the operands
1213          is ours, simply propagate the other operand into the stmt's
1214          single use.  */
1215 if (gimple_assign_rhs_code (stmt) == opcode
1216 && (name == op
1217 || gimple_assign_rhs2 (stmt) == op))
1219 if (name == op)
1220 name = gimple_assign_rhs2 (stmt);
1221 propagate_op_to_single_use (name, stmt, def);
1222 return;
1225 /* We might have a multiply of two __builtin_pow* calls, and
1226 the operand might be hiding in the rightmost one. Likewise
1227 this can happen for a negate. */
1228 if (opcode == MULT_EXPR
1229 && gimple_assign_rhs_code (stmt) == opcode
1230 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1231 && has_single_use (gimple_assign_rhs2 (stmt)))
1233 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1234 if (stmt_is_power_of_op (stmt2, op))
1236 if (decrement_power (stmt2) == 1)
1237 propagate_op_to_single_use (op, stmt2, def);
1238 return;
1240 else if (is_gimple_assign (stmt2)
1241 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR
1242 && gimple_assign_rhs1 (stmt2) == op)
1244 propagate_op_to_single_use (op, stmt2, def);
1245 return;
1249 /* Continue walking the chain. */
1250 gcc_assert (name != op
1251 && TREE_CODE (name) == SSA_NAME);
1252 stmt = SSA_NAME_DEF_STMT (name);
1254 while (1);
1257 /* Returns true if statement S1 dominates statement S2. Like
1258 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1260 static bool
1261 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1263 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1265 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1266      SSA_NAME.  Assume it lives at the beginning of the function and
1267 thus dominates everything. */
1268 if (!bb1 || s1 == s2)
1269 return true;
1271 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1272 if (!bb2)
1273 return false;
1275 if (bb1 == bb2)
1277 /* PHIs in the same basic block are assumed to be
1278 executed all in parallel, if only one stmt is a PHI,
1279 it dominates the other stmt in the same basic block. */
1280 if (gimple_code (s1) == GIMPLE_PHI)
1281 return true;
1283 if (gimple_code (s2) == GIMPLE_PHI)
1284 return false;
1286 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1288 if (gimple_uid (s1) < gimple_uid (s2))
1289 return true;
1291 if (gimple_uid (s1) > gimple_uid (s2))
1292 return false;
1294 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1295 unsigned int uid = gimple_uid (s1);
1296 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1298 gimple *s = gsi_stmt (gsi);
1299 if (gimple_uid (s) != uid)
1300 break;
1301 if (s == s2)
1302 return true;
1305 return false;
1308 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1311 /* Insert STMT after INSERT_POINT. */
1313 static void
1314 insert_stmt_after (gimple *stmt, gimple *insert_point)
1316 gimple_stmt_iterator gsi;
1317 basic_block bb;
1319 if (gimple_code (insert_point) == GIMPLE_PHI)
1320 bb = gimple_bb (insert_point);
1321 else if (!stmt_ends_bb_p (insert_point))
1323 gsi = gsi_for_stmt (insert_point);
1324 gimple_set_uid (stmt, gimple_uid (insert_point));
1325 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1326 return;
1328 else
1329 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1330 thus if it must end a basic block, it should be a call that can
1331 throw, or some assignment that can throw. If it throws, the LHS
1332 of it will not be initialized though, so only valid places using
1333 the SSA_NAME should be dominated by the fallthru edge. */
1334 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1335 gsi = gsi_after_labels (bb);
1336 if (gsi_end_p (gsi))
1338 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1339 gimple_set_uid (stmt,
1340 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1342 else
1343 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1344 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1347 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1348 the result. Places the statement after the definition of either
1349 OP1 or OP2. Returns the new statement. */
1351 static gimple *
1352 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1354 gimple *op1def = NULL, *op2def = NULL;
1355 gimple_stmt_iterator gsi;
1356 tree op;
1357 gassign *sum;
1359 /* Create the addition statement. */
1360 op = make_ssa_name (type);
1361 sum = gimple_build_assign (op, opcode, op1, op2);
1363 /* Find an insertion place and insert. */
1364 if (TREE_CODE (op1) == SSA_NAME)
1365 op1def = SSA_NAME_DEF_STMT (op1);
1366 if (TREE_CODE (op2) == SSA_NAME)
1367 op2def = SSA_NAME_DEF_STMT (op2);
1368 if ((!op1def || gimple_nop_p (op1def))
1369 && (!op2def || gimple_nop_p (op2def)))
1371 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1372 if (gsi_end_p (gsi))
1374 gimple_stmt_iterator gsi2
1375 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1376 gimple_set_uid (sum,
1377 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1379 else
1380 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1381 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1383 else
1385 gimple *insert_point;
1386 if ((!op1def || gimple_nop_p (op1def))
1387 || (op2def && !gimple_nop_p (op2def)
1388 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1389 insert_point = op2def;
1390 else
1391 insert_point = op1def;
1392 insert_stmt_after (sum, insert_point);
1394 update_stmt (sum);
1396 return sum;
1399 /* Perform un-distribution of divisions and multiplications.
1400 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1401 to (A + B) / X for real X.
1403 The algorithm is organized as follows.
1405 - First we walk the addition chain *OPS looking for summands that
1406 are defined by a multiplication or a real division. This results
1407 in the candidates bitmap with relevant indices into *OPS.
1409 - Second we build the chains of multiplications or divisions for
1410 these candidates, counting the number of occurrences of (operand, code)
1411 pairs in all of the candidates chains.
1413 - Third we sort the (operand, code) pairs by number of occurrence and
1414 process them starting with the pair with the most uses.
1416 * For each such pair we walk the candidates again to build a
1417 second candidate bitmap noting all multiplication/division chains
1418 that have at least one occurrence of (operand, code).
1420 * We build an alternate addition chain only covering these
1421 candidates with one (operand, code) operation removed from their
1422 multiplication/division chain.
1424 * The first candidate gets replaced by the alternate addition chain
1425 multiplied/divided by the operand.
1427 * All candidate chains get disabled for further processing and
1428 processing of (operand, code) pairs continues.
1430 The alternate addition chains built are re-processed by the main
1431 reassociation algorithm which allows optimizing a * x * y + b * y * x
1432 to (a + b ) * x * y in one invocation of the reassociation pass. */
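/* A short hand-worked sketch of the above: for the addition chain
   a*x + b*x + c the two multiplications become candidates, the counting
   table ends up with (x, MULT_EXPR) at count 2 (a and b at count 1), so x
   is processed first: both candidate chains get their x factor removed,
   the alternate chain a + b is built and multiplied by x, and that result
   replaces the first candidate, leaving (a + b)*x + c for the normal
   reassociation machinery to finish.  */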
1434 static bool
1435 undistribute_ops_list (enum tree_code opcode,
1436 vec<operand_entry *> *ops, struct loop *loop)
1438 unsigned int length = ops->length ();
1439 operand_entry *oe1;
1440 unsigned i, j;
1441 sbitmap candidates, candidates2;
1442 unsigned nr_candidates, nr_candidates2;
1443 sbitmap_iterator sbi0;
1444 vec<operand_entry *> *subops;
1445 bool changed = false;
1446 int next_oecount_id = 0;
1448 if (length <= 1
1449 || opcode != PLUS_EXPR)
1450 return false;
1452 /* Build a list of candidates to process. */
1453 candidates = sbitmap_alloc (length);
1454 bitmap_clear (candidates);
1455 nr_candidates = 0;
1456 FOR_EACH_VEC_ELT (*ops, i, oe1)
1458 enum tree_code dcode;
1459 gimple *oe1def;
1461 if (TREE_CODE (oe1->op) != SSA_NAME)
1462 continue;
1463 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1464 if (!is_gimple_assign (oe1def))
1465 continue;
1466 dcode = gimple_assign_rhs_code (oe1def);
1467 if ((dcode != MULT_EXPR
1468 && dcode != RDIV_EXPR)
1469 || !is_reassociable_op (oe1def, dcode, loop))
1470 continue;
1472 bitmap_set_bit (candidates, i);
1473 nr_candidates++;
1476 if (nr_candidates < 2)
1478 sbitmap_free (candidates);
1479 return false;
1482 if (dump_file && (dump_flags & TDF_DETAILS))
1484 fprintf (dump_file, "searching for un-distribute opportunities ");
1485 print_generic_expr (dump_file,
1486 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1487 fprintf (dump_file, " %d\n", nr_candidates);
1490 /* Build linearized sub-operand lists and the counting table. */
1491 cvec.create (0);
1493 hash_table<oecount_hasher> ctable (15);
1495   /* ??? Macro arguments cannot have multi-argument template types in
1496      them.  This typedef is needed to work around that limitation.  */
1497 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1498 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1499 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1501 gimple *oedef;
1502 enum tree_code oecode;
1503 unsigned j;
1505 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1506 oecode = gimple_assign_rhs_code (oedef);
1507 linearize_expr_tree (&subops[i], oedef,
1508 associative_tree_code (oecode), false);
1510 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1512 oecount c;
1513 int *slot;
1514 int idx;
1515 c.oecode = oecode;
1516 c.cnt = 1;
1517 c.id = next_oecount_id++;
1518 c.op = oe1->op;
1519 cvec.safe_push (c);
1520 idx = cvec.length () + 41;
1521 slot = ctable.find_slot (idx, INSERT);
1522 if (!*slot)
1524 *slot = idx;
1526 else
1528 cvec.pop ();
1529 cvec[*slot - 42].cnt++;
1534 /* Sort the counting table. */
1535 cvec.qsort (oecount_cmp);
1537 if (dump_file && (dump_flags & TDF_DETAILS))
1539 oecount *c;
1540 fprintf (dump_file, "Candidates:\n");
1541 FOR_EACH_VEC_ELT (cvec, j, c)
1543 fprintf (dump_file, " %u %s: ", c->cnt,
1544 c->oecode == MULT_EXPR
1545 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1546 print_generic_expr (dump_file, c->op, 0);
1547 fprintf (dump_file, "\n");
1551 /* Process the (operand, code) pairs in order of most occurrence. */
1552 candidates2 = sbitmap_alloc (length);
1553 while (!cvec.is_empty ())
1555 oecount *c = &cvec.last ();
1556 if (c->cnt < 2)
1557 break;
1559 /* Now collect the operands in the outer chain that contain
1560 the common operand in their inner chain. */
1561 bitmap_clear (candidates2);
1562 nr_candidates2 = 0;
1563 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1565 gimple *oedef;
1566 enum tree_code oecode;
1567 unsigned j;
1568 tree op = (*ops)[i]->op;
1570	  /* If we already undistributed in this chain, this may be
1571	     a constant.  */
1572 if (TREE_CODE (op) != SSA_NAME)
1573 continue;
1575 oedef = SSA_NAME_DEF_STMT (op);
1576 oecode = gimple_assign_rhs_code (oedef);
1577 if (oecode != c->oecode)
1578 continue;
1580 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1582 if (oe1->op == c->op)
1584 bitmap_set_bit (candidates2, i);
1585 ++nr_candidates2;
1586 break;
1591 if (nr_candidates2 >= 2)
1593 operand_entry *oe1, *oe2;
1594 gimple *prod;
1595 int first = bitmap_first_set_bit (candidates2);
1597 /* Build the new addition chain. */
1598 oe1 = (*ops)[first];
1599 if (dump_file && (dump_flags & TDF_DETAILS))
1601 fprintf (dump_file, "Building (");
1602 print_generic_expr (dump_file, oe1->op, 0);
1604 zero_one_operation (&oe1->op, c->oecode, c->op);
1605 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1607 gimple *sum;
1608 oe2 = (*ops)[i];
1609 if (dump_file && (dump_flags & TDF_DETAILS))
1611 fprintf (dump_file, " + ");
1612 print_generic_expr (dump_file, oe2->op, 0);
1614 zero_one_operation (&oe2->op, c->oecode, c->op);
1615 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1616 oe1->op, oe2->op, opcode);
1617 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1618 oe2->rank = 0;
1619 oe1->op = gimple_get_lhs (sum);
1622 /* Apply the multiplication/division. */
1623 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1624 oe1->op, c->op, c->oecode);
1625 if (dump_file && (dump_flags & TDF_DETAILS))
1627 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1628 print_generic_expr (dump_file, c->op, 0);
1629 fprintf (dump_file, "\n");
1632 /* Record it in the addition chain and disable further
1633 undistribution with this op. */
1634 oe1->op = gimple_assign_lhs (prod);
1635 oe1->rank = get_rank (oe1->op);
1636 subops[first].release ();
1638 changed = true;
1641 cvec.pop ();
1644 for (i = 0; i < ops->length (); ++i)
1645 subops[i].release ();
1646 free (subops);
1647 cvec.release ();
1648 sbitmap_free (candidates);
1649 sbitmap_free (candidates2);
1651 return changed;
1654 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1655 expression, examine the other OPS to see if any of them are comparisons
1656 of the same values, which we may be able to combine or eliminate.
1657 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1659 static bool
1660 eliminate_redundant_comparison (enum tree_code opcode,
1661 vec<operand_entry *> *ops,
1662 unsigned int currindex,
1663 operand_entry *curr)
1665 tree op1, op2;
1666 enum tree_code lcode, rcode;
1667 gimple *def1, *def2;
1668 int i;
1669 operand_entry *oe;
1671 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1672 return false;
1674 /* Check that CURR is a comparison. */
1675 if (TREE_CODE (curr->op) != SSA_NAME)
1676 return false;
1677 def1 = SSA_NAME_DEF_STMT (curr->op);
1678 if (!is_gimple_assign (def1))
1679 return false;
1680 lcode = gimple_assign_rhs_code (def1);
1681 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1682 return false;
1683 op1 = gimple_assign_rhs1 (def1);
1684 op2 = gimple_assign_rhs2 (def1);
1686 /* Now look for a similar comparison in the remaining OPS. */
1687 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1689 tree t;
1691 if (TREE_CODE (oe->op) != SSA_NAME)
1692 continue;
1693 def2 = SSA_NAME_DEF_STMT (oe->op);
1694 if (!is_gimple_assign (def2))
1695 continue;
1696 rcode = gimple_assign_rhs_code (def2);
1697 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1698 continue;
1700 /* If we got here, we have a match. See if we can combine the
1701 two comparisons. */
1702 if (opcode == BIT_IOR_EXPR)
1703 t = maybe_fold_or_comparisons (lcode, op1, op2,
1704 rcode, gimple_assign_rhs1 (def2),
1705 gimple_assign_rhs2 (def2));
1706 else
1707 t = maybe_fold_and_comparisons (lcode, op1, op2,
1708 rcode, gimple_assign_rhs1 (def2),
1709 gimple_assign_rhs2 (def2));
1710 if (!t)
1711 continue;
1713 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1714 always give us a boolean_type_node value back. If the original
1715 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1716 we need to convert. */
1717 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1718 t = fold_convert (TREE_TYPE (curr->op), t);
1720 if (TREE_CODE (t) != INTEGER_CST
1721 && !operand_equal_p (t, curr->op, 0))
1723 enum tree_code subcode;
1724 tree newop1, newop2;
1725 if (!COMPARISON_CLASS_P (t))
1726 continue;
1727 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1728 STRIP_USELESS_TYPE_CONVERSION (newop1);
1729 STRIP_USELESS_TYPE_CONVERSION (newop2);
1730 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1731 continue;
1734 if (dump_file && (dump_flags & TDF_DETAILS))
1736 fprintf (dump_file, "Equivalence: ");
1737 print_generic_expr (dump_file, curr->op, 0);
1738 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1739 print_generic_expr (dump_file, oe->op, 0);
1740 fprintf (dump_file, " -> ");
1741 print_generic_expr (dump_file, t, 0);
1742 fprintf (dump_file, "\n");
1745 /* Now we can delete oe, as it has been subsumed by the new combined
1746 expression t. */
1747 ops->ordered_remove (i);
1748 reassociate_stats.ops_eliminated ++;
1750 /* If t is the same as curr->op, we're done. Otherwise we must
1751 replace curr->op with t. Special case is if we got a constant
1752 back, in which case we add it to the end instead of in place of
1753 the current entry. */
1754 if (TREE_CODE (t) == INTEGER_CST)
1756 ops->ordered_remove (currindex);
1757 add_to_ops_vec (ops, t);
1759 else if (!operand_equal_p (t, curr->op, 0))
1761 gimple *sum;
1762 enum tree_code subcode;
1763 tree newop1;
1764 tree newop2;
1765 gcc_assert (COMPARISON_CLASS_P (t));
1766 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1767 STRIP_USELESS_TYPE_CONVERSION (newop1);
1768 STRIP_USELESS_TYPE_CONVERSION (newop2);
1769 gcc_checking_assert (is_gimple_val (newop1)
1770 && is_gimple_val (newop2));
1771 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1772 curr->op = gimple_get_lhs (sum);
1774 return true;
1777 return false;
1780 /* If the stmt that defines an operand has to be inserted, insert it
1781    before the use.  */
1782 static void
1783 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
1785 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1786 gimple_set_uid (stmt_to_insert, gimple_uid (stmt));
1787 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
1791 /* Transform repeated addition of the same value into a multiply by
1792    a constant.  */
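/* E.g. an operand list {x_1, x_1, x_1, y_2} becomes {y_2, t_3} together
   with a new statement t_3 = x_1 * 3 (names illustrative).  */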
1793 static bool
1794 transform_add_to_multiply (vec<operand_entry *> *ops)
1796 operand_entry *oe;
1797 tree op = NULL_TREE;
1798 int j;
1799 int i, start = -1, end = 0, count = 0;
1800 vec<std::pair <int, int> > indxs = vNULL;
1801 bool changed = false;
1803 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1804 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1805 || !flag_unsafe_math_optimizations))
1806 return false;
1808 /* Look for repeated operands. */
1809 FOR_EACH_VEC_ELT (*ops, i, oe)
1811 if (start == -1)
1813 count = 1;
1814 op = oe->op;
1815 start = i;
1817 else if (operand_equal_p (oe->op, op, 0))
1819 count++;
1820 end = i;
1822 else
1824 if (count > 1)
1825 indxs.safe_push (std::make_pair (start, end));
1826 count = 1;
1827 op = oe->op;
1828 start = i;
1832 if (count > 1)
1833 indxs.safe_push (std::make_pair (start, end));
1835 for (j = indxs.length () - 1; j >= 0; --j)
1837 /* Convert repeated operand addition to multiplication. */
1838 start = indxs[j].first;
1839 end = indxs[j].second;
1840 op = (*ops)[start]->op;
1841 count = end - start + 1;
1842 for (i = end; i >= start; --i)
1843 ops->unordered_remove (i);
1844 tree tmp = make_ssa_name (TREE_TYPE (op));
1845 tree cst = build_int_cst (integer_type_node, count);
1846 gassign *mul_stmt
1847 = gimple_build_assign (tmp, MULT_EXPR,
1848 op, fold_convert (TREE_TYPE (op), cst));
1849 gimple_set_visited (mul_stmt, true);
1850 add_to_ops_vec (ops, tmp, mul_stmt);
1851 changed = true;
1854 return changed;
1858 /* Perform various identities and other optimizations on the list of
1859 operand entries, stored in OPS. The tree code for the binary
1860 operation between all the operands is OPCODE. */
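/* For example, with OPCODE PLUS_EXPR and OPS {a_1, 5, 3} (constants sort
   last), the two trailing constants are folded into a single 8 first, and
   then the pairwise eliminations below are retried on the shorter list.  */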
1862 static void
1863 optimize_ops_list (enum tree_code opcode,
1864 vec<operand_entry *> *ops)
1866 unsigned int length = ops->length ();
1867 unsigned int i;
1868 operand_entry *oe;
1869 operand_entry *oelast = NULL;
1870 bool iterate = false;
1872 if (length == 1)
1873 return;
1875 oelast = ops->last ();
1877 /* If the last two are constants, pop the constants off, merge them
1878 and try the next two. */
1879 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1881 operand_entry *oelm1 = (*ops)[length - 2];
1883 if (oelm1->rank == 0
1884 && is_gimple_min_invariant (oelm1->op)
1885 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1886 TREE_TYPE (oelast->op)))
1888 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1889 oelm1->op, oelast->op);
1891 if (folded && is_gimple_min_invariant (folded))
1893 if (dump_file && (dump_flags & TDF_DETAILS))
1894 fprintf (dump_file, "Merging constants\n");
1896 ops->pop ();
1897 ops->pop ();
1899 add_to_ops_vec (ops, folded);
1900 reassociate_stats.constants_eliminated++;
1902 optimize_ops_list (opcode, ops);
1903 return;
1908 eliminate_using_constants (opcode, ops);
1909 oelast = NULL;
1911 for (i = 0; ops->iterate (i, &oe);)
1913 bool done = false;
1915 if (eliminate_not_pairs (opcode, ops, i, oe))
1916 return;
1917 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1918 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1919 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1921 if (done)
1922 return;
1923 iterate = true;
1924 oelast = NULL;
1925 continue;
1927 oelast = oe;
1928 i++;
1931 length = ops->length ();
1932 oelast = ops->last ();
1934 if (iterate)
1935 optimize_ops_list (opcode, ops);
1938 /* The following functions are subroutines to optimize_range_tests and allow
1939 it to try to change a logical combination of comparisons into a range
1940 test.
1942 For example, both
1943    X == 2 || X == 5 || X == 3 || X == 4
1944    and
1945    X >= 2 && X <= 5
1946 are converted to
1947 (unsigned) (X - 2) <= 3
1949 For more information see comments above fold_test_range in fold-const.c,
1950 this implementation is for GIMPLE. */
1952 struct range_entry
1954 tree exp;
1955 tree low;
1956 tree high;
1957 bool in_p;
1958 bool strict_overflow_p;
1959 unsigned int idx, next;
1962 /* This is similar to make_range in fold-const.c, but on top of
1963 GIMPLE instead of trees. If EXP is non-NULL, it should be
1964 an SSA_NAME and STMT argument is ignored, otherwise STMT
1965 argument should be a GIMPLE_COND. */
1967 static void
1968 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
1970 int in_p;
1971 tree low, high;
1972 bool is_bool, strict_overflow_p;
1974 r->exp = NULL_TREE;
1975 r->in_p = false;
1976 r->strict_overflow_p = false;
1977 r->low = NULL_TREE;
1978 r->high = NULL_TREE;
1979 if (exp != NULL_TREE
1980 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1981 return;
1983 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1984 and see if we can refine the range. Some of the cases below may not
1985 happen, but it doesn't seem worth worrying about this. We "continue"
1986 the outer loop when we've changed something; otherwise we "break"
1987 the switch, which will "break" the while. */
1988 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1989 high = low;
1990 in_p = 0;
1991 strict_overflow_p = false;
1992 is_bool = false;
1993 if (exp == NULL_TREE)
1994 is_bool = true;
1995 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
1997 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
1998 is_bool = true;
1999 else
2000 return;
2002 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2003 is_bool = true;
2005 while (1)
2007 enum tree_code code;
2008 tree arg0, arg1, exp_type;
2009 tree nexp;
2010 location_t loc;
2012 if (exp != NULL_TREE)
2014 if (TREE_CODE (exp) != SSA_NAME
2015 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2016 break;
2018 stmt = SSA_NAME_DEF_STMT (exp);
2019 if (!is_gimple_assign (stmt))
2020 break;
2022 code = gimple_assign_rhs_code (stmt);
2023 arg0 = gimple_assign_rhs1 (stmt);
2024 arg1 = gimple_assign_rhs2 (stmt);
2025 exp_type = TREE_TYPE (exp);
2027 else
2029 code = gimple_cond_code (stmt);
2030 arg0 = gimple_cond_lhs (stmt);
2031 arg1 = gimple_cond_rhs (stmt);
2032 exp_type = boolean_type_node;
2035 if (TREE_CODE (arg0) != SSA_NAME)
2036 break;
2037 loc = gimple_location (stmt);
2038 switch (code)
2040 case BIT_NOT_EXPR:
2041 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2042 /* Ensure the range is either +[-,0], +[0,0],
2043 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2044 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2045 or similar expression of unconditional true or
2046 false, it should not be negated. */
2047 && ((high && integer_zerop (high))
2048 || (low && integer_onep (low))))
2050 in_p = !in_p;
2051 exp = arg0;
2052 continue;
2054 break;
2055 case SSA_NAME:
2056 exp = arg0;
2057 continue;
2058 CASE_CONVERT:
2059 if (is_bool)
2060 goto do_default;
2061 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2063 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2064 is_bool = true;
2065 else
2066 return;
2068 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2069 is_bool = true;
2070 goto do_default;
2071 case EQ_EXPR:
2072 case NE_EXPR:
2073 case LT_EXPR:
2074 case LE_EXPR:
2075 case GE_EXPR:
2076 case GT_EXPR:
2077 is_bool = true;
2078 /* FALLTHRU */
2079 default:
2080 if (!is_bool)
2081 return;
2082 do_default:
2083 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2084 &low, &high, &in_p,
2085 &strict_overflow_p);
2086 if (nexp != NULL_TREE)
2088 exp = nexp;
2089 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2090 continue;
2092 break;
2094 break;
2096 if (is_bool)
2098 r->exp = exp;
2099 r->in_p = in_p;
2100 r->low = low;
2101 r->high = high;
2102 r->strict_overflow_p = strict_overflow_p;
2106 /* Comparison function for qsort. Sort entries
2107 without SSA_NAME exp first, then with SSA_NAMEs sorted
2108 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2109 by increasing ->low and if ->low is the same, by increasing
2110 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2111 maximum. */
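/* For example (hypothetical entries for the same SSA_NAME x_1): the
   ranges [-, 5], [2, 4], [2, 8] and [2, -] would sort in that order --
   NULL low first, then ascending low, then ascending high with NULL high
   last.  Entries whose exp is not an SSA_NAME sort before all of them,
   by ascending idx.  */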
2113 static int
2114 range_entry_cmp (const void *a, const void *b)
2116 const struct range_entry *p = (const struct range_entry *) a;
2117 const struct range_entry *q = (const struct range_entry *) b;
2119 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2121 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2123 /* Group range_entries for the same SSA_NAME together. */
2124 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2125 return -1;
2126 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2127 return 1;
2128 /* If ->low is different, NULL low goes first, then by
2129 ascending low. */
2130 if (p->low != NULL_TREE)
2132 if (q->low != NULL_TREE)
2134 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2135 p->low, q->low);
2136 if (tem && integer_onep (tem))
2137 return -1;
2138 tem = fold_binary (GT_EXPR, boolean_type_node,
2139 p->low, q->low);
2140 if (tem && integer_onep (tem))
2141 return 1;
2143 else
2144 return 1;
2146 else if (q->low != NULL_TREE)
2147 return -1;
2148 /* If ->high is different, NULL high goes last, before that by
2149 ascending high. */
2150 if (p->high != NULL_TREE)
2152 if (q->high != NULL_TREE)
2154 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2155 p->high, q->high);
2156 if (tem && integer_onep (tem))
2157 return -1;
2158 tem = fold_binary (GT_EXPR, boolean_type_node,
2159 p->high, q->high);
2160 if (tem && integer_onep (tem))
2161 return 1;
2163 else
2164 return -1;
2166 else if (q->high != NULL_TREE)
2167 return 1;
2168 /* If both ranges are the same, sort below by ascending idx. */
2170 else
2171 return 1;
2173 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2174 return -1;
2176 if (p->idx < q->idx)
2177 return -1;
2178 else
2180 gcc_checking_assert (p->idx > q->idx);
2181 return 1;
2185 /* Helper routine of optimize_range_test.
2186 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2187 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2188 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2189 is NULL, OTHERRANGEP should not be NULL, and then OTHERRANGEP points to
2190 an array of COUNT pointers to other ranges. Return
2191 true if the range merge has been successful.
2192 If OPCODE is ERROR_MARK, this is called from within
2193 maybe_optimize_range_tests and is performing inter-bb range optimization.
2194 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2195 oe->rank. */
2197 static bool
2198 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2199 struct range_entry **otherrangep,
2200 unsigned int count, enum tree_code opcode,
2201 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2202 bool in_p, tree low, tree high, bool strict_overflow_p)
2204 operand_entry *oe = (*ops)[range->idx];
2205 tree op = oe->op;
2206 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2207 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2208 location_t loc = gimple_location (stmt);
2209 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2210 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2211 in_p, low, high);
2212 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2213 gimple_stmt_iterator gsi;
2214 unsigned int i, uid;
2216 if (tem == NULL_TREE)
2217 return false;
2219 /* If op is default def SSA_NAME, there is no place to insert the
2220 new comparison. Give up, unless we can use OP itself as the
2221 range test. */
2222 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2224 if (op == range->exp
2225 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2226 || TREE_CODE (optype) == BOOLEAN_TYPE)
2227 && (op == tem
2228 || (TREE_CODE (tem) == EQ_EXPR
2229 && TREE_OPERAND (tem, 0) == op
2230 && integer_onep (TREE_OPERAND (tem, 1))))
2231 && opcode != BIT_IOR_EXPR
2232 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2234 stmt = NULL;
2235 tem = op;
2237 else
2238 return false;
2241 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2242 warning_at (loc, OPT_Wstrict_overflow,
2243 "assuming signed overflow does not occur "
2244 "when simplifying range test");
2246 if (dump_file && (dump_flags & TDF_DETAILS))
2248 struct range_entry *r;
2249 fprintf (dump_file, "Optimizing range tests ");
2250 print_generic_expr (dump_file, range->exp, 0);
2251 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2252 print_generic_expr (dump_file, range->low, 0);
2253 fprintf (dump_file, ", ");
2254 print_generic_expr (dump_file, range->high, 0);
2255 fprintf (dump_file, "]");
2256 for (i = 0; i < count; i++)
2258 if (otherrange)
2259 r = otherrange + i;
2260 else
2261 r = otherrangep[i];
2262 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2263 print_generic_expr (dump_file, r->low, 0);
2264 fprintf (dump_file, ", ");
2265 print_generic_expr (dump_file, r->high, 0);
2266 fprintf (dump_file, "]");
2268 fprintf (dump_file, "\n into ");
2269 print_generic_expr (dump_file, tem, 0);
2270 fprintf (dump_file, "\n");
2273 if (opcode == BIT_IOR_EXPR
2274 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2275 tem = invert_truthvalue_loc (loc, tem);
2277 tem = fold_convert_loc (loc, optype, tem);
2278 if (stmt)
2280 gsi = gsi_for_stmt (stmt);
2281 uid = gimple_uid (stmt);
2283 else
2285 gsi = gsi_none ();
2286 uid = 0;
2288 if (stmt == NULL)
2289 gcc_checking_assert (tem == op);
2290 /* In rare cases range->exp can be equal to lhs of stmt.
2291 In that case we have to insert after the stmt rather than before
2292 it. If stmt is a PHI, insert it at the start of the basic block. */
2293 else if (op != range->exp)
2295 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2296 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2297 GSI_SAME_STMT);
2298 gsi_prev (&gsi);
2300 else if (gimple_code (stmt) != GIMPLE_PHI)
2302 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2303 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2304 GSI_CONTINUE_LINKING);
2306 else
2308 gsi = gsi_after_labels (gimple_bb (stmt));
2309 if (!gsi_end_p (gsi))
2310 uid = gimple_uid (gsi_stmt (gsi));
2311 else
2313 gsi = gsi_start_bb (gimple_bb (stmt));
2314 uid = 1;
2315 while (!gsi_end_p (gsi))
2317 uid = gimple_uid (gsi_stmt (gsi));
2318 gsi_next (&gsi);
2321 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2322 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2323 GSI_SAME_STMT);
2324 if (gsi_end_p (gsi))
2325 gsi = gsi_last_bb (gimple_bb (stmt));
2326 else
2327 gsi_prev (&gsi);
2329 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2330 if (gimple_uid (gsi_stmt (gsi)))
2331 break;
2332 else
2333 gimple_set_uid (gsi_stmt (gsi), uid);
2335 oe->op = tem;
2336 range->exp = exp;
2337 range->low = low;
2338 range->high = high;
2339 range->in_p = in_p;
2340 range->strict_overflow_p = false;
2342 for (i = 0; i < count; i++)
2344 if (otherrange)
2345 range = otherrange + i;
2346 else
2347 range = otherrangep[i];
2348 oe = (*ops)[range->idx];
2349 /* Now change all the other range test immediate uses, so that
2350 those tests will be optimized away. */
2351 if (opcode == ERROR_MARK)
2353 if (oe->op)
2354 oe->op = build_int_cst (TREE_TYPE (oe->op),
2355 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2356 else
2357 oe->op = (oe->rank == BIT_IOR_EXPR
2358 ? boolean_false_node : boolean_true_node);
2360 else
2361 oe->op = error_mark_node;
2362 range->exp = NULL_TREE;
2364 return true;
2367 /* Optimize X == CST1 || X == CST2
2368 if popcount (CST1 ^ CST2) == 1 into
2369 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2370 Similarly for ranges. E.g.
2371 X != 2 && X != 3 && X != 10 && X != 11
2372 will be transformed by the previous optimization into
2373 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2374 and this loop can transform that into
2375 !(((X & ~8) - 2U) <= 1U). */
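/* A concrete instance (hypothetical constants): for X == 43 || X == 47
   we have 43 ^ 47 == 4, which has popcount 1, so the two comparisons
   collapse into the single test (X & ~4) == (43 & ~4), i.e.
   (X & ~4) == 43.  */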
2377 static bool
2378 optimize_range_tests_xor (enum tree_code opcode, tree type,
2379 tree lowi, tree lowj, tree highi, tree highj,
2380 vec<operand_entry *> *ops,
2381 struct range_entry *rangei,
2382 struct range_entry *rangej)
2384 tree lowxor, highxor, tem, exp;
2385 /* Check lowi ^ lowj == highi ^ highj and
2386 popcount (lowi ^ lowj) == 1. */
2387 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2388 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2389 return false;
2390 if (!integer_pow2p (lowxor))
2391 return false;
2392 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2393 if (!tree_int_cst_equal (lowxor, highxor))
2394 return false;
2396 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2397 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2398 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2399 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2400 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2401 NULL, rangei->in_p, lowj, highj,
2402 rangei->strict_overflow_p
2403 || rangej->strict_overflow_p))
2404 return true;
2405 return false;
2408 /* Optimize X == CST1 || X == CST2
2409 if popcount (CST2 - CST1) == 1 into
2410 ((X - CST1) & ~(CST2 - CST1)) == 0.
2411 Similarly for ranges. E.g.
2412 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2413 || X == 75 || X == 45
2414 will be transformed by the previous optimization into
2415 (X - 43U) <= 3U || (X - 75U) <= 3U
2416 and this loop can transform that into
2417 ((X - 43U) & ~(75U - 43U)) <= 3U. */
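/* A concrete instance (hypothetical constants): for X == 43 || X == 75
   we have 75 - 43 == 32, which has popcount 1, so the two comparisons
   collapse into ((X - 43U) & ~32U) == 0, matching the shape of the
   larger example above.  */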
2418 static bool
2419 optimize_range_tests_diff (enum tree_code opcode, tree type,
2420 tree lowi, tree lowj, tree highi, tree highj,
2421 vec<operand_entry *> *ops,
2422 struct range_entry *rangei,
2423 struct range_entry *rangej)
2425 tree tem1, tem2, mask;
2426 /* Check highi - lowi == highj - lowj. */
2427 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2428 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2429 return false;
2430 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2431 if (!tree_int_cst_equal (tem1, tem2))
2432 return false;
2433 /* Check popcount (lowj - lowi) == 1. */
2434 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2435 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2436 return false;
2437 if (!integer_pow2p (tem1))
2438 return false;
2440 type = unsigned_type_for (type);
2441 tem1 = fold_convert (type, tem1);
2442 tem2 = fold_convert (type, tem2);
2443 lowi = fold_convert (type, lowi);
2444 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2445 tem1 = fold_binary (MINUS_EXPR, type,
2446 fold_convert (type, rangei->exp), lowi);
2447 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2448 lowj = build_int_cst (type, 0);
2449 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2450 NULL, rangei->in_p, lowj, tem2,
2451 rangei->strict_overflow_p
2452 || rangej->strict_overflow_p))
2453 return true;
2454 return false;
2457 /* This function performs the checks common to optimize_range_tests_xor and
2458 optimize_range_tests_diff.
2459 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2460 Else it calls optimize_range_tests_diff. */
2462 static bool
2463 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2464 bool optimize_xor, vec<operand_entry *> *ops,
2465 struct range_entry *ranges)
2467 int i, j;
2468 bool any_changes = false;
2469 for (i = first; i < length; i++)
2471 tree lowi, highi, lowj, highj, type, tem;
2473 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2474 continue;
2475 type = TREE_TYPE (ranges[i].exp);
2476 if (!INTEGRAL_TYPE_P (type))
2477 continue;
2478 lowi = ranges[i].low;
2479 if (lowi == NULL_TREE)
2480 lowi = TYPE_MIN_VALUE (type);
2481 highi = ranges[i].high;
2482 if (highi == NULL_TREE)
2483 continue;
2484 for (j = i + 1; j < length && j < i + 64; j++)
2486 bool changes;
2487 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2488 continue;
2489 lowj = ranges[j].low;
2490 if (lowj == NULL_TREE)
2491 continue;
2492 highj = ranges[j].high;
2493 if (highj == NULL_TREE)
2494 highj = TYPE_MAX_VALUE (type);
2495 /* Check lowj > highi. */
2496 tem = fold_binary (GT_EXPR, boolean_type_node,
2497 lowj, highi);
2498 if (tem == NULL_TREE || !integer_onep (tem))
2499 continue;
2500 if (optimize_xor)
2501 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2502 highi, highj, ops,
2503 ranges + i, ranges + j);
2504 else
2505 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2506 highi, highj, ops,
2507 ranges + i, ranges + j);
2508 if (changes)
2510 any_changes = true;
2511 break;
2515 return any_changes;
2518 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2519 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2520 EXP on success, NULL otherwise. */
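/* Rough illustration (hypothetical values, 64-bit word): for the range
   [43, 46] with TOTALLOW == LOW == 43, MAX becomes 46 - 43 + 1 == 4 and
   *MASK is the low four bits, 0xf.  A later call for the range [75, 78]
   with TOTALLOW still 43 computes the same 0xf and then shifts it left
   by 75 - 43 == 32 bits, giving 0xf00000000.  */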
2522 static tree
2523 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2524 wide_int *mask, tree *totallowp)
2526 tree tem = int_const_binop (MINUS_EXPR, high, low);
2527 if (tem == NULL_TREE
2528 || TREE_CODE (tem) != INTEGER_CST
2529 || TREE_OVERFLOW (tem)
2530 || tree_int_cst_sgn (tem) == -1
2531 || compare_tree_int (tem, prec) != -1)
2532 return NULL_TREE;
2534 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2535 *mask = wi::shifted_mask (0, max, false, prec);
2536 if (TREE_CODE (exp) == BIT_AND_EXPR
2537 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2539 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2540 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2541 if (wi::popcount (msk) == 1
2542 && wi::ltu_p (msk, prec - max))
2544 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2545 max += msk.to_uhwi ();
2546 exp = TREE_OPERAND (exp, 0);
2547 if (integer_zerop (low)
2548 && TREE_CODE (exp) == PLUS_EXPR
2549 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2551 tree ret = TREE_OPERAND (exp, 0);
2552 STRIP_NOPS (ret);
2553 widest_int bias
2554 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2555 TYPE_PRECISION (TREE_TYPE (low))));
2556 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2557 if (totallowp)
2559 *totallowp = tbias;
2560 return ret;
2562 else if (!tree_int_cst_lt (totallow, tbias))
2563 return NULL_TREE;
2564 bias = wi::to_widest (tbias);
2565 bias -= wi::to_widest (totallow);
2566 if (bias >= 0 && bias < prec - max)
2568 *mask = wi::lshift (*mask, bias);
2569 return ret;
2574 if (totallowp)
2575 return exp;
2576 if (!tree_int_cst_lt (totallow, low))
2577 return exp;
2578 tem = int_const_binop (MINUS_EXPR, low, totallow);
2579 if (tem == NULL_TREE
2580 || TREE_CODE (tem) != INTEGER_CST
2581 || TREE_OVERFLOW (tem)
2582 || compare_tree_int (tem, prec - max) == 1)
2583 return NULL_TREE;
2585 *mask = wi::lshift (*mask, wi::to_widest (tem));
2586 return exp;
2589 /* Attempt to optimize small range tests using bit test.
2590 E.g.
2591 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2592 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2593 has been turned by earlier optimizations into:
2594 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2595 As the whole 43 through 82 range spans fewer than 64 values,
2596 for 64-bit word targets optimize that into:
2597 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
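/* The 0x8F0000004FULL constant above can be read off directly: relative
   to the base 43, the tested values 43 .. 46 set bits 0-3 (0xf), 49 sets
   bit 6 (0x40), 75 .. 78 set bits 32-35 (0xf00000000) and 82 sets bit 39
   (0x8000000000); ORing these together gives 0x8F0000004F.  */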
2599 static bool
2600 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2601 vec<operand_entry *> *ops,
2602 struct range_entry *ranges)
2604 int i, j;
2605 bool any_changes = false;
2606 int prec = GET_MODE_BITSIZE (word_mode);
2607 auto_vec<struct range_entry *, 64> candidates;
2609 for (i = first; i < length - 2; i++)
2611 tree lowi, highi, lowj, highj, type;
2613 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2614 continue;
2615 type = TREE_TYPE (ranges[i].exp);
2616 if (!INTEGRAL_TYPE_P (type))
2617 continue;
2618 lowi = ranges[i].low;
2619 if (lowi == NULL_TREE)
2620 lowi = TYPE_MIN_VALUE (type);
2621 highi = ranges[i].high;
2622 if (highi == NULL_TREE)
2623 continue;
2624 wide_int mask;
2625 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2626 highi, &mask, &lowi);
2627 if (exp == NULL_TREE)
2628 continue;
2629 bool strict_overflow_p = ranges[i].strict_overflow_p;
2630 candidates.truncate (0);
2631 int end = MIN (i + 64, length);
2632 for (j = i + 1; j < end; j++)
2634 tree exp2;
2635 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2636 continue;
2637 if (ranges[j].exp == exp)
2639 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2641 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2642 if (exp2 == exp)
2644 else if (TREE_CODE (exp2) == PLUS_EXPR)
2646 exp2 = TREE_OPERAND (exp2, 0);
2647 STRIP_NOPS (exp2);
2648 if (exp2 != exp)
2649 continue;
2651 else
2652 continue;
2654 else
2655 continue;
2656 lowj = ranges[j].low;
2657 if (lowj == NULL_TREE)
2658 continue;
2659 highj = ranges[j].high;
2660 if (highj == NULL_TREE)
2661 highj = TYPE_MAX_VALUE (type);
2662 wide_int mask2;
2663 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2664 highj, &mask2, NULL);
2665 if (exp2 != exp)
2666 continue;
2667 mask |= mask2;
2668 strict_overflow_p |= ranges[j].strict_overflow_p;
2669 candidates.safe_push (&ranges[j]);
2672 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2673 if (candidates.length () >= 2)
2675 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2676 wi::to_widest (lowi)
2677 + prec - 1 - wi::clz (mask));
2678 operand_entry *oe = (*ops)[ranges[i].idx];
2679 tree op = oe->op;
2680 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2681 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2682 location_t loc = gimple_location (stmt);
2683 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2685 /* See if it isn't cheaper to pretend the minimum value of the
2686 range is 0, if the maximum value is small enough.
2687 We can then avoid the subtraction of the minimum value, but the
2688 mask constant could perhaps be more expensive.
2689 if (compare_tree_int (lowi, 0) > 0
2690 && compare_tree_int (high, prec) < 0)
2692 int cost_diff;
2693 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2694 rtx reg = gen_raw_REG (word_mode, 10000);
2695 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2696 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2697 GEN_INT (-m)), speed_p);
2698 rtx r = immed_wide_int_const (mask, word_mode);
2699 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2700 word_mode, speed_p);
2701 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2702 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2703 word_mode, speed_p);
2704 if (cost_diff > 0)
2706 mask = wi::lshift (mask, m);
2707 lowi = build_zero_cst (TREE_TYPE (lowi));
2711 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2712 false, lowi, high);
2713 if (tem == NULL_TREE || is_gimple_val (tem))
2714 continue;
2715 tree etype = unsigned_type_for (TREE_TYPE (exp));
2716 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2717 fold_convert_loc (loc, etype, exp),
2718 fold_convert_loc (loc, etype, lowi));
2719 exp = fold_convert_loc (loc, integer_type_node, exp);
2720 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2721 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2722 build_int_cst (word_type, 1), exp);
2723 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2724 wide_int_to_tree (word_type, mask));
2725 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2726 build_zero_cst (word_type));
2727 if (is_gimple_val (exp))
2728 continue;
2730 /* The shift might have undefined behavior if TEM is true,
2731 but reassociate_bb isn't prepared to have basic blocks
2732 split when it is running. So, temporarily emit code
2733 with BIT_IOR_EXPR instead of &&, and fix it up in
2734 branch_fixup. */
2735 gimple_seq seq;
2736 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2737 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2738 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2739 gimple_seq seq2;
2740 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2741 gimple_seq_add_seq_without_update (&seq, seq2);
2742 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2743 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2744 gimple *g = gimple_build_assign (make_ssa_name (optype),
2745 BIT_IOR_EXPR, tem, exp);
2746 gimple_set_location (g, loc);
2747 gimple_seq_add_stmt_without_update (&seq, g);
2748 exp = gimple_assign_lhs (g);
2749 tree val = build_zero_cst (optype);
2750 if (update_range_test (&ranges[i], NULL, candidates.address (),
2751 candidates.length (), opcode, ops, exp,
2752 seq, false, val, val, strict_overflow_p))
2754 any_changes = true;
2755 reassoc_branch_fixups.safe_push (tem);
2757 else
2758 gimple_seq_discard (seq);
2761 return any_changes;
2764 /* Optimize range tests, similarly to how fold_range_test optimizes
2765 them on trees. The tree code for the binary
2766 operation between all the operands is OPCODE.
2767 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2768 maybe_optimize_range_tests for inter-bb range optimization.
2769 In that case if oe->op is NULL, oe->id is the index of the bb whose
2770 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2771 the actual opcode. */
2773 static bool
2774 optimize_range_tests (enum tree_code opcode,
2775 vec<operand_entry *> *ops)
2777 unsigned int length = ops->length (), i, j, first;
2778 operand_entry *oe;
2779 struct range_entry *ranges;
2780 bool any_changes = false;
2782 if (length == 1)
2783 return false;
2785 ranges = XNEWVEC (struct range_entry, length);
2786 for (i = 0; i < length; i++)
2788 oe = (*ops)[i];
2789 ranges[i].idx = i;
2790 init_range_entry (ranges + i, oe->op,
2791 oe->op
2792 ? NULL
2793 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2794 /* For | invert it now, we will invert it again before emitting
2795 the optimized expression. */
2796 if (opcode == BIT_IOR_EXPR
2797 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2798 ranges[i].in_p = !ranges[i].in_p;
2801 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2802 for (i = 0; i < length; i++)
2803 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2804 break;
2806 /* Try to merge ranges. */
2807 for (first = i; i < length; i++)
2809 tree low = ranges[i].low;
2810 tree high = ranges[i].high;
2811 int in_p = ranges[i].in_p;
2812 bool strict_overflow_p = ranges[i].strict_overflow_p;
2813 int update_fail_count = 0;
2815 for (j = i + 1; j < length; j++)
2817 if (ranges[i].exp != ranges[j].exp)
2818 break;
2819 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2820 ranges[j].in_p, ranges[j].low, ranges[j].high))
2821 break;
2822 strict_overflow_p |= ranges[j].strict_overflow_p;
2825 if (j == i + 1)
2826 continue;
2828 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2829 opcode, ops, ranges[i].exp, NULL, in_p,
2830 low, high, strict_overflow_p))
2832 i = j - 1;
2833 any_changes = true;
2835 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2836 while update_range_test would fail. */
2837 else if (update_fail_count == 64)
2838 i = j - 1;
2839 else
2840 ++update_fail_count;
2843 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2844 ops, ranges);
2846 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2847 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2848 ops, ranges);
2849 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2850 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2851 ops, ranges);
2853 if (any_changes && opcode != ERROR_MARK)
2855 j = 0;
2856 FOR_EACH_VEC_ELT (*ops, i, oe)
2858 if (oe->op == error_mark_node)
2859 continue;
2860 else if (i != j)
2861 (*ops)[j] = oe;
2862 j++;
2864 ops->truncate (j);
2867 XDELETEVEC (ranges);
2868 return any_changes;
2871 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
2872 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
2873 otherwise the comparison code. */
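/* For illustration (a hypothetical statement): given
     v_1 = VEC_COND_EXPR <a_2 < b_3, { -1, ... }, { 0, ... }>;
   one would expect the routine to return LT_EXPR, store the defining
   statement in *RETS and clear *RETI, as the true/false vectors are
   already in the canonical all-ones/all-zeros form.  */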
2875 static tree_code
2876 ovce_extract_ops (tree var, gassign **rets, bool *reti)
2878 if (TREE_CODE (var) != SSA_NAME)
2879 return ERROR_MARK;
2881 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
2882 if (stmt == NULL)
2883 return ERROR_MARK;
2885 /* ??? If we start creating more COND_EXPR, we could perform
2886 this same optimization with them. For now, simplify. */
2887 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
2888 return ERROR_MARK;
2890 tree cond = gimple_assign_rhs1 (stmt);
2891 tree_code cmp = TREE_CODE (cond);
2892 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
2893 return ERROR_MARK;
2895 /* ??? For now, allow only canonical true and false result vectors.
2896 We could expand this to other constants should the need arise,
2897 but at the moment we don't create them. */
2898 tree t = gimple_assign_rhs2 (stmt);
2899 tree f = gimple_assign_rhs3 (stmt);
2900 bool inv;
2901 if (integer_all_onesp (t))
2902 inv = false;
2903 else if (integer_all_onesp (f))
2905 cmp = invert_tree_comparison (cmp, false);
2906 inv = true;
2908 else
2909 return ERROR_MARK;
2910 if (!integer_zerop (f))
2911 return ERROR_MARK;
2913 /* Success! */
2914 if (rets)
2915 *rets = stmt;
2916 if (reti)
2917 *reti = inv;
2918 return cmp;
2921 /* Optimize the condition of VEC_COND_EXPRs which have been combined
2922 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
2924 static bool
2925 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
2927 unsigned int length = ops->length (), i, j;
2928 bool any_changes = false;
2930 if (length == 1)
2931 return false;
2933 for (i = 0; i < length; ++i)
2935 tree elt0 = (*ops)[i]->op;
2937 gassign *stmt0;
2938 bool invert;
2939 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
2940 if (cmp0 == ERROR_MARK)
2941 continue;
2943 for (j = i + 1; j < length; ++j)
2945 tree &elt1 = (*ops)[j]->op;
2947 gassign *stmt1;
2948 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
2949 if (cmp1 == ERROR_MARK)
2950 continue;
2952 tree cond0 = gimple_assign_rhs1 (stmt0);
2953 tree x0 = TREE_OPERAND (cond0, 0);
2954 tree y0 = TREE_OPERAND (cond0, 1);
2956 tree cond1 = gimple_assign_rhs1 (stmt1);
2957 tree x1 = TREE_OPERAND (cond1, 0);
2958 tree y1 = TREE_OPERAND (cond1, 1);
2960 tree comb;
2961 if (opcode == BIT_AND_EXPR)
2962 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
2963 else if (opcode == BIT_IOR_EXPR)
2964 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
2965 else
2966 gcc_unreachable ();
2967 if (comb == NULL)
2968 continue;
2970 /* Success! */
2971 if (dump_file && (dump_flags & TDF_DETAILS))
2973 fprintf (dump_file, "Transforming ");
2974 print_generic_expr (dump_file, cond0, 0);
2975 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
2976 print_generic_expr (dump_file, cond1, 0);
2977 fprintf (dump_file, " into ");
2978 print_generic_expr (dump_file, comb, 0);
2979 fputc ('\n', dump_file);
2982 gimple_assign_set_rhs1 (stmt0, comb);
2983 if (invert)
2984 std::swap (*gimple_assign_rhs2_ptr (stmt0),
2985 *gimple_assign_rhs3_ptr (stmt0));
2986 update_stmt (stmt0);
2988 elt1 = error_mark_node;
2989 any_changes = true;
2993 if (any_changes)
2995 operand_entry *oe;
2996 j = 0;
2997 FOR_EACH_VEC_ELT (*ops, i, oe)
2999 if (oe->op == error_mark_node)
3000 continue;
3001 else if (i != j)
3002 (*ops)[j] = oe;
3003 j++;
3005 ops->truncate (j);
3008 return any_changes;
3011 /* Return true if STMT is a cast like:
3012 <bb N>:
3014 _123 = (int) _234;
3016 <bb M>:
3017 # _345 = PHI <_123(N), 1(...), 1(...)>
3018 where _234 has bool type, _123 has a single use and
3019 bb N has a single successor M. This is commonly used in
3020 the last block of a range test. */
3022 static bool
3023 final_range_test_p (gimple *stmt)
3025 basic_block bb, rhs_bb;
3026 edge e;
3027 tree lhs, rhs;
3028 use_operand_p use_p;
3029 gimple *use_stmt;
3031 if (!gimple_assign_cast_p (stmt))
3032 return false;
3033 bb = gimple_bb (stmt);
3034 if (!single_succ_p (bb))
3035 return false;
3036 e = single_succ_edge (bb);
3037 if (e->flags & EDGE_COMPLEX)
3038 return false;
3040 lhs = gimple_assign_lhs (stmt);
3041 rhs = gimple_assign_rhs1 (stmt);
3042 if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3043 || TREE_CODE (rhs) != SSA_NAME
3044 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
3045 return false;
3047 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3048 if (!single_imm_use (lhs, &use_p, &use_stmt))
3049 return false;
3051 if (gimple_code (use_stmt) != GIMPLE_PHI
3052 || gimple_bb (use_stmt) != e->dest)
3053 return false;
3055 /* And that the rhs is defined in the same loop. */
3056 rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
3057 if (rhs_bb == NULL
3058 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3059 return false;
3061 return true;
3064 /* Return true if BB is a suitable basic block for inter-bb range test
3065 optimization. If BACKWARD is true, BB should be the only predecessor
3066 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
3067 or is compared against to find a common basic block to which all
3068 conditions branch if true resp. false. If BACKWARD is false,
3069 TEST_BB should be the only predecessor of BB. */
3071 static bool
3072 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3073 bool backward)
3075 edge_iterator ei, ei2;
3076 edge e, e2;
3077 gimple *stmt;
3078 gphi_iterator gsi;
3079 bool other_edge_seen = false;
3080 bool is_cond;
3082 if (test_bb == bb)
3083 return false;
3084 /* Check last stmt first. */
3085 stmt = last_stmt (bb);
3086 if (stmt == NULL
3087 || (gimple_code (stmt) != GIMPLE_COND
3088 && (backward || !final_range_test_p (stmt)))
3089 || gimple_visited_p (stmt)
3090 || stmt_could_throw_p (stmt)
3091 || *other_bb == bb)
3092 return false;
3093 is_cond = gimple_code (stmt) == GIMPLE_COND;
3094 if (is_cond)
3096 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3097 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3098 to *OTHER_BB (if not set yet, try to find it out). */
3099 if (EDGE_COUNT (bb->succs) != 2)
3100 return false;
3101 FOR_EACH_EDGE (e, ei, bb->succs)
3103 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3104 return false;
3105 if (e->dest == test_bb)
3107 if (backward)
3108 continue;
3109 else
3110 return false;
3112 if (e->dest == bb)
3113 return false;
3114 if (*other_bb == NULL)
3116 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3117 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3118 return false;
3119 else if (e->dest == e2->dest)
3120 *other_bb = e->dest;
3121 if (*other_bb == NULL)
3122 return false;
3124 if (e->dest == *other_bb)
3125 other_edge_seen = true;
3126 else if (backward)
3127 return false;
3129 if (*other_bb == NULL || !other_edge_seen)
3130 return false;
3132 else if (single_succ (bb) != *other_bb)
3133 return false;
3135 /* Now check all PHIs of *OTHER_BB. */
3136 e = find_edge (bb, *other_bb);
3137 e2 = find_edge (test_bb, *other_bb);
3138 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3140 gphi *phi = gsi.phi ();
3141 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3142 corresponding to BB and TEST_BB predecessor must be the same. */
3143 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3144 gimple_phi_arg_def (phi, e2->dest_idx), 0))
3146 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3147 one of the PHIs should have the lhs of the last stmt in
3148 that block as PHI arg and that PHI should have 0 or 1
3149 corresponding to it in all other range test basic blocks
3150 considered. */
3151 if (!is_cond)
3153 if (gimple_phi_arg_def (phi, e->dest_idx)
3154 == gimple_assign_lhs (stmt)
3155 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3156 || integer_onep (gimple_phi_arg_def (phi,
3157 e2->dest_idx))))
3158 continue;
3160 else
3162 gimple *test_last = last_stmt (test_bb);
3163 if (gimple_code (test_last) != GIMPLE_COND
3164 && gimple_phi_arg_def (phi, e2->dest_idx)
3165 == gimple_assign_lhs (test_last)
3166 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3167 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3168 continue;
3171 return false;
3174 return true;
3177 /* Return true if BB doesn't have side-effects that would disallow
3178 range test optimization, all SSA_NAMEs set in the bb are consumed
3179 in the bb and there are no PHIs. */
3181 static bool
3182 no_side_effect_bb (basic_block bb)
3184 gimple_stmt_iterator gsi;
3185 gimple *last;
3187 if (!gimple_seq_empty_p (phi_nodes (bb)))
3188 return false;
3189 last = last_stmt (bb);
3190 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3192 gimple *stmt = gsi_stmt (gsi);
3193 tree lhs;
3194 imm_use_iterator imm_iter;
3195 use_operand_p use_p;
3197 if (is_gimple_debug (stmt))
3198 continue;
3199 if (gimple_has_side_effects (stmt))
3200 return false;
3201 if (stmt == last)
3202 return true;
3203 if (!is_gimple_assign (stmt))
3204 return false;
3205 lhs = gimple_assign_lhs (stmt);
3206 if (TREE_CODE (lhs) != SSA_NAME)
3207 return false;
3208 if (gimple_assign_rhs_could_trap_p (stmt))
3209 return false;
3210 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3212 gimple *use_stmt = USE_STMT (use_p);
3213 if (is_gimple_debug (use_stmt))
3214 continue;
3215 if (gimple_bb (use_stmt) != bb)
3216 return false;
3219 return false;
3222 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3223 return true and fill in *OPS recursively. */
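/* Sketch of the recursion (hypothetical SSA names): for
     _5 = _1 | _3;
     _9 = _5 | _7;
   calling get_ops (_9, BIT_IOR_EXPR, ...) pushes operand entries for _1,
   _3 and _7, assuming each of them has a single use and is not itself
   defined by a further reassociable BIT_IOR_EXPR.  */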
3225 static bool
3226 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3227 struct loop *loop)
3229 gimple *stmt = SSA_NAME_DEF_STMT (var);
3230 tree rhs[2];
3231 int i;
3233 if (!is_reassociable_op (stmt, code, loop))
3234 return false;
3236 rhs[0] = gimple_assign_rhs1 (stmt);
3237 rhs[1] = gimple_assign_rhs2 (stmt);
3238 gimple_set_visited (stmt, true);
3239 for (i = 0; i < 2; i++)
3240 if (TREE_CODE (rhs[i]) == SSA_NAME
3241 && !get_ops (rhs[i], code, ops, loop)
3242 && has_single_use (rhs[i]))
3244 operand_entry *oe = operand_entry_pool.allocate ();
3246 oe->op = rhs[i];
3247 oe->rank = code;
3248 oe->id = 0;
3249 oe->count = 1;
3250 oe->stmt_to_insert = NULL;
3251 ops->safe_push (oe);
3253 return true;
3256 /* Find the ops that were added by get_ops starting from VAR, see if
3257 they were changed during update_range_test and if yes, create new
3258 stmts. */
3260 static tree
3261 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3262 unsigned int *pidx, struct loop *loop)
3264 gimple *stmt = SSA_NAME_DEF_STMT (var);
3265 tree rhs[4];
3266 int i;
3268 if (!is_reassociable_op (stmt, code, loop))
3269 return NULL;
3271 rhs[0] = gimple_assign_rhs1 (stmt);
3272 rhs[1] = gimple_assign_rhs2 (stmt);
3273 rhs[2] = rhs[0];
3274 rhs[3] = rhs[1];
3275 for (i = 0; i < 2; i++)
3276 if (TREE_CODE (rhs[i]) == SSA_NAME)
3278 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3279 if (rhs[2 + i] == NULL_TREE)
3281 if (has_single_use (rhs[i]))
3282 rhs[2 + i] = ops[(*pidx)++]->op;
3283 else
3284 rhs[2 + i] = rhs[i];
3287 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3288 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3290 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3291 var = make_ssa_name (TREE_TYPE (var));
3292 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3293 rhs[2], rhs[3]);
3294 gimple_set_uid (g, gimple_uid (stmt));
3295 gimple_set_visited (g, true);
3296 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3298 return var;
3301 /* Structure to track the initial value passed to get_ops and
3302 the range in the ops vector for each basic block. */
3304 struct inter_bb_range_test_entry
3306 tree op;
3307 unsigned int first_idx, last_idx;
3310 /* Inter-bb range test optimization.
3312 Returns TRUE if a gimple conditional is optimized to a true/false,
3313 otherwise returns FALSE.
3315 This indicates to the caller that it should run a CFG cleanup pass
3316 once reassociation is completed. */
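/* The typical shape handled here (a schematic example, not taken from a
   particular testcase) is a chain of single-predecessor blocks, each
   ending in a GIMPLE_COND on the same name and all branching to a common
   other_bb:
     <bb 2>: if (x_1 == 2) goto <bb 4>; else goto <bb 3>;
     <bb 3>: if (x_1 == 5) goto <bb 4>; else goto <bb 5>;
   The two conditions are collected as an ORed range test on x_1 and,
   where profitable, rewritten as a single test.  */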
3318 static bool
3319 maybe_optimize_range_tests (gimple *stmt)
3321 basic_block first_bb = gimple_bb (stmt);
3322 basic_block last_bb = first_bb;
3323 basic_block other_bb = NULL;
3324 basic_block bb;
3325 edge_iterator ei;
3326 edge e;
3327 auto_vec<operand_entry *> ops;
3328 auto_vec<inter_bb_range_test_entry> bbinfo;
3329 bool any_changes = false;
3330 bool cfg_cleanup_needed = false;
3332 /* Consider only basic blocks that end with GIMPLE_COND or
3333 a cast statement satisfying final_range_test_p. All
3334 but the last bb in the first_bb .. last_bb range
3335 should end with GIMPLE_COND. */
3336 if (gimple_code (stmt) == GIMPLE_COND)
3338 if (EDGE_COUNT (first_bb->succs) != 2)
3339 return cfg_cleanup_needed;
3341 else if (final_range_test_p (stmt))
3342 other_bb = single_succ (first_bb);
3343 else
3344 return cfg_cleanup_needed;
3346 if (stmt_could_throw_p (stmt))
3347 return cfg_cleanup_needed;
3349 /* As relative ordering of post-dominator sons isn't fixed,
3350 maybe_optimize_range_tests can be called first on any
3351 bb in the range we want to optimize. So, start searching
3352 backwards, if first_bb can be set to a predecessor. */
3353 while (single_pred_p (first_bb))
3355 basic_block pred_bb = single_pred (first_bb);
3356 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3357 break;
3358 if (!no_side_effect_bb (first_bb))
3359 break;
3360 first_bb = pred_bb;
3362 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3363 Before starting forward search in last_bb successors, find
3364 out the other_bb. */
3365 if (first_bb == last_bb)
3367 other_bb = NULL;
3368 /* As non-GIMPLE_COND last stmt always terminates the range,
3369 if forward search didn't discover anything, just give up. */
3370 if (gimple_code (stmt) != GIMPLE_COND)
3371 return cfg_cleanup_needed;
3372 /* Look at both successors. Either it ends with a GIMPLE_COND
3373 and satisfies suitable_cond_bb, or ends with a cast and
3374 other_bb is that cast's successor. */
3375 FOR_EACH_EDGE (e, ei, first_bb->succs)
3376 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3377 || e->dest == first_bb)
3378 return cfg_cleanup_needed;
3379 else if (single_pred_p (e->dest))
3381 stmt = last_stmt (e->dest);
3382 if (stmt
3383 && gimple_code (stmt) == GIMPLE_COND
3384 && EDGE_COUNT (e->dest->succs) == 2)
3386 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3387 break;
3388 else
3389 other_bb = NULL;
3391 else if (stmt
3392 && final_range_test_p (stmt)
3393 && find_edge (first_bb, single_succ (e->dest)))
3395 other_bb = single_succ (e->dest);
3396 if (other_bb == first_bb)
3397 other_bb = NULL;
3400 if (other_bb == NULL)
3401 return cfg_cleanup_needed;
3403 /* Now do the forward search, moving last_bb to successor bbs
3404 that aren't other_bb. */
3405 while (EDGE_COUNT (last_bb->succs) == 2)
3407 FOR_EACH_EDGE (e, ei, last_bb->succs)
3408 if (e->dest != other_bb)
3409 break;
3410 if (e == NULL)
3411 break;
3412 if (!single_pred_p (e->dest))
3413 break;
3414 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3415 break;
3416 if (!no_side_effect_bb (e->dest))
3417 break;
3418 last_bb = e->dest;
3420 if (first_bb == last_bb)
3421 return cfg_cleanup_needed;
3422 /* Here basic blocks first_bb through last_bb's predecessor
3423 end with GIMPLE_COND, all of them have one of the edges to
3424 other_bb and another to another block in the range,
3425 all blocks except first_bb don't have side-effects and
3426 last_bb ends with either GIMPLE_COND, or cast satisfying
3427 final_range_test_p. */
3428 for (bb = last_bb; ; bb = single_pred (bb))
3430 enum tree_code code;
3431 tree lhs, rhs;
3432 inter_bb_range_test_entry bb_ent;
3434 bb_ent.op = NULL_TREE;
3435 bb_ent.first_idx = ops.length ();
3436 bb_ent.last_idx = bb_ent.first_idx;
3437 e = find_edge (bb, other_bb);
3438 stmt = last_stmt (bb);
3439 gimple_set_visited (stmt, true);
3440 if (gimple_code (stmt) != GIMPLE_COND)
3442 use_operand_p use_p;
3443 gimple *phi;
3444 edge e2;
3445 unsigned int d;
3447 lhs = gimple_assign_lhs (stmt);
3448 rhs = gimple_assign_rhs1 (stmt);
3449 gcc_assert (bb == last_bb);
3451 /* stmt is
3452 _123 = (int) _234;
3454 followed by:
3455 <bb M>:
3456 # _345 = PHI <_123(N), 1(...), 1(...)>
3458 or 0 instead of 1. If it is 0, the _234
3459 range test is anded together with all the
3460 other range tests; if it is 1, it is ored with
3461 them. */
3462 single_imm_use (lhs, &use_p, &phi);
3463 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3464 e2 = find_edge (first_bb, other_bb);
3465 d = e2->dest_idx;
3466 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3467 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3468 code = BIT_AND_EXPR;
3469 else
3471 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3472 code = BIT_IOR_EXPR;
3475 /* If _234 SSA_NAME_DEF_STMT is
3476 _234 = _567 | _789;
3477 (or &, corresponding to 1/0 in the phi arguments),
3478 push into ops the individual range test arguments
3479 of the bitwise or resp. and, recursively. */
3480 if (!get_ops (rhs, code, &ops,
3481 loop_containing_stmt (stmt))
3482 && has_single_use (rhs))
3484 /* Otherwise, push the _234 range test itself. */
3485 operand_entry *oe = operand_entry_pool.allocate ();
3487 oe->op = rhs;
3488 oe->rank = code;
3489 oe->id = 0;
3490 oe->count = 1;
3491 oe->stmt_to_insert = NULL;
3492 ops.safe_push (oe);
3493 bb_ent.last_idx++;
3495 else
3496 bb_ent.last_idx = ops.length ();
3497 bb_ent.op = rhs;
3498 bbinfo.safe_push (bb_ent);
3499 continue;
3501 /* Otherwise stmt is GIMPLE_COND. */
3502 code = gimple_cond_code (stmt);
3503 lhs = gimple_cond_lhs (stmt);
3504 rhs = gimple_cond_rhs (stmt);
3505 if (TREE_CODE (lhs) == SSA_NAME
3506 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3507 && ((code != EQ_EXPR && code != NE_EXPR)
3508 || rhs != boolean_false_node
3509 /* Either push into ops the individual bitwise
3510 or resp. and operands, depending on which
3511 edge is other_bb. */
3512 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3513 ^ (code == EQ_EXPR))
3514 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3515 loop_containing_stmt (stmt))))
3517 /* Or push the GIMPLE_COND stmt itself. */
3518 operand_entry *oe = operand_entry_pool.allocate ();
3520 oe->op = NULL;
3521 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3522 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3523 /* oe->op = NULL signals that there is no SSA_NAME
3524 for the range test, and oe->id instead is the
3525 basic block number at whose end the GIMPLE_COND
3526 is. */
3527 oe->id = bb->index;
3528 oe->count = 1;
3529 oe->stmt_to_insert = NULL;
3530 ops.safe_push (oe);
3531 bb_ent.op = NULL;
3532 bb_ent.last_idx++;
3534 else if (ops.length () > bb_ent.first_idx)
3536 bb_ent.op = lhs;
3537 bb_ent.last_idx = ops.length ();
3539 bbinfo.safe_push (bb_ent);
3540 if (bb == first_bb)
3541 break;
3543 if (ops.length () > 1)
3544 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3545 if (any_changes)
3547 unsigned int idx, max_idx = 0;
3548 /* update_ops relies on has_single_use predicates returning the
3549 same values as they did during get_ops earlier. Additionally it
3550 never removes statements, only adds new ones and it should walk
3551 from the single imm use and check the predicate already before
3552 making those changes.
3553 On the other side, the handling of GIMPLE_COND directly can turn
3554 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3555 it needs to be done in a separate loop afterwards. */
3556 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3558 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3559 && bbinfo[idx].op != NULL_TREE)
3561 tree new_op;
3563 max_idx = idx;
3564 stmt = last_stmt (bb);
3565 new_op = update_ops (bbinfo[idx].op,
3566 (enum tree_code)
3567 ops[bbinfo[idx].first_idx]->rank,
3568 ops, &bbinfo[idx].first_idx,
3569 loop_containing_stmt (stmt));
3570 if (new_op == NULL_TREE)
3572 gcc_assert (bb == last_bb);
3573 new_op = ops[bbinfo[idx].first_idx++]->op;
3575 if (bbinfo[idx].op != new_op)
3577 imm_use_iterator iter;
3578 use_operand_p use_p;
3579 gimple *use_stmt, *cast_stmt = NULL;
3581 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3582 if (is_gimple_debug (use_stmt))
3583 continue;
3584 else if (gimple_code (use_stmt) == GIMPLE_COND
3585 || gimple_code (use_stmt) == GIMPLE_PHI)
3586 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3587 SET_USE (use_p, new_op);
3588 else if (gimple_assign_cast_p (use_stmt))
3589 cast_stmt = use_stmt;
3590 else
3591 gcc_unreachable ();
3592 if (cast_stmt)
3594 gcc_assert (bb == last_bb);
3595 tree lhs = gimple_assign_lhs (cast_stmt);
3596 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3597 enum tree_code rhs_code
3598 = gimple_assign_rhs_code (cast_stmt);
3599 gassign *g;
3600 if (is_gimple_min_invariant (new_op))
3602 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3603 g = gimple_build_assign (new_lhs, new_op);
3605 else
3606 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3607 gimple_stmt_iterator gsi = gsi_for_stmt (cast_stmt);
3608 gimple_set_uid (g, gimple_uid (cast_stmt));
3609 gimple_set_visited (g, true);
3610 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3611 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3612 if (is_gimple_debug (use_stmt))
3613 continue;
3614 else if (gimple_code (use_stmt) == GIMPLE_COND
3615 || gimple_code (use_stmt) == GIMPLE_PHI)
3616 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3617 SET_USE (use_p, new_lhs);
3618 else
3619 gcc_unreachable ();
3623 if (bb == first_bb)
3624 break;
3626 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3628 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3629 && bbinfo[idx].op == NULL_TREE
3630 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3632 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3634 if (idx > max_idx)
3635 max_idx = idx;
3637 /* If we collapse the conditional to a true/false
3638 condition, then bubble that knowledge up to our caller. */
3639 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3641 gimple_cond_make_false (cond_stmt);
3642 cfg_cleanup_needed = true;
3644 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3646 gimple_cond_make_true (cond_stmt);
3647 cfg_cleanup_needed = true;
3649 else
3651 gimple_cond_set_code (cond_stmt, NE_EXPR);
3652 gimple_cond_set_lhs (cond_stmt,
3653 ops[bbinfo[idx].first_idx]->op);
3654 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3656 update_stmt (cond_stmt);
3658 if (bb == first_bb)
3659 break;
3662 /* The above changes could result in basic blocks after the first
3663 modified one, up to and including last_bb, to be executed even if
3664 they would not be in the original program. If the value ranges of
3665 assignment lhs' in those bbs were dependent on the conditions
3666 guarding those basic blocks which now can change, the VRs might
3667 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
3668 are only used within the same bb, it should not be a big deal if
3669 we just reset all the VRs in those bbs. See PR68671. */
3670 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
3671 reset_flow_sensitive_info_in_bb (bb);
3673 return cfg_cleanup_needed;
3676 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3677 of STMT in its operands. This is also known as a "destructive
3678 update" operation. */
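/* Schematically (hypothetical names), the pattern recognized is:
     b_1 = PHI <a_2 (latch), ...>
     a_2 = c_3 + b_1;
   Here is_phi_for_stmt (the addition, b_1) returns true, because the PHI
   defining b_1 uses a_2, the lhs of the addition.  */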
3680 static bool
3681 is_phi_for_stmt (gimple *stmt, tree operand)
3683 gimple *def_stmt;
3684 gphi *def_phi;
3685 tree lhs;
3686 use_operand_p arg_p;
3687 ssa_op_iter i;
3689 if (TREE_CODE (operand) != SSA_NAME)
3690 return false;
3692 lhs = gimple_assign_lhs (stmt);
3694 def_stmt = SSA_NAME_DEF_STMT (operand);
3695 def_phi = dyn_cast <gphi *> (def_stmt);
3696 if (!def_phi)
3697 return false;
3699 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3700 if (lhs == USE_FROM_PTR (arg_p))
3701 return true;
3702 return false;
3705 /* Remove def stmt of VAR if VAR has zero uses and recurse
3706 on rhs1 operand if so. */
3708 static void
3709 remove_visited_stmt_chain (tree var)
3711 gimple *stmt;
3712 gimple_stmt_iterator gsi;
3714 while (1)
3716 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3717 return;
3718 stmt = SSA_NAME_DEF_STMT (var);
3719 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3721 var = gimple_assign_rhs1 (stmt);
3722 gsi = gsi_for_stmt (stmt);
3723 reassoc_remove_stmt (&gsi);
3724 release_defs (stmt);
3726 else
3727 return;
3731 /* This function checks three consecutive operands in
3732 the passed operands vector OPS starting from OPINDEX and
3733 swaps two operands if it is profitable for the binary operation
3734 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3736 We pair ops with the same rank if possible.
3738 The alternative we try is to see if STMT is a destructive
3739 update style statement, which is like:
3740 b = phi (a, ...)
3741 a = c + b;
3742 In that case, we want to use the destructive update form to
3743 expose the possible vectorizer sum reduction opportunity.
3744 In that case, the third operand will be the phi node. This
3745 check is not performed if STMT is null.
3747 We could, of course, try to be better as noted above, and do a
3748 lot of work to try to find these opportunities in >3 operand
3749 cases, but it is unlikely to be worth it. */
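/* A small illustration of the rank-pairing case (hypothetical ranks):
   with ops {a (rank 2), b (rank 2), c (rank 3)} at OPINDEX, the first
   and third entries are swapped so the vector becomes {c, b, a}; the
   binary operation consuming the last two operands then pairs the two
   rank-2 operands, which is more likely to expose a redundancy.  */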
3751 static void
3752 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
3753 unsigned int opindex, gimple *stmt)
3755 operand_entry *oe1, *oe2, *oe3;
3757 oe1 = ops[opindex];
3758 oe2 = ops[opindex + 1];
3759 oe3 = ops[opindex + 2];
3761 if ((oe1->rank == oe2->rank
3762 && oe2->rank != oe3->rank)
3763 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3764 && !is_phi_for_stmt (stmt, oe1->op)
3765 && !is_phi_for_stmt (stmt, oe2->op)))
3767 operand_entry temp = *oe3;
3768 oe3->op = oe1->op;
3769 oe3->rank = oe1->rank;
3770 oe1->op = temp.op;
3771 oe1->rank= temp.rank;
3773 else if ((oe1->rank == oe3->rank
3774 && oe2->rank != oe3->rank)
3775 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3776 && !is_phi_for_stmt (stmt, oe1->op)
3777 && !is_phi_for_stmt (stmt, oe3->op)))
3779 operand_entry temp = *oe2;
3780 oe2->op = oe1->op;
3781 oe2->rank = oe1->rank;
3782 oe1->op = temp.op;
3783 oe1->rank = temp.rank;
3787 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3788 two definitions, otherwise return STMT. */
3790 static inline gimple *
3791 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
3793 if (TREE_CODE (rhs1) == SSA_NAME
3794 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3795 stmt = SSA_NAME_DEF_STMT (rhs1);
3796 if (TREE_CODE (rhs2) == SSA_NAME
3797 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3798 stmt = SSA_NAME_DEF_STMT (rhs2);
3799 return stmt;
3802 /* Recursively rewrite our linearized statements so that the operators
3803 match those in OPS[OPINDEX], putting the computation in rank
3804 order. Return new lhs. */
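/* For example (a hypothetical four-operand chain): if the linearized
   chain computes ((a + b) + c) + d and OPS holds {w, x, y, z}, the chain
   is rewritten so that the innermost statement computes y + z, the next
   statement adds x and the outermost adds w, creating new SSA names
   where the operands of a statement actually change.  */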
3806 static tree
3807 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
3808 vec<operand_entry *> ops, bool changed)
3810 tree rhs1 = gimple_assign_rhs1 (stmt);
3811 tree rhs2 = gimple_assign_rhs2 (stmt);
3812 tree lhs = gimple_assign_lhs (stmt);
3813 operand_entry *oe;
3815 /* The final recursion case for this function is that you have
3816 exactly two operations left.
3817 If we had exactly one op in the entire list to start with, we
3818 would have never called this function, and the tail recursion
3819 rewrites them one at a time. */
3820 if (opindex + 2 == ops.length ())
3822 operand_entry *oe1, *oe2;
3824 oe1 = ops[opindex];
3825 oe2 = ops[opindex + 1];
3827 if (rhs1 != oe1->op || rhs2 != oe2->op)
3829 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3830 unsigned int uid = gimple_uid (stmt);
3832 if (dump_file && (dump_flags & TDF_DETAILS))
3834 fprintf (dump_file, "Transforming ");
3835 print_gimple_stmt (dump_file, stmt, 0, 0);
3838 /* Even when changed is false, reassociation could have e.g. removed
3839 some redundant operations, so unless we are just swapping the
3840 arguments or unless there is no change at all (then we just
3841 return lhs), force creation of a new SSA_NAME. */
3842 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3844 gimple *insert_point
3845 = find_insert_point (stmt, oe1->op, oe2->op);
3846 /* If the stmt that defines operand has to be inserted, insert it
3847 before the use. */
3848 if (oe1->stmt_to_insert)
3849 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
3850 if (oe2->stmt_to_insert)
3851 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
3852 lhs = make_ssa_name (TREE_TYPE (lhs));
3853 stmt
3854 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3855 oe1->op, oe2->op);
3856 gimple_set_uid (stmt, uid);
3857 gimple_set_visited (stmt, true);
3858 if (insert_point == gsi_stmt (gsi))
3859 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3860 else
3861 insert_stmt_after (stmt, insert_point);
3863 else
3865 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3866 == stmt);
3867 /* If the stmt that defines operand has to be inserted, insert it
3868 before the use. */
3869 if (oe1->stmt_to_insert)
3870 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
3871 if (oe2->stmt_to_insert)
3872 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
3873 gimple_assign_set_rhs1 (stmt, oe1->op);
3874 gimple_assign_set_rhs2 (stmt, oe2->op);
3875 update_stmt (stmt);
3878 if (rhs1 != oe1->op && rhs1 != oe2->op)
3879 remove_visited_stmt_chain (rhs1);
3881 if (dump_file && (dump_flags & TDF_DETAILS))
3883 fprintf (dump_file, " into ");
3884 print_gimple_stmt (dump_file, stmt, 0, 0);
3887 return lhs;
3890 /* If we hit here, we should have 3 or more ops left. */
3891 gcc_assert (opindex + 2 < ops.length ());
3893 /* Rewrite the next operator. */
3894 oe = ops[opindex];
3896 /* If the stmt that defines operand has to be inserted, insert it
3897 before the use. */
3898 if (oe->stmt_to_insert)
3899 insert_stmt_before_use (stmt, oe->stmt_to_insert);
3901 /* Recurse on the LHS of the binary operator, which is guaranteed to
3902 be the non-leaf side. */
3903 tree new_rhs1
3904 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3905 changed || oe->op != rhs2);
3907 if (oe->op != rhs2 || new_rhs1 != rhs1)
3909 if (dump_file && (dump_flags & TDF_DETAILS))
3911 fprintf (dump_file, "Transforming ");
3912 print_gimple_stmt (dump_file, stmt, 0, 0);
3915 /* If changed is false, this is either opindex == 0
3916 or all outer rhs2's were equal to corresponding oe->op,
3917 and powi_result is NULL.
3918 That means lhs is equivalent before and after reassociation.
3919 Otherwise ensure the old lhs SSA_NAME is not reused and
3920 create a new stmt as well, so that any debug stmts will be
3921 properly adjusted. */
3922 if (changed)
3924 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3925 unsigned int uid = gimple_uid (stmt);
3926 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
3928 lhs = make_ssa_name (TREE_TYPE (lhs));
3929 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3930 new_rhs1, oe->op);
3931 gimple_set_uid (stmt, uid);
3932 gimple_set_visited (stmt, true);
3933 if (insert_point == gsi_stmt (gsi))
3934 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3935 else
3936 insert_stmt_after (stmt, insert_point);
3938 else
3940 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
3941 == stmt);
3942 gimple_assign_set_rhs1 (stmt, new_rhs1);
3943 gimple_assign_set_rhs2 (stmt, oe->op);
3944 update_stmt (stmt);
3947 if (dump_file && (dump_flags & TDF_DETAILS))
3949 fprintf (dump_file, " into ");
3950 print_gimple_stmt (dump_file, stmt, 0, 0);
3953 return lhs;
3956 /* Find out how many cycles we need to compute the statement chain.
3957 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
3958 maximum number of independent statements we may execute per cycle. */
3960 static int
3961 get_required_cycles (int ops_num, int cpu_width)
3963 int res;
3964 int elog;
3965 unsigned int rest;
3967 /* While we have more than 2 * cpu_width operands
3968 we may reduce the number of operands by cpu_width
3969 per cycle. */
3970 res = ops_num / (2 * cpu_width);
3972 /* The remaining operand count can be halved each cycle
3973 until we have only one operand. */
3974 rest = (unsigned)(ops_num - res * cpu_width);
3975 elog = exact_log2 (rest);
3976 if (elog >= 0)
3977 res += elog;
3978 else
3979 res += floor_log2 (rest) + 1;
3981 return res;
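/* A rough worked example of the computation above, assuming
   ops_num = 10 and cpu_width = 2:
     res  = 10 / (2 * 2)        = 2
     rest = 10 - 2 * 2          = 6    (not a power of two)
     res += floor_log2 (6) + 1  = 2 + 3 = 5 cycles.
   With ops_num = 8 instead, rest = 4 is an exact power of two and the
   result is 2 + exact_log2 (4) = 4 cycles.  */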
3984 /* Returns an optimal number of registers to use for computation of
3985 given statements. */
3987 static int
3988 get_reassociation_width (int ops_num, enum tree_code opc,
3989 machine_mode mode)
3991 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
3992 int width;
3993 int width_min;
3994 int cycles_best;
3996 if (param_width > 0)
3997 width = param_width;
3998 else
3999 width = targetm.sched.reassociation_width (opc, mode);
4001 if (width == 1)
4002 return width;
4004 /* Get the minimal time required for sequence computation. */
4005 cycles_best = get_required_cycles (ops_num, width);
4007 /* Check whether we can use a smaller width and still compute the sequence
4008 in the same time. That allows us to reduce register usage.
4009 get_required_cycles is monotonically increasing with lower width
4010 so we can perform a binary search for the minimal width that still
4011 results in the optimal cycle count. */
4012 width_min = 1;
4013 while (width > width_min)
4015 int width_mid = (width + width_min) / 2;
4017 if (get_required_cycles (ops_num, width_mid) == cycles_best)
4018 width = width_mid;
4019 else if (width_min < width_mid)
4020 width_min = width_mid;
4021 else
4022 break;
4025 return width;
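/* A small worked example of the binary search above, assuming four
   operands and a target-provided width of 4:
     get_required_cycles (4, 4) = 2 = cycles_best
     width_mid = 2: get_required_cycles (4, 2) = 2  -> width = 2
     width_mid = 1: get_required_cycles (4, 1) = 3  -> stop
   so a width of 2 already reaches the optimal two cycles and is
   returned, using fewer registers than a width of 4 would.  */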
4028 /* Recursively rewrite our linearized statements so that the operators
4029 match those in OPS[OPINDEX], putting the computation in rank
4030 order and trying to allow operations to be executed in
4031 parallel. */
4033 static void
4034 rewrite_expr_tree_parallel (gassign *stmt, int width,
4035 vec<operand_entry *> ops)
4037 enum tree_code opcode = gimple_assign_rhs_code (stmt);
4038 int op_num = ops.length ();
4039 int stmt_num = op_num - 1;
4040 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
4041 int op_index = op_num - 1;
4042 int stmt_index = 0;
4043 int ready_stmts_end = 0;
4044 int i = 0;
4045 gimple *stmt1 = NULL, *stmt2 = NULL;
4046 tree last_rhs1 = gimple_assign_rhs1 (stmt);
4048 /* We start expression rewriting from the top statements.
4049 So, in this loop we create a full list of statements
4050 we will work with. */
4051 stmts[stmt_num - 1] = stmt;
4052 for (i = stmt_num - 2; i >= 0; i--)
4053 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
4055 for (i = 0; i < stmt_num; i++)
4057 tree op1, op2;
4059 /* Determine whether we should use results of
4060 already handled statements or not. */
4061 if (ready_stmts_end == 0
4062 && (i - stmt_index >= width || op_index < 1))
4063 ready_stmts_end = i;
4065 /* Now we choose operands for the next statement. A nonzero
4066 value in ready_stmts_end means that we should use
4067 the results of already generated statements as new operands. */
4068 if (ready_stmts_end > 0)
4070 op1 = gimple_assign_lhs (stmts[stmt_index++]);
4071 if (ready_stmts_end > stmt_index)
4072 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4073 else if (op_index >= 0)
4075 operand_entry *oe = ops[op_index--];
4076 stmt2 = oe->stmt_to_insert;
4077 op2 = oe->op;
4079 else
4081 gcc_assert (stmt_index < i);
4082 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4085 if (stmt_index >= ready_stmts_end)
4086 ready_stmts_end = 0;
4088 else
4090 if (op_index > 1)
4091 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
4092 operand_entry *oe2 = ops[op_index--];
4093 operand_entry *oe1 = ops[op_index--];
4094 op2 = oe2->op;
4095 stmt2 = oe2->stmt_to_insert;
4096 op1 = oe1->op;
4097 stmt1 = oe1->stmt_to_insert;
4100 /* If we are emitting the last statement, we should put the
4101 remaining operands into it. This will also
4102 break the loop. */
4103 if (op_index < 0 && stmt_index == i)
4104 i = stmt_num - 1;
4106 if (dump_file && (dump_flags & TDF_DETAILS))
4108 fprintf (dump_file, "Transforming ");
4109 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4112 /* We keep the original statement only for the last one. All
4113 others are recreated. */
4114 if (i == stmt_num - 1)
4116 /* If the stmt that defines operand has to be inserted, insert it
4117 before the use. */
4118 if (stmt1)
4119 insert_stmt_before_use (stmts[i], stmt1);
4120 if (stmt2)
4121 insert_stmt_before_use (stmts[i], stmt2);
4122 gimple_assign_set_rhs1 (stmts[i], op1);
4123 gimple_assign_set_rhs2 (stmts[i], op2);
4124 update_stmt (stmts[i]);
4126 else
4128 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
4129 /* If the stmt that defines operand has to be inserted, insert it
4130 before the newly created build_and_add_sum stmt. */
4131 if (stmt1)
4132 insert_stmt_before_use (stmts[i], stmt1);
4133 if (stmt2)
4134 insert_stmt_before_use (stmts[i], stmt2);
4136 if (dump_file && (dump_flags & TDF_DETAILS))
4138 fprintf (dump_file, " into ");
4139 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4143 remove_visited_stmt_chain (last_rhs1);
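/* A minimal sketch of the effect, assuming width == 2 and four operands
   a, b, c, d feeding a PLUS_EXPR chain.  Instead of the serial form
     _1 = a + b;  _2 = _1 + c;  x = _2 + d;
   the loop above emits two independent additions whose results are
   combined by the (kept) last statement:
     _1 = a + b;  _2 = c + d;  x = _1 + _2;
   so the first two additions can execute in parallel.  */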
4146 /* Transform STMT, which is really (A + B) + (C + D), into the left
4147 linear form, ((A + B) + C) + D.
4148 Recurse on D if necessary. */
4150 static void
4151 linearize_expr (gimple *stmt)
4153 gimple_stmt_iterator gsi;
4154 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4155 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4156 gimple *oldbinrhs = binrhs;
4157 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4158 gimple *newbinrhs = NULL;
4159 struct loop *loop = loop_containing_stmt (stmt);
4160 tree lhs = gimple_assign_lhs (stmt);
4162 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4163 && is_reassociable_op (binrhs, rhscode, loop));
4165 gsi = gsi_for_stmt (stmt);
4167 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4168 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4169 gimple_assign_rhs_code (binrhs),
4170 gimple_assign_lhs (binlhs),
4171 gimple_assign_rhs2 (binrhs));
4172 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4173 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4174 gimple_set_uid (binrhs, gimple_uid (stmt));
4176 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4177 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4179 if (dump_file && (dump_flags & TDF_DETAILS))
4181 fprintf (dump_file, "Linearized: ");
4182 print_gimple_stmt (dump_file, stmt, 0, 0);
4185 reassociate_stats.linearized++;
4186 update_stmt (stmt);
4188 gsi = gsi_for_stmt (oldbinrhs);
4189 reassoc_remove_stmt (&gsi);
4190 release_defs (oldbinrhs);
4192 gimple_set_visited (stmt, true);
4193 gimple_set_visited (binlhs, true);
4194 gimple_set_visited (binrhs, true);
4196 /* Tail recurse on the new rhs if it still needs reassociation. */
4197 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4198 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4199 want to change the algorithm while converting to tuples. */
4200 linearize_expr (stmt);
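/* A minimal before/after sketch of the rewrite above for a PLUS_EXPR
   chain (SSA names are illustrative):
     before:  _1 = a + b;  _2 = c + d;  x = _1 + _2;
     after:   _1 = a + b;  _3 = _1 + d;  x = _3 + c;
   The chain is now fully left-linear; the exact operand order within it
   does not matter, since the operands are later collected into a flat
   vector and sorted by rank anyway.  */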
4203 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4204 it. Otherwise, return NULL. */
4206 static gimple *
4207 get_single_immediate_use (tree lhs)
4209 use_operand_p immuse;
4210 gimple *immusestmt;
4212 if (TREE_CODE (lhs) == SSA_NAME
4213 && single_imm_use (lhs, &immuse, &immusestmt)
4214 && is_gimple_assign (immusestmt))
4215 return immusestmt;
4217 return NULL;
4220 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4221 representing the negated value. Insertions of any necessary
4222 instructions go before GSI.
4223 This function is recursive in that, if you hand it "a_5" as the
4224 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4225 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
4227 static tree
4228 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4230 gimple *negatedefstmt = NULL;
4231 tree resultofnegate;
4232 gimple_stmt_iterator gsi;
4233 unsigned int uid;
4235 /* If we are trying to negate a name, defined by an add, negate the
4236 add operands instead. */
4237 if (TREE_CODE (tonegate) == SSA_NAME)
4238 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4239 if (TREE_CODE (tonegate) == SSA_NAME
4240 && is_gimple_assign (negatedefstmt)
4241 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4242 && has_single_use (gimple_assign_lhs (negatedefstmt))
4243 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4245 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4246 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4247 tree lhs = gimple_assign_lhs (negatedefstmt);
4248 gimple *g;
4250 gsi = gsi_for_stmt (negatedefstmt);
4251 rhs1 = negate_value (rhs1, &gsi);
4253 gsi = gsi_for_stmt (negatedefstmt);
4254 rhs2 = negate_value (rhs2, &gsi);
4256 gsi = gsi_for_stmt (negatedefstmt);
4257 lhs = make_ssa_name (TREE_TYPE (lhs));
4258 gimple_set_visited (negatedefstmt, true);
4259 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4260 gimple_set_uid (g, gimple_uid (negatedefstmt));
4261 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4262 return lhs;
4265 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4266 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4267 NULL_TREE, true, GSI_SAME_STMT);
4268 gsi = *gsip;
4269 uid = gimple_uid (gsi_stmt (gsi));
4270 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4272 gimple *stmt = gsi_stmt (gsi);
4273 if (gimple_uid (stmt) != 0)
4274 break;
4275 gimple_set_uid (stmt, uid);
4277 return resultofnegate;
4280 /* Return true if we should break up the subtract in STMT into an add
4281 with negate. This is true when the subtract's operands are really
4282 adds, or the subtract itself is used in an add expression. In
4283 either case, breaking up the subtract into an add with negate
4284 exposes the adds to reassociation. */
4286 static bool
4287 should_break_up_subtract (gimple *stmt)
4289 tree lhs = gimple_assign_lhs (stmt);
4290 tree binlhs = gimple_assign_rhs1 (stmt);
4291 tree binrhs = gimple_assign_rhs2 (stmt);
4292 gimple *immusestmt;
4293 struct loop *loop = loop_containing_stmt (stmt);
4295 if (TREE_CODE (binlhs) == SSA_NAME
4296 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4297 return true;
4299 if (TREE_CODE (binrhs) == SSA_NAME
4300 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4301 return true;
4303 if (TREE_CODE (lhs) == SSA_NAME
4304 && (immusestmt = get_single_immediate_use (lhs))
4305 && is_gimple_assign (immusestmt)
4306 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4307 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4308 return true;
4309 return false;
4312 /* Transform STMT from A - B into A + -B. */
4314 static void
4315 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4317 tree rhs1 = gimple_assign_rhs1 (stmt);
4318 tree rhs2 = gimple_assign_rhs2 (stmt);
4320 if (dump_file && (dump_flags & TDF_DETAILS))
4322 fprintf (dump_file, "Breaking up subtract ");
4323 print_gimple_stmt (dump_file, stmt, 0, 0);
4326 rhs2 = negate_value (rhs2, gsip);
4327 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4328 update_stmt (stmt);
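/* A minimal sketch, e.g. for c = a - b where c only feeds an addition:
     before:  c = a - b;
     after:   _1 = -b;  c = a + _1;
   negate_value may push the negation further down when b is itself a
   single-use PLUS_EXPR result.  */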
4331 /* Determine whether STMT is a builtin call that raises an SSA name
4332 to an integer power and has only one use. If so, and this is early
4333 reassociation and unsafe math optimizations are permitted, place
4334 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4335 If any of these conditions does not hold, return FALSE. */
4337 static bool
4338 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
4340 tree arg1;
4341 REAL_VALUE_TYPE c, cint;
4343 switch (gimple_call_combined_fn (stmt))
4345 CASE_CFN_POW:
4346 if (flag_errno_math)
4347 return false;
4349 *base = gimple_call_arg (stmt, 0);
4350 arg1 = gimple_call_arg (stmt, 1);
4352 if (TREE_CODE (arg1) != REAL_CST)
4353 return false;
4355 c = TREE_REAL_CST (arg1);
4357 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4358 return false;
4360 *exponent = real_to_integer (&c);
4361 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4362 if (!real_identical (&c, &cint))
4363 return false;
4365 break;
4367 CASE_CFN_POWI:
4368 *base = gimple_call_arg (stmt, 0);
4369 arg1 = gimple_call_arg (stmt, 1);
4371 if (!tree_fits_shwi_p (arg1))
4372 return false;
4374 *exponent = tree_to_shwi (arg1);
4375 break;
4377 default:
4378 return false;
4381 /* Expanding negative exponents is generally unproductive, so we don't
4382 complicate matters with those. Exponents of zero and one should
4383 have been handled by expression folding. */
4384 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4385 return false;
4387 return true;
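/* For illustration: with unsafe math optimizations enabled, a call such
   as t = pow (x, 3.0) is accepted with *BASE = x and *EXPONENT = 3, and
   so is t = __builtin_powi (x, 3); whereas pow (x, 2.5) (non-integral
   exponent), pow (x, -2.0) (negative exponent) or any pow call when
   flag_errno_math is set are all rejected.  */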
4390 /* Try to derive and add operand entry for OP to *OPS. Return false if
4391 unsuccessful. */
4393 static bool
4394 try_special_add_to_ops (vec<operand_entry *> *ops,
4395 enum tree_code code,
4396 tree op, gimple* def_stmt)
4398 tree base = NULL_TREE;
4399 HOST_WIDE_INT exponent = 0;
4401 if (TREE_CODE (op) != SSA_NAME
4402 || ! has_single_use (op))
4403 return false;
4405 if (code == MULT_EXPR
4406 && reassoc_insert_powi_p
4407 && flag_unsafe_math_optimizations
4408 && is_gimple_call (def_stmt)
4409 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
4411 add_repeat_to_ops_vec (ops, base, exponent);
4412 gimple_set_visited (def_stmt, true);
4413 return true;
4415 else if (code == MULT_EXPR
4416 && is_gimple_assign (def_stmt)
4417 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
4418 && !HONOR_SNANS (TREE_TYPE (op))
4419 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
4420 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
4422 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4423 tree cst = build_minus_one_cst (TREE_TYPE (op));
4424 add_to_ops_vec (ops, rhs1);
4425 add_to_ops_vec (ops, cst);
4426 gimple_set_visited (def_stmt, true);
4427 return true;
4430 return false;
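/* Two illustrative cases for a MULT_EXPR chain (OP single-use):
     t = __builtin_powi (x, 3); ... * t   -> a repeat factor (x, 3) is
       recorded via add_repeat_to_ops_vec instead of a plain operand;
     t = -y; ... * t                      -> y and -1 are pushed onto
       OPS, exposing the negation to the trailing -1 handling done
       later in reassociate_bb.  */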
4433 /* Recursively linearize a binary expression that is the RHS of STMT.
4434 Place the operands of the expression tree in the vector named OPS. */
4436 static void
4437 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
4438 bool is_associative, bool set_visited)
4440 tree binlhs = gimple_assign_rhs1 (stmt);
4441 tree binrhs = gimple_assign_rhs2 (stmt);
4442 gimple *binlhsdef = NULL, *binrhsdef = NULL;
4443 bool binlhsisreassoc = false;
4444 bool binrhsisreassoc = false;
4445 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4446 struct loop *loop = loop_containing_stmt (stmt);
4448 if (set_visited)
4449 gimple_set_visited (stmt, true);
4451 if (TREE_CODE (binlhs) == SSA_NAME)
4453 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4454 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4455 && !stmt_could_throw_p (binlhsdef));
4458 if (TREE_CODE (binrhs) == SSA_NAME)
4460 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4461 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4462 && !stmt_could_throw_p (binrhsdef));
4465 /* If the LHS is not reassociable, but the RHS is, we need to swap
4466 them. If neither is reassociable, there is nothing we can do, so
4467 just put them in the ops vector. If the LHS is reassociable,
4468 linearize it. If both are reassociable, then linearize the RHS
4469 and the LHS. */
4471 if (!binlhsisreassoc)
4473 /* If this is not an associative operation like division, give up. */
4474 if (!is_associative)
4476 add_to_ops_vec (ops, binrhs);
4477 return;
4480 if (!binrhsisreassoc)
4482 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4483 add_to_ops_vec (ops, binrhs);
4485 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
4486 add_to_ops_vec (ops, binlhs);
4488 return;
4491 if (dump_file && (dump_flags & TDF_DETAILS))
4493 fprintf (dump_file, "swapping operands of ");
4494 print_gimple_stmt (dump_file, stmt, 0, 0);
4497 swap_ssa_operands (stmt,
4498 gimple_assign_rhs1_ptr (stmt),
4499 gimple_assign_rhs2_ptr (stmt));
4500 update_stmt (stmt);
4502 if (dump_file && (dump_flags & TDF_DETAILS))
4504 fprintf (dump_file, " is now ");
4505 print_gimple_stmt (dump_file, stmt, 0, 0);
4508 /* We want to make it so the lhs is always the reassociative op,
4509 so swap. */
4510 std::swap (binlhs, binrhs);
4512 else if (binrhsisreassoc)
4514 linearize_expr (stmt);
4515 binlhs = gimple_assign_rhs1 (stmt);
4516 binrhs = gimple_assign_rhs2 (stmt);
4519 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4520 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4521 rhscode, loop));
4522 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4523 is_associative, set_visited);
4525 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4526 add_to_ops_vec (ops, binrhs);
4529 /* Repropagate the negates back into subtracts, since no other pass
4530 currently does it. */
4532 static void
4533 repropagate_negates (void)
4535 unsigned int i = 0;
4536 tree negate;
4538 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4540 gimple *user = get_single_immediate_use (negate);
4542 if (!user || !is_gimple_assign (user))
4543 continue;
4545 /* The negate operand can be either operand of a PLUS_EXPR
4546 (it can be the LHS if the RHS is a constant for example).
4548 Force the negate operand to the RHS of the PLUS_EXPR, then
4549 transform the PLUS_EXPR into a MINUS_EXPR. */
4550 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4552 /* If the negated operand appears on the LHS of the
4553 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4554 to force the negated operand to the RHS of the PLUS_EXPR. */
4555 if (gimple_assign_rhs1 (user) == negate)
4557 swap_ssa_operands (user,
4558 gimple_assign_rhs1_ptr (user),
4559 gimple_assign_rhs2_ptr (user));
4562 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4563 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4564 if (gimple_assign_rhs2 (user) == negate)
4566 tree rhs1 = gimple_assign_rhs1 (user);
4567 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
4568 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4569 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4570 update_stmt (user);
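/* For example:  x = -a;  y = b + x;  is rewritten into  y = b - a;
   (after first swapping the operands if x appeared as rhs1).  */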
4573 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4575 if (gimple_assign_rhs1 (user) == negate)
4577 /* We have
4578 x = -a
4579 y = x - b
4580 which we transform into
4581 x = a + b
4582 y = -x .
4583 This pushes down the negate which we possibly can merge
4584 into some other operation, hence insert it into the
4585 plus_negates vector. */
4586 gimple *feed = SSA_NAME_DEF_STMT (negate);
4587 tree a = gimple_assign_rhs1 (feed);
4588 tree b = gimple_assign_rhs2 (user);
4589 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4590 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4591 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4592 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
4593 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4594 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4595 user = gsi_stmt (gsi2);
4596 update_stmt (user);
4597 reassoc_remove_stmt (&gsi);
4598 release_defs (feed);
4599 plus_negates.safe_push (gimple_assign_lhs (user));
4601 else
4603 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4604 rid of one operation. */
4605 gimple *feed = SSA_NAME_DEF_STMT (negate);
4606 tree a = gimple_assign_rhs1 (feed);
4607 tree rhs1 = gimple_assign_rhs1 (user);
4608 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4609 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4610 update_stmt (gsi_stmt (gsi));
4616 /* Returns true if OP is of a type for which we can do reassociation.
4617 That is for integral or non-saturating fixed-point types, and for
4618 floating point types when associative-math is enabled. */
4620 static bool
4621 can_reassociate_p (tree op)
4623 tree type = TREE_TYPE (op);
4624 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4625 || NON_SAT_FIXED_POINT_TYPE_P (type)
4626 || (flag_associative_math && FLOAT_TYPE_P (type)))
4627 return true;
4628 return false;
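/* E.g. unsigned arithmetic always qualifies (overflow wraps), plain
   signed int only does with -fwrapv, and float/double only with
   -fassociative-math.  */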
4631 /* Break up subtract operations in block BB.
4633 We do this top down because we don't know whether the subtract is
4634 part of a possible chain of reassociation except at the top.
4636 IE given
4637 d = f + g
4638 c = a + e
4639 b = c - d
4640 q = b - r
4641 k = t - q
4643 we want to break up k = t - q, but we won't until we've transformed q
4644 = b - r, which won't be broken up until we transform b = c - d.
4646 En passant, clear the GIMPLE visited flag on every statement
4647 and set UIDs within each basic block. */
4649 static void
4650 break_up_subtract_bb (basic_block bb)
4652 gimple_stmt_iterator gsi;
4653 basic_block son;
4654 unsigned int uid = 1;
4656 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4658 gimple *stmt = gsi_stmt (gsi);
4659 gimple_set_visited (stmt, false);
4660 gimple_set_uid (stmt, uid++);
4662 if (!is_gimple_assign (stmt)
4663 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4664 continue;
4666 /* Look for simple gimple subtract operations. */
4667 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4669 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4670 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4671 continue;
4673 /* Check for a subtract used only in an addition. If this
4674 is the case, transform it into add of a negate for better
4675 reassociation. IE transform C = A-B into C = A + -B if C
4676 is only used in an addition. */
4677 if (should_break_up_subtract (stmt))
4678 break_up_subtract (stmt, &gsi);
4680 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4681 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4682 plus_negates.safe_push (gimple_assign_lhs (stmt));
4684 for (son = first_dom_son (CDI_DOMINATORS, bb);
4685 son;
4686 son = next_dom_son (CDI_DOMINATORS, son))
4687 break_up_subtract_bb (son);
4690 /* Used for repeated factor analysis. */
4691 struct repeat_factor
4693 /* An SSA name that occurs in a multiply chain. */
4694 tree factor;
4696 /* Cached rank of the factor. */
4697 unsigned rank;
4699 /* Number of occurrences of the factor in the chain. */
4700 HOST_WIDE_INT count;
4702 /* An SSA name representing the product of this factor and
4703 all factors appearing later in the repeated factor vector. */
4704 tree repr;
4708 static vec<repeat_factor> repeat_factor_vec;
4710 /* Used for sorting the repeat factor vector. Sort primarily by
4711 ascending occurrence count, secondarily by descending rank. */
4713 static int
4714 compare_repeat_factors (const void *x1, const void *x2)
4716 const repeat_factor *rf1 = (const repeat_factor *) x1;
4717 const repeat_factor *rf2 = (const repeat_factor *) x2;
4719 if (rf1->count != rf2->count)
4720 return rf1->count - rf2->count;
4722 return rf2->rank - rf1->rank;
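/* E.g. the factors {x: count 2, rank 7}, {y: count 3, rank 1} and
   {z: count 3, rank 4} sort into the order x, z, y: x first because of
   its lower count, then z before y because of its higher rank.  */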
4725 /* Look for repeated operands in OPS in the multiply tree rooted at
4726 STMT. Replace them with an optimal sequence of multiplies and powi
4727 builtin calls, and remove the used operands from OPS. Return an
4728 SSA name representing the value of the replacement sequence. */
4730 static tree
4731 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
4733 unsigned i, j, vec_len;
4734 int ii;
4735 operand_entry *oe;
4736 repeat_factor *rf1, *rf2;
4737 repeat_factor rfnew;
4738 tree result = NULL_TREE;
4739 tree target_ssa, iter_result;
4740 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4741 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4742 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4743 gimple *mul_stmt, *pow_stmt;
4745 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4746 target. */
4747 if (!powi_fndecl)
4748 return NULL_TREE;
4750 /* Allocate the repeated factor vector. */
4751 repeat_factor_vec.create (10);
4753 /* Scan the OPS vector for all SSA names in the product and build
4754 up a vector of occurrence counts for each factor. */
4755 FOR_EACH_VEC_ELT (*ops, i, oe)
4757 if (TREE_CODE (oe->op) == SSA_NAME)
4759 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4761 if (rf1->factor == oe->op)
4763 rf1->count += oe->count;
4764 break;
4768 if (j >= repeat_factor_vec.length ())
4770 rfnew.factor = oe->op;
4771 rfnew.rank = oe->rank;
4772 rfnew.count = oe->count;
4773 rfnew.repr = NULL_TREE;
4774 repeat_factor_vec.safe_push (rfnew);
4779 /* Sort the repeated factor vector by (a) increasing occurrence count,
4780 and (b) decreasing rank. */
4781 repeat_factor_vec.qsort (compare_repeat_factors);
4783 /* It is generally best to combine as many base factors as possible
4784 into a product before applying __builtin_powi to the result.
4785 However, the sort order chosen for the repeated factor vector
4786 allows us to cache partial results for the product of the base
4787 factors for subsequent use. When we already have a cached partial
4788 result from a previous iteration, it is best to make use of it
4789 before looking for another __builtin_pow opportunity.
4791 As an example, consider x * x * y * y * y * z * z * z * z.
4792 We want to first compose the product x * y * z, raise it to the
4793 second power, then multiply this by y * z, and finally multiply
4794 by z. This can be done in 5 multiplies provided we cache y * z
4795 for use in both expressions:
4797 t1 = y * z
4798 t2 = t1 * x
4799 t3 = t2 * t2
4800 t4 = t1 * t3
4801 result = t4 * z
4803 If we instead ignored the cached y * z and first multiplied by
4804 the __builtin_pow opportunity z * z, we would get the inferior:
4806 t1 = y * z
4807 t2 = t1 * x
4808 t3 = t2 * t2
4809 t4 = z * z
4810 t5 = t3 * t4
4811 result = t5 * y */
4813 vec_len = repeat_factor_vec.length ();
4815 /* Repeatedly look for opportunities to create a builtin_powi call. */
4816 while (true)
4818 HOST_WIDE_INT power;
4820 /* First look for the largest cached product of factors from
4821 preceding iterations. If found, create a builtin_powi for
4822 it if the minimum occurrence count for its factors is at
4823 least 2, or just use this cached product as our next
4824 multiplicand if the minimum occurrence count is 1. */
4825 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4827 if (rf1->repr && rf1->count > 0)
4828 break;
4831 if (j < vec_len)
4833 power = rf1->count;
4835 if (power == 1)
4837 iter_result = rf1->repr;
4839 if (dump_file && (dump_flags & TDF_DETAILS))
4841 unsigned elt;
4842 repeat_factor *rf;
4843 fputs ("Multiplying by cached product ", dump_file);
4844 for (elt = j; elt < vec_len; elt++)
4846 rf = &repeat_factor_vec[elt];
4847 print_generic_expr (dump_file, rf->factor, 0);
4848 if (elt < vec_len - 1)
4849 fputs (" * ", dump_file);
4851 fputs ("\n", dump_file);
4854 else
4856 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4857 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4858 build_int_cst (integer_type_node,
4859 power));
4860 gimple_call_set_lhs (pow_stmt, iter_result);
4861 gimple_set_location (pow_stmt, gimple_location (stmt));
4862 gimple_set_uid (pow_stmt, gimple_uid (stmt));
4863 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4865 if (dump_file && (dump_flags & TDF_DETAILS))
4867 unsigned elt;
4868 repeat_factor *rf;
4869 fputs ("Building __builtin_pow call for cached product (",
4870 dump_file);
4871 for (elt = j; elt < vec_len; elt++)
4873 rf = &repeat_factor_vec[elt];
4874 print_generic_expr (dump_file, rf->factor, 0);
4875 if (elt < vec_len - 1)
4876 fputs (" * ", dump_file);
4878 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
4879 power);
4883 else
4885 /* Otherwise, find the first factor in the repeated factor
4886 vector whose occurrence count is at least 2. If no such
4887 factor exists, there are no builtin_powi opportunities
4888 remaining. */
4889 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4891 if (rf1->count >= 2)
4892 break;
4895 if (j >= vec_len)
4896 break;
4898 power = rf1->count;
4900 if (dump_file && (dump_flags & TDF_DETAILS))
4902 unsigned elt;
4903 repeat_factor *rf;
4904 fputs ("Building __builtin_pow call for (", dump_file);
4905 for (elt = j; elt < vec_len; elt++)
4907 rf = &repeat_factor_vec[elt];
4908 print_generic_expr (dump_file, rf->factor, 0);
4909 if (elt < vec_len - 1)
4910 fputs (" * ", dump_file);
4912 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
4915 reassociate_stats.pows_created++;
4917 /* Visit each element of the vector in reverse order (so that
4918 high-occurrence elements are visited first, and within the
4919 same occurrence count, lower-ranked elements are visited
4920 first). Form a linear product of all elements in this order
4921 whose occurrence count is at least that of element J.
4922 Record the SSA name representing the product of each element
4923 with all subsequent elements in the vector. */
4924 if (j == vec_len - 1)
4925 rf1->repr = rf1->factor;
4926 else
4928 for (ii = vec_len - 2; ii >= (int)j; ii--)
4930 tree op1, op2;
4932 rf1 = &repeat_factor_vec[ii];
4933 rf2 = &repeat_factor_vec[ii + 1];
4935 /* Init the last factor's representative to be itself. */
4936 if (!rf2->repr)
4937 rf2->repr = rf2->factor;
4939 op1 = rf1->factor;
4940 op2 = rf2->repr;
4942 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
4943 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
4944 op1, op2);
4945 gimple_set_location (mul_stmt, gimple_location (stmt));
4946 gimple_set_uid (mul_stmt, gimple_uid (stmt));
4947 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4948 rf1->repr = target_ssa;
4950 /* Don't reprocess the multiply we just introduced. */
4951 gimple_set_visited (mul_stmt, true);
4955 /* Form a call to __builtin_powi for the maximum product
4956 just formed, raised to the power obtained earlier. */
4957 rf1 = &repeat_factor_vec[j];
4958 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4959 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4960 build_int_cst (integer_type_node,
4961 power));
4962 gimple_call_set_lhs (pow_stmt, iter_result);
4963 gimple_set_location (pow_stmt, gimple_location (stmt));
4964 gimple_set_uid (pow_stmt, gimple_uid (stmt));
4965 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4968 /* If we previously formed at least one other builtin_powi call,
4969 form the product of this one and those others. */
4970 if (result)
4972 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
4973 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
4974 result, iter_result);
4975 gimple_set_location (mul_stmt, gimple_location (stmt));
4976 gimple_set_uid (mul_stmt, gimple_uid (stmt));
4977 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
4978 gimple_set_visited (mul_stmt, true);
4979 result = new_result;
4981 else
4982 result = iter_result;
4984 /* Decrement the occurrence count of each element in the product
4985 by the count found above, and remove this many copies of each
4986 factor from OPS. */
4987 for (i = j; i < vec_len; i++)
4989 unsigned k = power;
4990 unsigned n;
4992 rf1 = &repeat_factor_vec[i];
4993 rf1->count -= power;
4995 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
4997 if (oe->op == rf1->factor)
4999 if (oe->count <= k)
5001 ops->ordered_remove (n);
5002 k -= oe->count;
5004 if (k == 0)
5005 break;
5007 else
5009 oe->count -= k;
5010 break;
5017 /* At this point all elements in the repeated factor vector have a
5018 remaining occurrence count of 0 or 1, and those with a count of 1
5019 don't have cached representatives. Re-sort the ops vector and
5020 clean up. */
5021 ops->qsort (sort_by_operand_rank);
5022 repeat_factor_vec.release ();
5024 /* Return the final product computed herein. Note that there may
5025 still be some elements with single occurrence count left in OPS;
5026 those will be handled by the normal reassociation logic. */
5027 return result;
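/* A small end-to-end sketch, assuming OPS holds {x, x, x, y, y} for a
   MULT_EXPR chain.  The repeat factors are (y, 2) and (x, 3), and the
   loop above produces
     t1 = y * x;
     t2 = __builtin_powi (t1, 2);
     t3 = t2 * x;        <-- cached factor x used once more
   t3 is returned, OPS ends up empty, and the original statement is
   later turned into a copy of t3 by reassociate_bb.  */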
5030 /* Attempt to optimize
5031 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5032 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
5034 static void
5035 attempt_builtin_copysign (vec<operand_entry *> *ops)
5037 operand_entry *oe;
5038 unsigned int i;
5039 unsigned int length = ops->length ();
5040 tree cst = ops->last ()->op;
5042 if (length == 1 || TREE_CODE (cst) != REAL_CST)
5043 return;
5045 FOR_EACH_VEC_ELT (*ops, i, oe)
5047 if (TREE_CODE (oe->op) == SSA_NAME
5048 && has_single_use (oe->op))
5050 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5051 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
5053 tree arg0, arg1;
5054 switch (gimple_call_combined_fn (old_call))
5056 CASE_CFN_COPYSIGN:
5057 arg0 = gimple_call_arg (old_call, 0);
5058 arg1 = gimple_call_arg (old_call, 1);
5059 /* The first argument of copysign must be a constant,
5060 otherwise there's nothing to do. */
5061 if (TREE_CODE (arg0) == REAL_CST)
5063 tree type = TREE_TYPE (arg0);
5064 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
5065 /* If we couldn't fold to a single constant, skip it.
5066 That happens e.g. for inexact multiplication when
5067 -frounding-math. */
5068 if (mul == NULL_TREE)
5069 break;
5070 /* Instead of adjusting OLD_CALL, let's build a new
5071 call to not leak the LHS and prevent keeping bogus
5072 debug statements. DCE will clean up the old call. */
5073 gcall *new_call;
5074 if (gimple_call_internal_p (old_call))
5075 new_call = gimple_build_call_internal
5076 (IFN_COPYSIGN, 2, mul, arg1);
5077 else
5078 new_call = gimple_build_call
5079 (gimple_call_fndecl (old_call), 2, mul, arg1);
5080 tree lhs = make_ssa_name (type);
5081 gimple_call_set_lhs (new_call, lhs);
5082 gimple_set_location (new_call,
5083 gimple_location (old_call));
5084 insert_stmt_after (new_call, old_call);
5085 /* We've used the constant, get rid of it. */
5086 ops->pop ();
5087 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
5088 /* Handle the CST1 < 0 case by negating the result. */
5089 if (cst1_neg)
5091 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
5092 gimple *negate_stmt
5093 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
5094 insert_stmt_after (negate_stmt, new_call);
5095 oe->op = negrhs;
5097 else
5098 oe->op = lhs;
5099 if (dump_file && (dump_flags & TDF_DETAILS))
5101 fprintf (dump_file, "Optimizing copysign: ");
5102 print_generic_expr (dump_file, cst, 0);
5103 fprintf (dump_file, " * COPYSIGN (");
5104 print_generic_expr (dump_file, arg0, 0);
5105 fprintf (dump_file, ", ");
5106 print_generic_expr (dump_file, arg1, 0);
5107 fprintf (dump_file, ") into %sCOPYSIGN (",
5108 cst1_neg ? "-" : "");
5109 print_generic_expr (dump_file, mul, 0);
5110 fprintf (dump_file, ", ");
5111 print_generic_expr (dump_file, arg1, 0);
5112 fprintf (dump_file, ")\n");
5114 return;
5116 break;
5117 default:
5118 break;
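/* For instance, with OPS = {t, -2.0} for a MULT_EXPR chain, where
   t = copysign (3.0, y), the code above emits
     t2 = copysign (-6.0, y);
     t3 = -t2;
   pops the -2.0 constant, and replaces t by t3 in OPS, so only the
   negated copysign remains to be multiplied.  */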
5125 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
5127 static void
5128 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
5130 tree rhs1;
5132 if (dump_file && (dump_flags & TDF_DETAILS))
5134 fprintf (dump_file, "Transforming ");
5135 print_gimple_stmt (dump_file, stmt, 0, 0);
5138 rhs1 = gimple_assign_rhs1 (stmt);
5139 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
5140 update_stmt (stmt);
5141 remove_visited_stmt_chain (rhs1);
5143 if (dump_file && (dump_flags & TDF_DETAILS))
5145 fprintf (dump_file, " into ");
5146 print_gimple_stmt (dump_file, stmt, 0, 0);
5150 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
5152 static void
5153 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
5154 tree rhs1, tree rhs2)
5156 if (dump_file && (dump_flags & TDF_DETAILS))
5158 fprintf (dump_file, "Transforming ");
5159 print_gimple_stmt (dump_file, stmt, 0, 0);
5162 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5163 update_stmt (gsi_stmt (*gsi));
5164 remove_visited_stmt_chain (rhs1);
5166 if (dump_file && (dump_flags & TDF_DETAILS))
5168 fprintf (dump_file, " into ");
5169 print_gimple_stmt (dump_file, stmt, 0, 0);
5173 /* Reassociate expressions in basic block BB and its children in the
5174 post-dominator tree.
5176 Bubble up return status from maybe_optimize_range_tests. */
5178 static bool
5179 reassociate_bb (basic_block bb)
5181 gimple_stmt_iterator gsi;
5182 basic_block son;
5183 gimple *stmt = last_stmt (bb);
5184 bool cfg_cleanup_needed = false;
5186 if (stmt && !gimple_visited_p (stmt))
5187 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5189 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5191 stmt = gsi_stmt (gsi);
5193 if (is_gimple_assign (stmt)
5194 && !stmt_could_throw_p (stmt))
5196 tree lhs, rhs1, rhs2;
5197 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5199 /* If this is not a gimple binary expression, there is
5200 nothing for us to do with it. */
5201 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5202 continue;
5204 /* If this was part of an already processed statement,
5205 we don't need to touch it again. */
5206 if (gimple_visited_p (stmt))
5208 /* This statement might have become dead because of previous
5209 reassociations. */
5210 if (has_zero_uses (gimple_get_lhs (stmt)))
5212 reassoc_remove_stmt (&gsi);
5213 release_defs (stmt);
5214 /* Removing the stmt above may have left the iterator
5215 at the end of the sequence. In that case reset it
5216 to the last stmt of the block. If the block has
5217 become empty because we removed its only statement,
5218 that iterator is also at the end of the sequence,
5219 and we need to bail out. */
5220 if (gsi_end_p (gsi))
5222 gsi = gsi_last_bb (bb);
5223 if (gsi_end_p (gsi))
5224 break;
5227 continue;
5230 lhs = gimple_assign_lhs (stmt);
5231 rhs1 = gimple_assign_rhs1 (stmt);
5232 rhs2 = gimple_assign_rhs2 (stmt);
5234 /* For operations other than bitwise ops and min/max we can't
5235 associate all types. Verify that here. */
5236 if (rhs_code != BIT_IOR_EXPR
5237 && rhs_code != BIT_AND_EXPR
5238 && rhs_code != BIT_XOR_EXPR
5239 && rhs_code != MIN_EXPR
5240 && rhs_code != MAX_EXPR
5241 && (!can_reassociate_p (lhs)
5242 || !can_reassociate_p (rhs1)
5243 || !can_reassociate_p (rhs2)))
5244 continue;
5246 if (associative_tree_code (rhs_code))
5248 auto_vec<operand_entry *> ops;
5249 tree powi_result = NULL_TREE;
5250 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5252 /* There may be no immediate uses left by the time we
5253 get here because we may have eliminated them all. */
5254 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5255 continue;
5257 gimple_set_visited (stmt, true);
5258 linearize_expr_tree (&ops, stmt, true, true);
5259 ops.qsort (sort_by_operand_rank);
5260 optimize_ops_list (rhs_code, &ops);
5261 if (undistribute_ops_list (rhs_code, &ops,
5262 loop_containing_stmt (stmt)))
5264 ops.qsort (sort_by_operand_rank);
5265 optimize_ops_list (rhs_code, &ops);
5268 if (rhs_code == PLUS_EXPR
5269 && transform_add_to_multiply (&ops))
5270 ops.qsort (sort_by_operand_rank);
5272 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5274 if (is_vector)
5275 optimize_vec_cond_expr (rhs_code, &ops);
5276 else
5277 optimize_range_tests (rhs_code, &ops);
5280 if (rhs_code == MULT_EXPR && !is_vector)
5282 attempt_builtin_copysign (&ops);
5284 if (reassoc_insert_powi_p
5285 && flag_unsafe_math_optimizations)
5286 powi_result = attempt_builtin_powi (stmt, &ops);
5289 operand_entry *last;
5290 bool negate_result = false;
5291 if (ops.length () > 1
5292 && rhs_code == MULT_EXPR)
5294 last = ops.last ();
5295 if (((TREE_CODE (last->op) == INTEGER_CST
5296 && integer_minus_onep (last->op))
5297 || real_minus_onep (last->op))
5298 && !HONOR_SNANS (TREE_TYPE (lhs))
5299 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
5300 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
5302 ops.pop ();
5303 negate_result = true;
5307 /* If the operand vector is now empty, all operands were
5308 consumed by the __builtin_powi optimization. */
5309 if (ops.length () == 0)
5310 transform_stmt_to_copy (&gsi, stmt, powi_result);
5311 else if (ops.length () == 1)
5313 tree last_op = ops.last ()->op;
5315 /* If the stmt that defines operand has to be inserted, insert it
5316 before the use. */
5317 if (ops.last ()->stmt_to_insert)
5318 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
5319 if (powi_result)
5320 transform_stmt_to_multiply (&gsi, stmt, last_op,
5321 powi_result);
5322 else
5323 transform_stmt_to_copy (&gsi, stmt, last_op);
5325 else
5327 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5328 int ops_num = ops.length ();
5329 int width = get_reassociation_width (ops_num, rhs_code, mode);
5330 tree new_lhs = lhs;
5332 if (dump_file && (dump_flags & TDF_DETAILS))
5333 fprintf (dump_file,
5334 "Width = %d was chosen for reassociation\n", width);
5336 if (width > 1
5337 && ops.length () > 3)
5338 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
5339 width, ops);
5340 else
5342 /* When there are three operands left, we want
5343 to make sure the ones that get the double
5344 binary op are chosen wisely. */
5345 int len = ops.length ();
5346 if (len >= 3)
5347 swap_ops_for_binary_stmt (ops, len - 3, stmt);
5349 new_lhs = rewrite_expr_tree (stmt, 0, ops,
5350 powi_result != NULL);
5353 /* If we combined some repeated factors into a
5354 __builtin_powi call, multiply that result by the
5355 reassociated operands. */
5356 if (powi_result)
5358 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
5359 tree type = TREE_TYPE (lhs);
5360 tree target_ssa = make_temp_ssa_name (type, NULL,
5361 "reassocpow");
5362 gimple_set_lhs (lhs_stmt, target_ssa);
5363 update_stmt (lhs_stmt);
5364 if (lhs != new_lhs)
5365 target_ssa = new_lhs;
5366 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
5367 powi_result, target_ssa);
5368 gimple_set_location (mul_stmt, gimple_location (stmt));
5369 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5370 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
5374 if (negate_result)
5376 stmt = SSA_NAME_DEF_STMT (lhs);
5377 tree tmp = make_ssa_name (TREE_TYPE (lhs));
5378 gimple_set_lhs (stmt, tmp);
5379 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
5380 tmp);
5381 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5382 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
5383 update_stmt (stmt);
5388 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
5389 son;
5390 son = next_dom_son (CDI_POST_DOMINATORS, son))
5391 cfg_cleanup_needed |= reassociate_bb (son);
5393 return cfg_cleanup_needed;
5396 /* Add jumps around shifts for range tests turned into bit tests.
5397 For each SSA_NAME VAR we have code like:
5398 VAR = ...; // final stmt of range comparison
5399 // bit test here...;
5400 OTHERVAR = ...; // final stmt of the bit test sequence
5401 RES = VAR | OTHERVAR;
5402 Turn the above into:
5403 VAR = ...;
5404 if (VAR != 0)
5405 goto <l3>;
5406 else
5407 goto <l2>;
5408 <l2>:
5409 // bit test here...;
5410 OTHERVAR = ...;
5411 <l3>:
5412 # RES = PHI<1(l1), OTHERVAR(l2)>; */
5414 static void
5415 branch_fixup (void)
5417 tree var;
5418 unsigned int i;
5420 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
5422 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
5423 gimple *use_stmt;
5424 use_operand_p use;
5425 bool ok = single_imm_use (var, &use, &use_stmt);
5426 gcc_assert (ok
5427 && is_gimple_assign (use_stmt)
5428 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
5429 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
5431 basic_block cond_bb = gimple_bb (def_stmt);
5432 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
5433 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
5435 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5436 gimple *g = gimple_build_cond (NE_EXPR, var,
5437 build_zero_cst (TREE_TYPE (var)),
5438 NULL_TREE, NULL_TREE);
5439 location_t loc = gimple_location (use_stmt);
5440 gimple_set_location (g, loc);
5441 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
5443 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
5444 etrue->probability = REG_BR_PROB_BASE / 2;
5445 etrue->count = cond_bb->count / 2;
5446 edge efalse = find_edge (cond_bb, then_bb);
5447 efalse->flags = EDGE_FALSE_VALUE;
5448 efalse->probability -= etrue->probability;
5449 efalse->count -= etrue->count;
5450 then_bb->count -= etrue->count;
5452 tree othervar = NULL_TREE;
5453 if (gimple_assign_rhs1 (use_stmt) == var)
5454 othervar = gimple_assign_rhs2 (use_stmt);
5455 else if (gimple_assign_rhs2 (use_stmt) == var)
5456 othervar = gimple_assign_rhs1 (use_stmt);
5457 else
5458 gcc_unreachable ();
5459 tree lhs = gimple_assign_lhs (use_stmt);
5460 gphi *phi = create_phi_node (lhs, merge_bb);
5461 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
5462 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
5463 gsi = gsi_for_stmt (use_stmt);
5464 gsi_remove (&gsi, true);
5466 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
5467 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
5469 reassoc_branch_fixups.release ();
5472 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
5473 void debug_ops_vector (vec<operand_entry *> ops);
5475 /* Dump the operand entry vector OPS to FILE. */
5477 void
5478 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
5480 operand_entry *oe;
5481 unsigned int i;
5483 FOR_EACH_VEC_ELT (ops, i, oe)
5485 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
5486 print_generic_expr (file, oe->op, 0);
5487 fprintf (file, "\n");
5491 /* Dump the operand entry vector OPS to STDERR. */
5493 DEBUG_FUNCTION void
5494 debug_ops_vector (vec<operand_entry *> ops)
5496 dump_ops_vector (stderr, ops);
5499 /* Bubble up return status from reassociate_bb. */
5501 static bool
5502 do_reassoc (void)
5504 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5505 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
5508 /* Initialize the reassociation pass. */
5510 static void
5511 init_reassoc (void)
5513 int i;
5514 long rank = 2;
5515 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
5517 /* Find the loops, so that we can prevent moving calculations in
5518 them. */
5519 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5521 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
5523 next_operand_entry_id = 0;
5525 /* Reverse RPO (Reverse Post Order) will give us something where
5526 deeper loops come later. */
5527 pre_and_rev_post_order_compute (NULL, bbs, false);
5528 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5529 operand_rank = new hash_map<tree, long>;
5531 /* Give each default definition a distinct rank. This includes
5532 parameters and the static chain. Walk backwards over all
5533 SSA names so that we get proper rank ordering according
5534 to tree_swap_operands_p. */
5535 for (i = num_ssa_names - 1; i > 0; --i)
5537 tree name = ssa_name (i);
5538 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5539 insert_operand_rank (name, ++rank);
5542 /* Set up rank for each BB */
5543 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5544 bb_rank[bbs[i]] = ++rank << 16;
5546 free (bbs);
5547 calculate_dominance_info (CDI_POST_DOMINATORS);
5548 plus_negates = vNULL;
5551 /* Cleanup after the reassociation pass, and print stats if
5552 requested. */
5554 static void
5555 fini_reassoc (void)
5557 statistics_counter_event (cfun, "Linearized",
5558 reassociate_stats.linearized);
5559 statistics_counter_event (cfun, "Constants eliminated",
5560 reassociate_stats.constants_eliminated);
5561 statistics_counter_event (cfun, "Ops eliminated",
5562 reassociate_stats.ops_eliminated);
5563 statistics_counter_event (cfun, "Statements rewritten",
5564 reassociate_stats.rewritten);
5565 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5566 reassociate_stats.pows_encountered);
5567 statistics_counter_event (cfun, "Built-in powi calls created",
5568 reassociate_stats.pows_created);
5570 delete operand_rank;
5571 operand_entry_pool.release ();
5572 free (bb_rank);
5573 plus_negates.release ();
5574 free_dominance_info (CDI_POST_DOMINATORS);
5575 loop_optimizer_finalize ();
5578 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
5579 insertion of __builtin_powi calls.
5581 Returns TODO_cleanup_cfg if a CFG cleanup pass is desired due to
5582 optimization of a gimple conditional. Otherwise returns zero. */
5584 static unsigned int
5585 execute_reassoc (bool insert_powi_p)
5587 reassoc_insert_powi_p = insert_powi_p;
5589 init_reassoc ();
5591 bool cfg_cleanup_needed;
5592 cfg_cleanup_needed = do_reassoc ();
5593 repropagate_negates ();
5594 branch_fixup ();
5596 fini_reassoc ();
5597 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
5600 namespace {
5602 const pass_data pass_data_reassoc =
5604 GIMPLE_PASS, /* type */
5605 "reassoc", /* name */
5606 OPTGROUP_NONE, /* optinfo_flags */
5607 TV_TREE_REASSOC, /* tv_id */
5608 ( PROP_cfg | PROP_ssa ), /* properties_required */
5609 0, /* properties_provided */
5610 0, /* properties_destroyed */
5611 0, /* todo_flags_start */
5612 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5615 class pass_reassoc : public gimple_opt_pass
5617 public:
5618 pass_reassoc (gcc::context *ctxt)
5619 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
5622 /* opt_pass methods: */
5623 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5624 void set_pass_param (unsigned int n, bool param)
5626 gcc_assert (n == 0);
5627 insert_powi_p = param;
5629 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5630 virtual unsigned int execute (function *)
5631 { return execute_reassoc (insert_powi_p); }
5633 private:
5634 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
5635 point 3a in the pass header comment. */
5636 bool insert_powi_p;
5637 }; // class pass_reassoc
5639 } // anon namespace
5641 gimple_opt_pass *
5642 make_pass_reassoc (gcc::context *ctxt)
5644 return new pass_reassoc (ctxt);