gcc/tree-ssa-reassoc.c
1 /* Reassociation for trees.
2 Copyright (C) 2005-2016 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "tm_p.h"
33 #include "ssa.h"
34 #include "optabs-tree.h"
35 #include "gimple-pretty-print.h"
36 #include "diagnostic-core.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "cfganal.h"
40 #include "gimple-fold.h"
41 #include "tree-eh.h"
42 #include "gimple-iterator.h"
43 #include "gimplify-me.h"
44 #include "tree-cfg.h"
45 #include "tree-ssa-loop.h"
46 #include "flags.h"
47 #include "tree-ssa.h"
48 #include "langhooks.h"
49 #include "cfgloop.h"
50 #include "params.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
55 /* This is a simple global reassociation pass. It is, in part, based
56 on the LLVM pass of the same name (They do some things more/less
57 than we do, in different orders, etc).
59 It consists of five steps:
61 1. Breaking up subtract operations into addition + negate, where
62 it would promote the reassociation of adds.
64 2. Left linearization of the expression trees, so that (A+B)+(C+D)
65 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
66 During linearization, we place the operands of the binary
67 expressions into a vector of operand_entry_*
69 3. Optimization of the operand lists, eliminating things like a +
70 -a, a & a, etc.
72 3a. Combine repeated factors with the same occurrence counts
73 into a __builtin_powi call that will later be optimized into
74 an optimal number of multiplies.
76 4. Rewrite the expression trees we linearized and optimized so
77 they are in proper rank order.
79 5. Repropagate negates, as nothing else will clean it up ATM.
81 A bit of theory on #4, since nobody seems to write anything down
82 about why it makes sense to do it the way they do it:
84 We could do this much nicer theoretically, but don't (for reasons
85 explained after how to do it theoretically nice :P).
87 In order to promote the most redundancy elimination, you want
88 binary expressions whose operands are the same rank (or
89 preferably, the same value) exposed to the redundancy eliminator,
90 for possible elimination.
92 So the way to do this if we really cared, is to build the new op
93 tree from the leaves to the roots, merging as you go, and putting the
94 new op on the end of the worklist, until you are left with one
95 thing on the worklist.
97 IE if you have to rewrite the following set of operands (listed with
98 rank in parentheses), with opcode PLUS_EXPR:
100 a (1), b (1), c (1), d (2), e (2)
103 We start with our merge worklist empty, and the ops list with all of
104 those on it.
106 You want to first merge all leaves of the same rank, as much as
107 possible.
109 So first build a binary op of
111 mergetmp = a + b, and put "mergetmp" on the merge worklist.
113 Because there is no three operand form of PLUS_EXPR, c is not going to
114 be exposed to redundancy elimination as a rank 1 operand.
116 So you might as well throw it on the merge worklist (you could also
117 consider it to now be a rank two operand, and merge it with d and e,
118 but in this case, you then have evicted e from a binary op. So at
119 least in this situation, you can't win.)
121 Then build a binary op of d + e
122 mergetmp2 = d + e
124 and put mergetmp2 on the merge worklist.
126 so merge worklist = {mergetmp, c, mergetmp2}
128 Continue building binary ops of these operations until you have only
129 one operation left on the worklist.
131 So we have
133 build binary op
134 mergetmp3 = mergetmp + c
136 worklist = {mergetmp2, mergetmp3}
138 mergetmp4 = mergetmp2 + mergetmp3
140 worklist = {mergetmp4}
142 because we have one operation left, we can now just set the original
143 statement equal to the result of that operation.
145 This will at least expose a + b and d + e to redundancy elimination
146 as binary operations.
148 For extra points, you can reuse the old statements to build the
149 mergetmps, since you shouldn't run out.
151 So why don't we do this?
153 Because it's expensive, and rarely will help. Most trees we are
 154    reassociating have 3 or fewer ops.  If they have 2 ops, they already
155 will be written into a nice single binary op. If you have 3 ops, a
156 single simple check suffices to tell you whether the first two are of the
157 same rank. If so, you know to order it
159 mergetmp = op1 + op2
160 newstmt = mergetmp + op3
162 instead of
163 mergetmp = op2 + op3
164 newstmt = mergetmp + op1
166 If all three are of the same rank, you can't expose them all in a
167 single binary operator anyway, so the above is *still* the best you
168 can do.
170 Thus, this is what we do. When we have three ops left, we check to see
171 what order to put them in, and call it a day. As a nod to vector sum
172 reduction, we check if any of the ops are really a phi node that is a
173 destructive update for the associating op, and keep the destructive
174 update together for vector sum reduction recognition. */
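/* Illustrative, self-contained sketch (not part of this pass): the
   rank-guided rewrite described above, written out in plain C for the
   worked example.  a, b and c have rank 1, d and e have rank 2, and the
   "mergetmp" names are hypothetical.  Unsigned arithmetic is used so the
   reassociated sum is exactly equal to the original one.  */
static unsigned int
reassoc_rank_merge_example (unsigned int a, unsigned int b, unsigned int c,
                            unsigned int d, unsigned int e)
{
  unsigned int mergetmp = a + b;             /* merge the rank-1 pair      */
  unsigned int mergetmp2 = d + e;            /* merge the rank-2 pair      */
  unsigned int mergetmp3 = mergetmp + c;     /* leftover rank-1 operand    */
  unsigned int mergetmp4 = mergetmp2 + mergetmp3;
  return mergetmp4;                          /* equals a + b + c + d + e   */
}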
176 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
177 point 3a in the pass header comment. */
178 static bool reassoc_insert_powi_p;
180 /* Statistics */
181 static struct
183 int linearized;
184 int constants_eliminated;
185 int ops_eliminated;
186 int rewritten;
187 int pows_encountered;
188 int pows_created;
189 } reassociate_stats;
191 /* Operator, rank pair. */
192 struct operand_entry
194 unsigned int rank;
195 int id;
196 tree op;
197 unsigned int count;
198 gimple *stmt_to_insert;
201 static object_allocator<operand_entry> operand_entry_pool
202 ("operand entry pool");
204 /* This is used to assign a unique ID to each struct operand_entry
205 so that qsort results are identical on different hosts. */
206 static int next_operand_entry_id;
208 /* Starting rank number for a given basic block, so that we can rank
209 operations using unmovable instructions in that BB based on the bb
210 depth. */
211 static long *bb_rank;
213 /* Operand->rank hashtable. */
214 static hash_map<tree, long> *operand_rank;
216 /* Vector of SSA_NAMEs on which after reassociate_bb is done with
217 all basic blocks the CFG should be adjusted - basic blocks
218 split right after that SSA_NAME's definition statement and before
219 the only use, which must be a bit ior. */
220 static vec<tree> reassoc_branch_fixups;
222 /* Forward decls. */
223 static long get_rank (tree);
224 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
226 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
227 possibly added by gsi_remove. */
229 bool
230 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
232 gimple *stmt = gsi_stmt (*gsi);
234 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
235 return gsi_remove (gsi, true);
237 gimple_stmt_iterator prev = *gsi;
238 gsi_prev (&prev);
239 unsigned uid = gimple_uid (stmt);
240 basic_block bb = gimple_bb (stmt);
241 bool ret = gsi_remove (gsi, true);
242 if (!gsi_end_p (prev))
243 gsi_next (&prev);
244 else
245 prev = gsi_start_bb (bb);
246 gimple *end_stmt = gsi_stmt (*gsi);
247 while ((stmt = gsi_stmt (prev)) != end_stmt)
249 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
250 gimple_set_uid (stmt, uid);
251 gsi_next (&prev);
253 return ret;
256 /* Bias amount for loop-carried phis. We want this to be larger than
257 the depth of any reassociation tree we can see, but not larger than
258 the rank difference between two blocks. */
259 #define PHI_LOOP_BIAS (1 << 15)
261 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
262 an innermost loop, and the phi has only a single use which is inside
263 the loop, then the rank is the block rank of the loop latch plus an
264 extra bias for the loop-carried dependence. This causes expressions
265 calculated into an accumulator variable to be independent for each
266 iteration of the loop. If STMT is some other phi, the rank is the
267 block rank of its containing block. */
268 static long
269 phi_rank (gimple *stmt)
271 basic_block bb = gimple_bb (stmt);
272 struct loop *father = bb->loop_father;
273 tree res;
274 unsigned i;
275 use_operand_p use;
276 gimple *use_stmt;
278 /* We only care about real loops (those with a latch). */
279 if (!father->latch)
280 return bb_rank[bb->index];
282 /* Interesting phis must be in headers of innermost loops. */
283 if (bb != father->header
284 || father->inner)
285 return bb_rank[bb->index];
287 /* Ignore virtual SSA_NAMEs. */
288 res = gimple_phi_result (stmt);
289 if (virtual_operand_p (res))
290 return bb_rank[bb->index];
292 /* The phi definition must have a single use, and that use must be
293 within the loop. Otherwise this isn't an accumulator pattern. */
294 if (!single_imm_use (res, &use, &use_stmt)
295 || gimple_bb (use_stmt)->loop_father != father)
296 return bb_rank[bb->index];
298 /* Look for phi arguments from within the loop. If found, bias this phi. */
299 for (i = 0; i < gimple_phi_num_args (stmt); i++)
301 tree arg = gimple_phi_arg_def (stmt, i);
302 if (TREE_CODE (arg) == SSA_NAME
303 && !SSA_NAME_IS_DEFAULT_DEF (arg))
305 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
306 if (gimple_bb (def_stmt)->loop_father == father)
307 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
311 /* Must be an uninteresting phi. */
312 return bb_rank[bb->index];
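/* Illustrative, self-contained sketch (not part of this pass): the kind
   of loop-carried accumulator the bias above is aimed at.  Without the
   bias, "sum" ranks like any other operand and every addition in the body
   stays chained through it; with the bias, the other addends are combined
   first and "sum" is added last, shortening the dependence chain.  */
static unsigned int
phi_bias_example (const unsigned int *a, const unsigned int *d,
                  const unsigned int *e, int n)
{
  unsigned int sum = 0;   /* "sum" is the loop-carried phi.  */
  for (int i = 0; i < n; i++)
    sum = sum + a[i] + d[i] + e[i];
  return sum;
}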
315 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
316 loop-carried dependence of an innermost loop, return TRUE; else
317 return FALSE. */
318 static bool
319 loop_carried_phi (tree exp)
321 gimple *phi_stmt;
322 long block_rank;
324 if (TREE_CODE (exp) != SSA_NAME
325 || SSA_NAME_IS_DEFAULT_DEF (exp))
326 return false;
328 phi_stmt = SSA_NAME_DEF_STMT (exp);
330 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
331 return false;
333 /* Non-loop-carried phis have block rank. Loop-carried phis have
334 an additional bias added in. If this phi doesn't have block rank,
335 it's biased and should not be propagated. */
336 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
338 if (phi_rank (phi_stmt) != block_rank)
339 return true;
341 return false;
344 /* Return the maximum of RANK and the rank that should be propagated
345 from expression OP. For most operands, this is just the rank of OP.
346 For loop-carried phis, the value is zero to avoid undoing the bias
347 in favor of the phi. */
348 static long
349 propagate_rank (long rank, tree op)
351 long op_rank;
353 if (loop_carried_phi (op))
354 return rank;
356 op_rank = get_rank (op);
358 return MAX (rank, op_rank);
361 /* Look up the operand rank structure for expression E. */
363 static inline long
364 find_operand_rank (tree e)
366 long *slot = operand_rank->get (e);
367 return slot ? *slot : -1;
370 /* Insert {E,RANK} into the operand rank hashtable. */
372 static inline void
373 insert_operand_rank (tree e, long rank)
375 gcc_assert (rank > 0);
376 gcc_assert (!operand_rank->put (e, rank));
379 /* Given an expression E, return the rank of the expression. */
381 static long
382 get_rank (tree e)
384 /* SSA_NAME's have the rank of the expression they are the result
386 For globals and uninitialized values, the rank is 0.
387 For function arguments, use the pre-setup rank.
388 For PHI nodes, stores, asm statements, etc, we use the rank of
389 the BB.
390 For simple operations, the rank is the maximum rank of any of
391 its operands, or the bb_rank, whichever is less.
 392    I make no claims that this is optimal; however, it gives good
393 results. */
395 /* We make an exception to the normal ranking system to break
396 dependences of accumulator variables in loops. Suppose we
397 have a simple one-block loop containing:
399 x_1 = phi(x_0, x_2)
400 b = a + x_1
401 c = b + d
402 x_2 = c + e
404 As shown, each iteration of the calculation into x is fully
405 dependent upon the iteration before it. We would prefer to
406 see this in the form:
408 x_1 = phi(x_0, x_2)
409 b = a + d
410 c = b + e
411 x_2 = c + x_1
413 If the loop is unrolled, the calculations of b and c from
414 different iterations can be interleaved.
416 To obtain this result during reassociation, we bias the rank
417 of the phi definition x_1 upward, when it is recognized as an
418 accumulator pattern. The artificial rank causes it to be
419 added last, providing the desired independence. */
421 if (TREE_CODE (e) == SSA_NAME)
423 ssa_op_iter iter;
424 gimple *stmt;
425 long rank;
426 tree op;
428 if (SSA_NAME_IS_DEFAULT_DEF (e))
429 return find_operand_rank (e);
431 stmt = SSA_NAME_DEF_STMT (e);
432 if (gimple_code (stmt) == GIMPLE_PHI)
433 return phi_rank (stmt);
435 if (!is_gimple_assign (stmt))
436 return bb_rank[gimple_bb (stmt)->index];
438 /* If we already have a rank for this expression, use that. */
439 rank = find_operand_rank (e);
440 if (rank != -1)
441 return rank;
443 /* Otherwise, find the maximum rank for the operands. As an
444 exception, remove the bias from loop-carried phis when propagating
445 the rank so that dependent operations are not also biased. */
 446       /* Simply walk over all SSA uses - this takes advantage of the
447 fact that non-SSA operands are is_gimple_min_invariant and
448 thus have rank 0. */
449 rank = 0;
450 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
451 rank = propagate_rank (rank, op);
453 if (dump_file && (dump_flags & TDF_DETAILS))
455 fprintf (dump_file, "Rank for ");
456 print_generic_expr (dump_file, e, 0);
457 fprintf (dump_file, " is %ld\n", (rank + 1));
460 /* Note the rank in the hashtable so we don't recompute it. */
461 insert_operand_rank (e, (rank + 1));
462 return (rank + 1);
465 /* Constants, globals, etc., are rank 0 */
466 return 0;
470 /* We want integer ones to end up last no matter what, since they are
471 the ones we can do the most with. */
472 #define INTEGER_CONST_TYPE 1 << 3
473 #define FLOAT_CONST_TYPE 1 << 2
474 #define OTHER_CONST_TYPE 1 << 1
476 /* Classify an invariant tree into integer, float, or other, so that
477 we can sort them to be near other constants of the same type. */
478 static inline int
479 constant_type (tree t)
481 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
482 return INTEGER_CONST_TYPE;
483 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
484 return FLOAT_CONST_TYPE;
485 else
486 return OTHER_CONST_TYPE;
489 /* qsort comparison function to sort operand entries PA and PB by rank
490 so that the sorted array is ordered by rank in decreasing order. */
491 static int
492 sort_by_operand_rank (const void *pa, const void *pb)
494 const operand_entry *oea = *(const operand_entry *const *)pa;
495 const operand_entry *oeb = *(const operand_entry *const *)pb;
497 /* It's nicer for optimize_expression if constants that are likely
 498      to fold when added/multiplied/whatever are put next to each
499 other. Since all constants have rank 0, order them by type. */
500 if (oeb->rank == 0 && oea->rank == 0)
502 if (constant_type (oeb->op) != constant_type (oea->op))
503 return constant_type (oeb->op) - constant_type (oea->op);
504 else
505 /* To make sorting result stable, we use unique IDs to determine
506 order. */
507 return oeb->id - oea->id;
510 /* Lastly, make sure the versions that are the same go next to each
511 other. */
512 if ((oeb->rank - oea->rank == 0)
513 && TREE_CODE (oea->op) == SSA_NAME
514 && TREE_CODE (oeb->op) == SSA_NAME)
 516       /* SSA_NAME_VERSION is assigned pretty randomly because we reuse
517 versions of removed SSA_NAMEs, so if possible, prefer to sort
518 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
519 See PR60418. */
520 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
521 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
522 && !oea->stmt_to_insert
523 && !oeb->stmt_to_insert
524 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
526 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
527 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
528 basic_block bba = gimple_bb (stmta);
529 basic_block bbb = gimple_bb (stmtb);
530 if (bbb != bba)
532 if (bb_rank[bbb->index] != bb_rank[bba->index])
533 return bb_rank[bbb->index] - bb_rank[bba->index];
535 else
537 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
538 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
539 if (da != db)
540 return da ? 1 : -1;
544 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
545 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
546 else
547 return oeb->id - oea->id;
550 if (oeb->rank != oea->rank)
551 return oeb->rank - oea->rank;
552 else
553 return oeb->id - oea->id;
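/* Illustrative, self-contained sketch (not part of this pass): the
   "decreasing rank, then unique id" ordering implemented above, shown with
   a toy struct so the comparator's intent can be checked in isolation.
   The names are hypothetical; a real caller would hand toy_rank_cmp to
   qsort.  */
struct toy_entry { unsigned int rank; int id; };

static int
toy_rank_cmp (const void *pa, const void *pb)
{
  const struct toy_entry *a = (const struct toy_entry *) pa;
  const struct toy_entry *b = (const struct toy_entry *) pb;
  if (b->rank != a->rank)
    return (int) b->rank - (int) a->rank;   /* higher rank sorts first  */
  return b->id - a->id;                     /* unique ids break ties    */
}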
556 /* Add an operand entry to *OPS for the tree operand OP. */
558 static void
559 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
561 operand_entry *oe = operand_entry_pool.allocate ();
563 oe->op = op;
564 oe->rank = get_rank (op);
565 oe->id = next_operand_entry_id++;
566 oe->count = 1;
567 oe->stmt_to_insert = stmt_to_insert;
568 ops->safe_push (oe);
571 /* Add an operand entry to *OPS for the tree operand OP with repeat
572 count REPEAT. */
574 static void
575 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
576 HOST_WIDE_INT repeat)
578 operand_entry *oe = operand_entry_pool.allocate ();
580 oe->op = op;
581 oe->rank = get_rank (op);
582 oe->id = next_operand_entry_id++;
583 oe->count = repeat;
584 oe->stmt_to_insert = NULL;
585 ops->safe_push (oe);
587 reassociate_stats.pows_encountered++;
 590 /* Return true if STMT is a reassociable operation containing a binary
591 operation with tree code CODE, and is inside LOOP. */
593 static bool
594 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
596 basic_block bb = gimple_bb (stmt);
598 if (gimple_bb (stmt) == NULL)
599 return false;
601 if (!flow_bb_inside_loop_p (loop, bb))
602 return false;
604 if (is_gimple_assign (stmt)
605 && gimple_assign_rhs_code (stmt) == code
606 && has_single_use (gimple_assign_lhs (stmt)))
607 return true;
609 return false;
613 /* Return true if STMT is a nop-conversion. */
615 static bool
616 gimple_nop_conversion_p (gimple *stmt)
618 if (gassign *ass = dyn_cast <gassign *> (stmt))
620 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
621 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
622 TREE_TYPE (gimple_assign_rhs1 (ass))))
623 return true;
625 return false;
628 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
 629    operand of that unary operation.  Otherwise, return NULL.  */
631 static tree
632 get_unary_op (tree name, enum tree_code opcode)
634 gimple *stmt = SSA_NAME_DEF_STMT (name);
636 /* Look through nop conversions (sign changes). */
637 if (gimple_nop_conversion_p (stmt)
638 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
639 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
641 if (!is_gimple_assign (stmt))
642 return NULL_TREE;
644 if (gimple_assign_rhs_code (stmt) == opcode)
645 return gimple_assign_rhs1 (stmt);
646 return NULL_TREE;
 649 /* Return true if OP1 and OP2 have the same value if cast to either type.  */
651 static bool
652 ops_equal_values_p (tree op1, tree op2)
654 if (op1 == op2)
655 return true;
657 tree orig_op1 = op1;
658 if (TREE_CODE (op1) == SSA_NAME)
660 gimple *stmt = SSA_NAME_DEF_STMT (op1);
661 if (gimple_nop_conversion_p (stmt))
663 op1 = gimple_assign_rhs1 (stmt);
664 if (op1 == op2)
665 return true;
669 if (TREE_CODE (op2) == SSA_NAME)
671 gimple *stmt = SSA_NAME_DEF_STMT (op2);
672 if (gimple_nop_conversion_p (stmt))
674 op2 = gimple_assign_rhs1 (stmt);
675 if (op1 == op2
676 || orig_op1 == op2)
677 return true;
681 return false;
685 /* If CURR and LAST are a pair of ops that OPCODE allows us to
686 eliminate through equivalences, do so, remove them from OPS, and
687 return true. Otherwise, return false. */
689 static bool
690 eliminate_duplicate_pair (enum tree_code opcode,
691 vec<operand_entry *> *ops,
692 bool *all_done,
693 unsigned int i,
694 operand_entry *curr,
695 operand_entry *last)
 698   /* If we have two of the same op, and the opcode is &, |, min, or max,
699 we can eliminate one of them.
700 If we have two of the same op, and the opcode is ^, we can
701 eliminate both of them. */
703 if (last && last->op == curr->op)
705 switch (opcode)
707 case MAX_EXPR:
708 case MIN_EXPR:
709 case BIT_IOR_EXPR:
710 case BIT_AND_EXPR:
711 if (dump_file && (dump_flags & TDF_DETAILS))
713 fprintf (dump_file, "Equivalence: ");
714 print_generic_expr (dump_file, curr->op, 0);
715 fprintf (dump_file, " [&|minmax] ");
716 print_generic_expr (dump_file, last->op, 0);
717 fprintf (dump_file, " -> ");
718 print_generic_stmt (dump_file, last->op, 0);
721 ops->ordered_remove (i);
722 reassociate_stats.ops_eliminated ++;
724 return true;
726 case BIT_XOR_EXPR:
727 if (dump_file && (dump_flags & TDF_DETAILS))
729 fprintf (dump_file, "Equivalence: ");
730 print_generic_expr (dump_file, curr->op, 0);
731 fprintf (dump_file, " ^ ");
732 print_generic_expr (dump_file, last->op, 0);
733 fprintf (dump_file, " -> nothing\n");
736 reassociate_stats.ops_eliminated += 2;
738 if (ops->length () == 2)
740 ops->truncate (0);
741 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
742 *all_done = true;
744 else
746 ops->ordered_remove (i-1);
747 ops->ordered_remove (i-1);
750 return true;
752 default:
753 break;
756 return false;
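/* Illustrative sketch (not part of this pass): the identities the
   duplicate-pair elimination above relies on, written out directly.  */
static unsigned int
duplicate_pair_identities (unsigned int x)
{
  unsigned int and_dup = x & x;           /* x & x == x   */
  unsigned int ior_dup = x | x;           /* x | x == x   */
  unsigned int xor_dup = x ^ x;           /* x ^ x == 0   */
  return and_dup + ior_dup + xor_dup;     /* equals x + x */
}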
759 static vec<tree> plus_negates;
761 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
762 expression, look in OPS for a corresponding positive operation to cancel
763 it out. If we find one, remove the other from OPS, replace
764 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
765 return false. */
767 static bool
768 eliminate_plus_minus_pair (enum tree_code opcode,
769 vec<operand_entry *> *ops,
770 unsigned int currindex,
771 operand_entry *curr)
773 tree negateop;
774 tree notop;
775 unsigned int i;
776 operand_entry *oe;
778 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
779 return false;
781 negateop = get_unary_op (curr->op, NEGATE_EXPR);
782 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
783 if (negateop == NULL_TREE && notop == NULL_TREE)
784 return false;
786 /* Any non-negated version will have a rank that is one less than
787 the current rank. So once we hit those ranks, if we don't find
788 one, we can stop. */
790 for (i = currindex + 1;
791 ops->iterate (i, &oe)
792 && oe->rank >= curr->rank - 1 ;
793 i++)
795 if (negateop
796 && ops_equal_values_p (oe->op, negateop))
798 if (dump_file && (dump_flags & TDF_DETAILS))
800 fprintf (dump_file, "Equivalence: ");
801 print_generic_expr (dump_file, negateop, 0);
802 fprintf (dump_file, " + -");
803 print_generic_expr (dump_file, oe->op, 0);
804 fprintf (dump_file, " -> 0\n");
807 ops->ordered_remove (i);
808 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
809 ops->ordered_remove (currindex);
810 reassociate_stats.ops_eliminated ++;
812 return true;
814 else if (notop
815 && ops_equal_values_p (oe->op, notop))
817 tree op_type = TREE_TYPE (oe->op);
819 if (dump_file && (dump_flags & TDF_DETAILS))
821 fprintf (dump_file, "Equivalence: ");
822 print_generic_expr (dump_file, notop, 0);
823 fprintf (dump_file, " + ~");
824 print_generic_expr (dump_file, oe->op, 0);
825 fprintf (dump_file, " -> -1\n");
828 ops->ordered_remove (i);
829 add_to_ops_vec (ops, build_all_ones_cst (op_type));
830 ops->ordered_remove (currindex);
831 reassociate_stats.ops_eliminated ++;
833 return true;
837 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
838 save it for later inspection in repropagate_negates(). */
839 if (negateop != NULL_TREE
840 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
841 plus_negates.safe_push (curr->op);
843 return false;
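/* Illustrative sketch (not part of this pass): the two cancellations
   searched for above, written as unsigned identities so the example has
   no signed-overflow concerns.  */
static unsigned int
plus_minus_pair_identities (unsigned int a, unsigned int other)
{
  unsigned int negate_pair = a + (-a) + other;   /* folds to "other"            */
  unsigned int not_pair = a + ~a;                /* folds to ~0u, i.e. all ones */
  return negate_pair + not_pair;
}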
 846 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
847 bitwise not expression, look in OPS for a corresponding operand to
848 cancel it out. If we find one, remove the other from OPS, replace
849 OPS[CURRINDEX] with 0, and return true. Otherwise, return
850 false. */
852 static bool
853 eliminate_not_pairs (enum tree_code opcode,
854 vec<operand_entry *> *ops,
855 unsigned int currindex,
856 operand_entry *curr)
858 tree notop;
859 unsigned int i;
860 operand_entry *oe;
862 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
863 || TREE_CODE (curr->op) != SSA_NAME)
864 return false;
866 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
867 if (notop == NULL_TREE)
868 return false;
870 /* Any non-not version will have a rank that is one less than
871 the current rank. So once we hit those ranks, if we don't find
872 one, we can stop. */
874 for (i = currindex + 1;
875 ops->iterate (i, &oe)
876 && oe->rank >= curr->rank - 1;
877 i++)
879 if (oe->op == notop)
881 if (dump_file && (dump_flags & TDF_DETAILS))
883 fprintf (dump_file, "Equivalence: ");
884 print_generic_expr (dump_file, notop, 0);
885 if (opcode == BIT_AND_EXPR)
886 fprintf (dump_file, " & ~");
887 else if (opcode == BIT_IOR_EXPR)
888 fprintf (dump_file, " | ~");
889 print_generic_expr (dump_file, oe->op, 0);
890 if (opcode == BIT_AND_EXPR)
891 fprintf (dump_file, " -> 0\n");
892 else if (opcode == BIT_IOR_EXPR)
893 fprintf (dump_file, " -> -1\n");
896 if (opcode == BIT_AND_EXPR)
897 oe->op = build_zero_cst (TREE_TYPE (oe->op));
898 else if (opcode == BIT_IOR_EXPR)
899 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
901 reassociate_stats.ops_eliminated += ops->length () - 1;
902 ops->truncate (0);
903 ops->quick_push (oe);
904 return true;
908 return false;
911 /* Use constant value that may be present in OPS to try to eliminate
912 operands. Note that this function is only really used when we've
913 eliminated ops for other reasons, or merged constants. Across
914 single statements, fold already does all of this, plus more. There
915 is little point in duplicating logic, so I've only included the
916 identities that I could ever construct testcases to trigger. */
918 static void
919 eliminate_using_constants (enum tree_code opcode,
920 vec<operand_entry *> *ops)
922 operand_entry *oelast = ops->last ();
923 tree type = TREE_TYPE (oelast->op);
925 if (oelast->rank == 0
926 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
928 switch (opcode)
930 case BIT_AND_EXPR:
931 if (integer_zerop (oelast->op))
933 if (ops->length () != 1)
935 if (dump_file && (dump_flags & TDF_DETAILS))
936 fprintf (dump_file, "Found & 0, removing all other ops\n");
938 reassociate_stats.ops_eliminated += ops->length () - 1;
940 ops->truncate (0);
941 ops->quick_push (oelast);
942 return;
945 else if (integer_all_onesp (oelast->op))
947 if (ops->length () != 1)
949 if (dump_file && (dump_flags & TDF_DETAILS))
950 fprintf (dump_file, "Found & -1, removing\n");
951 ops->pop ();
952 reassociate_stats.ops_eliminated++;
955 break;
956 case BIT_IOR_EXPR:
957 if (integer_all_onesp (oelast->op))
959 if (ops->length () != 1)
961 if (dump_file && (dump_flags & TDF_DETAILS))
962 fprintf (dump_file, "Found | -1, removing all other ops\n");
964 reassociate_stats.ops_eliminated += ops->length () - 1;
966 ops->truncate (0);
967 ops->quick_push (oelast);
968 return;
971 else if (integer_zerop (oelast->op))
973 if (ops->length () != 1)
975 if (dump_file && (dump_flags & TDF_DETAILS))
976 fprintf (dump_file, "Found | 0, removing\n");
977 ops->pop ();
978 reassociate_stats.ops_eliminated++;
981 break;
982 case MULT_EXPR:
983 if (integer_zerop (oelast->op)
984 || (FLOAT_TYPE_P (type)
985 && !HONOR_NANS (type)
986 && !HONOR_SIGNED_ZEROS (type)
987 && real_zerop (oelast->op)))
989 if (ops->length () != 1)
991 if (dump_file && (dump_flags & TDF_DETAILS))
992 fprintf (dump_file, "Found * 0, removing all other ops\n");
994 reassociate_stats.ops_eliminated += ops->length () - 1;
995 ops->truncate (1);
996 ops->quick_push (oelast);
997 return;
1000 else if (integer_onep (oelast->op)
1001 || (FLOAT_TYPE_P (type)
1002 && !HONOR_SNANS (type)
1003 && real_onep (oelast->op)))
1005 if (ops->length () != 1)
1007 if (dump_file && (dump_flags & TDF_DETAILS))
1008 fprintf (dump_file, "Found * 1, removing\n");
1009 ops->pop ();
1010 reassociate_stats.ops_eliminated++;
1011 return;
1014 break;
1015 case BIT_XOR_EXPR:
1016 case PLUS_EXPR:
1017 case MINUS_EXPR:
1018 if (integer_zerop (oelast->op)
1019 || (FLOAT_TYPE_P (type)
1020 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1021 && fold_real_zero_addition_p (type, oelast->op,
1022 opcode == MINUS_EXPR)))
1024 if (ops->length () != 1)
1026 if (dump_file && (dump_flags & TDF_DETAILS))
1027 fprintf (dump_file, "Found [|^+] 0, removing\n");
1028 ops->pop ();
1029 reassociate_stats.ops_eliminated++;
1030 return;
1033 break;
1034 default:
1035 break;
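/* Illustrative sketch (not part of this pass): the constant identities
   handled above, for a single unsigned operand.  */
static unsigned int
constant_identities (unsigned int x)
{
  unsigned int and_zero = x & 0u;    /* & 0: all other ops can be dropped   */
  unsigned int ior_ones = x | ~0u;   /* | -1: all other ops can be dropped  */
  unsigned int mul_zero = x * 0u;    /* * 0: all other ops can be dropped   */
  unsigned int mul_one = x * 1u;     /* * 1: only the constant is dropped   */
  unsigned int add_zero = x + 0u;    /* + 0: only the constant is dropped   */
  return and_zero + ior_ones + mul_zero + mul_one + add_zero;
}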
1041 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1042 bool, bool);
1044 /* Structure for tracking and counting operands. */
1045 struct oecount {
1046 int cnt;
1047 int id;
1048 enum tree_code oecode;
1049 tree op;
1053 /* The heap for the oecount hashtable and the sorted list of operands. */
1054 static vec<oecount> cvec;
1057 /* Oecount hashtable helpers. */
1059 struct oecount_hasher : int_hash <int, 0, 1>
1061 static inline hashval_t hash (int);
1062 static inline bool equal (int, int);
1065 /* Hash function for oecount. */
1067 inline hashval_t
1068 oecount_hasher::hash (int p)
1070 const oecount *c = &cvec[p - 42];
1071 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1074 /* Comparison function for oecount. */
1076 inline bool
1077 oecount_hasher::equal (int p1, int p2)
1079 const oecount *c1 = &cvec[p1 - 42];
1080 const oecount *c2 = &cvec[p2 - 42];
1081 return (c1->oecode == c2->oecode
1082 && c1->op == c2->op);
1085 /* Comparison function for qsort sorting oecount elements by count. */
1087 static int
1088 oecount_cmp (const void *p1, const void *p2)
1090 const oecount *c1 = (const oecount *)p1;
1091 const oecount *c2 = (const oecount *)p2;
1092 if (c1->cnt != c2->cnt)
1093 return c1->cnt - c2->cnt;
1094 else
1095 /* If counts are identical, use unique IDs to stabilize qsort. */
1096 return c1->id - c2->id;
1099 /* Return TRUE iff STMT represents a builtin call that raises OP
1100 to some exponent. */
1102 static bool
1103 stmt_is_power_of_op (gimple *stmt, tree op)
1105 if (!is_gimple_call (stmt))
1106 return false;
1108 switch (gimple_call_combined_fn (stmt))
1110 CASE_CFN_POW:
1111 CASE_CFN_POWI:
1112 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1114 default:
1115 return false;
1119 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1120 in place and return the result. Assumes that stmt_is_power_of_op
1121 was previously called for STMT and returned TRUE. */
1123 static HOST_WIDE_INT
1124 decrement_power (gimple *stmt)
1126 REAL_VALUE_TYPE c, cint;
1127 HOST_WIDE_INT power;
1128 tree arg1;
1130 switch (gimple_call_combined_fn (stmt))
1132 CASE_CFN_POW:
1133 arg1 = gimple_call_arg (stmt, 1);
1134 c = TREE_REAL_CST (arg1);
1135 power = real_to_integer (&c) - 1;
1136 real_from_integer (&cint, VOIDmode, power, SIGNED);
1137 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1138 return power;
1140 CASE_CFN_POWI:
1141 arg1 = gimple_call_arg (stmt, 1);
1142 power = TREE_INT_CST_LOW (arg1) - 1;
1143 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1144 return power;
1146 default:
1147 gcc_unreachable ();
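/* Illustrative, self-contained sketch (not part of this pass): the
   identity decrement_power exploits, pow (x, n) == x * pow (x, n - 1),
   shown with a hypothetical integer-exponent helper.  */
static double
toy_powi (double x, int n)
{
  double r = 1.0;
  for (int i = 0; i < n; i++)
    r *= x;
  /* For n > 0, toy_powi (x, n) == x * toy_powi (x, n - 1).  */
  return r;
}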
1151 /* Find the single immediate use of STMT's LHS, and replace it
1152 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1153 replace *DEF with OP as well. */
1155 static void
1156 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1158 tree lhs;
1159 gimple *use_stmt;
1160 use_operand_p use;
1161 gimple_stmt_iterator gsi;
1163 if (is_gimple_call (stmt))
1164 lhs = gimple_call_lhs (stmt);
1165 else
1166 lhs = gimple_assign_lhs (stmt);
1168 gcc_assert (has_single_use (lhs));
1169 single_imm_use (lhs, &use, &use_stmt);
1170 if (lhs == *def)
1171 *def = op;
1172 SET_USE (use, op);
1173 if (TREE_CODE (op) != SSA_NAME)
1174 update_stmt (use_stmt);
1175 gsi = gsi_for_stmt (stmt);
1176 unlink_stmt_vdef (stmt);
1177 reassoc_remove_stmt (&gsi);
1178 release_defs (stmt);
1181 /* Walks the linear chain with result *DEF searching for an operation
1182    with operand OP and code OPCODE, removing that from the chain.  *DEF
1183 is updated if there is only one operand but no operation left. */
1185 static void
1186 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1188 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1192 tree name;
1194 if (opcode == MULT_EXPR)
1196 if (stmt_is_power_of_op (stmt, op))
1198 if (decrement_power (stmt) == 1)
1199 propagate_op_to_single_use (op, stmt, def);
1200 return;
1202 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1204 if (gimple_assign_rhs1 (stmt) == op)
1206 tree cst = build_minus_one_cst (TREE_TYPE (op));
1207 propagate_op_to_single_use (cst, stmt, def);
1208 return;
1210 else if (integer_minus_onep (op)
1211 || real_minus_onep (op))
1213 gimple_assign_set_rhs_code
1214 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1215 return;
1220 name = gimple_assign_rhs1 (stmt);
1222 /* If this is the operation we look for and one of the operands
1223 is ours simply propagate the other operand into the stmts
1224 single use. */
1225 if (gimple_assign_rhs_code (stmt) == opcode
1226 && (name == op
1227 || gimple_assign_rhs2 (stmt) == op))
1229 if (name == op)
1230 name = gimple_assign_rhs2 (stmt);
1231 propagate_op_to_single_use (name, stmt, def);
1232 return;
1235 /* We might have a multiply of two __builtin_pow* calls, and
1236 the operand might be hiding in the rightmost one. Likewise
1237 this can happen for a negate. */
1238 if (opcode == MULT_EXPR
1239 && gimple_assign_rhs_code (stmt) == opcode
1240 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1241 && has_single_use (gimple_assign_rhs2 (stmt)))
1243 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1244 if (stmt_is_power_of_op (stmt2, op))
1246 if (decrement_power (stmt2) == 1)
1247 propagate_op_to_single_use (op, stmt2, def);
1248 return;
1250 else if (is_gimple_assign (stmt2)
1251 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1253 if (gimple_assign_rhs1 (stmt2) == op)
1255 tree cst = build_minus_one_cst (TREE_TYPE (op));
1256 propagate_op_to_single_use (cst, stmt2, def);
1257 return;
1259 else if (integer_minus_onep (op)
1260 || real_minus_onep (op))
1262 gimple_assign_set_rhs_code
1263 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1264 return;
1269 /* Continue walking the chain. */
1270 gcc_assert (name != op
1271 && TREE_CODE (name) == SSA_NAME);
1272 stmt = SSA_NAME_DEF_STMT (name);
1274 while (1);
1277 /* Returns true if statement S1 dominates statement S2. Like
1278 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1280 static bool
1281 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1283 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1285 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1286 SSA_NAME. Assume it lives at the beginning of function and
1287 thus dominates everything. */
1288 if (!bb1 || s1 == s2)
1289 return true;
1291 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1292 if (!bb2)
1293 return false;
1295 if (bb1 == bb2)
1297 /* PHIs in the same basic block are assumed to be
1298          executed all in parallel; if only one stmt is a PHI,
1299 it dominates the other stmt in the same basic block. */
1300 if (gimple_code (s1) == GIMPLE_PHI)
1301 return true;
1303 if (gimple_code (s2) == GIMPLE_PHI)
1304 return false;
1306 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1308 if (gimple_uid (s1) < gimple_uid (s2))
1309 return true;
1311 if (gimple_uid (s1) > gimple_uid (s2))
1312 return false;
1314 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1315 unsigned int uid = gimple_uid (s1);
1316 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1318 gimple *s = gsi_stmt (gsi);
1319 if (gimple_uid (s) != uid)
1320 break;
1321 if (s == s2)
1322 return true;
1325 return false;
1328 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1331 /* Insert STMT after INSERT_POINT. */
1333 static void
1334 insert_stmt_after (gimple *stmt, gimple *insert_point)
1336 gimple_stmt_iterator gsi;
1337 basic_block bb;
1339 if (gimple_code (insert_point) == GIMPLE_PHI)
1340 bb = gimple_bb (insert_point);
1341 else if (!stmt_ends_bb_p (insert_point))
1343 gsi = gsi_for_stmt (insert_point);
1344 gimple_set_uid (stmt, gimple_uid (insert_point));
1345 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1346 return;
1348 else
1349 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1350 thus if it must end a basic block, it should be a call that can
1351 throw, or some assignment that can throw. If it throws, the LHS
1352 of it will not be initialized though, so only valid places using
1353 the SSA_NAME should be dominated by the fallthru edge. */
1354 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1355 gsi = gsi_after_labels (bb);
1356 if (gsi_end_p (gsi))
1358 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1359 gimple_set_uid (stmt,
1360 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1362 else
1363 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1364 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1367 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1368 the result. Places the statement after the definition of either
1369 OP1 or OP2. Returns the new statement. */
1371 static gimple *
1372 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1374 gimple *op1def = NULL, *op2def = NULL;
1375 gimple_stmt_iterator gsi;
1376 tree op;
1377 gassign *sum;
1379 /* Create the addition statement. */
1380 op = make_ssa_name (type);
1381 sum = gimple_build_assign (op, opcode, op1, op2);
1383 /* Find an insertion place and insert. */
1384 if (TREE_CODE (op1) == SSA_NAME)
1385 op1def = SSA_NAME_DEF_STMT (op1);
1386 if (TREE_CODE (op2) == SSA_NAME)
1387 op2def = SSA_NAME_DEF_STMT (op2);
1388 if ((!op1def || gimple_nop_p (op1def))
1389 && (!op2def || gimple_nop_p (op2def)))
1391 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1392 if (gsi_end_p (gsi))
1394 gimple_stmt_iterator gsi2
1395 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1396 gimple_set_uid (sum,
1397 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1399 else
1400 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1401 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1403 else
1405 gimple *insert_point;
1406 if ((!op1def || gimple_nop_p (op1def))
1407 || (op2def && !gimple_nop_p (op2def)
1408 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1409 insert_point = op2def;
1410 else
1411 insert_point = op1def;
1412 insert_stmt_after (sum, insert_point);
1414 update_stmt (sum);
1416 return sum;
1419 /* Perform un-distribution of divisions and multiplications.
1420 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1421 to (A + B) / X for real X.
1423 The algorithm is organized as follows.
1425 - First we walk the addition chain *OPS looking for summands that
1426 are defined by a multiplication or a real division. This results
1427 in the candidates bitmap with relevant indices into *OPS.
1429 - Second we build the chains of multiplications or divisions for
1430 these candidates, counting the number of occurrences of (operand, code)
1431 pairs in all of the candidates chains.
1433 - Third we sort the (operand, code) pairs by number of occurrence and
1434 process them starting with the pair with the most uses.
1436 * For each such pair we walk the candidates again to build a
1437 second candidate bitmap noting all multiplication/division chains
1438 that have at least one occurrence of (operand, code).
1440 * We build an alternate addition chain only covering these
1441 candidates with one (operand, code) operation removed from their
1442 multiplication/division chain.
1444 * The first candidate gets replaced by the alternate addition chain
1445 multiplied/divided by the operand.
1447 * All candidate chains get disabled for further processing and
1448 processing of (operand, code) pairs continues.
1450 The alternate addition chains built are re-processed by the main
1451 reassociation algorithm which allows optimizing a * x * y + b * y * x
1452    to (a + b) * x * y in one invocation of the reassociation pass.  */
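/* Illustrative sketch (not part of this pass): the source-level effect of
   un-distribution on the example from the comment above, in unsigned
   arithmetic so the equality is exact.  */
static unsigned int
undistribute_example (unsigned int a, unsigned int b,
                      unsigned int x, unsigned int y)
{
  unsigned int before = a * x * y + b * y * x;
  unsigned int after = (a + b) * x * y;   /* what the rewrite produces  */
  return before - after;                  /* always 0                   */
}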
1454 static bool
1455 undistribute_ops_list (enum tree_code opcode,
1456 vec<operand_entry *> *ops, struct loop *loop)
1458 unsigned int length = ops->length ();
1459 operand_entry *oe1;
1460 unsigned i, j;
1461 unsigned nr_candidates, nr_candidates2;
1462 sbitmap_iterator sbi0;
1463 vec<operand_entry *> *subops;
1464 bool changed = false;
1465 int next_oecount_id = 0;
1467 if (length <= 1
1468 || opcode != PLUS_EXPR)
1469 return false;
1471 /* Build a list of candidates to process. */
1472 auto_sbitmap candidates (length);
1473 bitmap_clear (candidates);
1474 nr_candidates = 0;
1475 FOR_EACH_VEC_ELT (*ops, i, oe1)
1477 enum tree_code dcode;
1478 gimple *oe1def;
1480 if (TREE_CODE (oe1->op) != SSA_NAME)
1481 continue;
1482 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1483 if (!is_gimple_assign (oe1def))
1484 continue;
1485 dcode = gimple_assign_rhs_code (oe1def);
1486 if ((dcode != MULT_EXPR
1487 && dcode != RDIV_EXPR)
1488 || !is_reassociable_op (oe1def, dcode, loop))
1489 continue;
1491 bitmap_set_bit (candidates, i);
1492 nr_candidates++;
1495 if (nr_candidates < 2)
1496 return false;
1498 if (dump_file && (dump_flags & TDF_DETAILS))
1500 fprintf (dump_file, "searching for un-distribute opportunities ");
1501 print_generic_expr (dump_file,
1502 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1503 fprintf (dump_file, " %d\n", nr_candidates);
1506 /* Build linearized sub-operand lists and the counting table. */
1507 cvec.create (0);
1509 hash_table<oecount_hasher> ctable (15);
1511 /* ??? Macro arguments cannot have multi-argument template types in
1512      them.  This typedef is needed to work around that limitation.  */
1513 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1514 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1515 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1517 gimple *oedef;
1518 enum tree_code oecode;
1519 unsigned j;
1521 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1522 oecode = gimple_assign_rhs_code (oedef);
1523 linearize_expr_tree (&subops[i], oedef,
1524 associative_tree_code (oecode), false);
1526 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1528 oecount c;
1529 int *slot;
1530 int idx;
1531 c.oecode = oecode;
1532 c.cnt = 1;
1533 c.id = next_oecount_id++;
1534 c.op = oe1->op;
1535 cvec.safe_push (c);
1536 idx = cvec.length () + 41;
1537 slot = ctable.find_slot (idx, INSERT);
1538 if (!*slot)
1540 *slot = idx;
1542 else
1544 cvec.pop ();
1545 cvec[*slot - 42].cnt++;
1550 /* Sort the counting table. */
1551 cvec.qsort (oecount_cmp);
1553 if (dump_file && (dump_flags & TDF_DETAILS))
1555 oecount *c;
1556 fprintf (dump_file, "Candidates:\n");
1557 FOR_EACH_VEC_ELT (cvec, j, c)
1559 fprintf (dump_file, " %u %s: ", c->cnt,
1560 c->oecode == MULT_EXPR
1561 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1562 print_generic_expr (dump_file, c->op, 0);
1563 fprintf (dump_file, "\n");
1567 /* Process the (operand, code) pairs in order of most occurrence. */
1568 auto_sbitmap candidates2 (length);
1569 while (!cvec.is_empty ())
1571 oecount *c = &cvec.last ();
1572 if (c->cnt < 2)
1573 break;
1575 /* Now collect the operands in the outer chain that contain
1576 the common operand in their inner chain. */
1577 bitmap_clear (candidates2);
1578 nr_candidates2 = 0;
1579 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1581 gimple *oedef;
1582 enum tree_code oecode;
1583 unsigned j;
1584 tree op = (*ops)[i]->op;
1586       /* If we already undistributed in this chain, this may be
1587 a constant. */
1588 if (TREE_CODE (op) != SSA_NAME)
1589 continue;
1591 oedef = SSA_NAME_DEF_STMT (op);
1592 oecode = gimple_assign_rhs_code (oedef);
1593 if (oecode != c->oecode)
1594 continue;
1596 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1598 if (oe1->op == c->op)
1600 bitmap_set_bit (candidates2, i);
1601 ++nr_candidates2;
1602 break;
1607 if (nr_candidates2 >= 2)
1609 operand_entry *oe1, *oe2;
1610 gimple *prod;
1611 int first = bitmap_first_set_bit (candidates2);
1613 /* Build the new addition chain. */
1614 oe1 = (*ops)[first];
1615 if (dump_file && (dump_flags & TDF_DETAILS))
1617 fprintf (dump_file, "Building (");
1618 print_generic_expr (dump_file, oe1->op, 0);
1620 zero_one_operation (&oe1->op, c->oecode, c->op);
1621 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1623 gimple *sum;
1624 oe2 = (*ops)[i];
1625 if (dump_file && (dump_flags & TDF_DETAILS))
1627 fprintf (dump_file, " + ");
1628 print_generic_expr (dump_file, oe2->op, 0);
1630 zero_one_operation (&oe2->op, c->oecode, c->op);
1631 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1632 oe1->op, oe2->op, opcode);
1633 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1634 oe2->rank = 0;
1635 oe1->op = gimple_get_lhs (sum);
1638 /* Apply the multiplication/division. */
1639 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1640 oe1->op, c->op, c->oecode);
1641 if (dump_file && (dump_flags & TDF_DETAILS))
1643 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1644 print_generic_expr (dump_file, c->op, 0);
1645 fprintf (dump_file, "\n");
1648 /* Record it in the addition chain and disable further
1649 undistribution with this op. */
1650 oe1->op = gimple_assign_lhs (prod);
1651 oe1->rank = get_rank (oe1->op);
1652 subops[first].release ();
1654 changed = true;
1657 cvec.pop ();
1660 for (i = 0; i < ops->length (); ++i)
1661 subops[i].release ();
1662 free (subops);
1663 cvec.release ();
1665 return changed;
1668 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1669 expression, examine the other OPS to see if any of them are comparisons
1670 of the same values, which we may be able to combine or eliminate.
1671 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
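/* Illustrative sketch (not part of this pass): the comparison folding
   mentioned above, checked as plain C.  */
static int
redundant_comparison_example (int a, int b)
{
  int combined = (a < b) | (a == b);   /* what the pass folds...    */
  return combined == (a <= b);         /* ...into this; always 1    */
}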
1673 static bool
1674 eliminate_redundant_comparison (enum tree_code opcode,
1675 vec<operand_entry *> *ops,
1676 unsigned int currindex,
1677 operand_entry *curr)
1679 tree op1, op2;
1680 enum tree_code lcode, rcode;
1681 gimple *def1, *def2;
1682 int i;
1683 operand_entry *oe;
1685 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1686 return false;
1688 /* Check that CURR is a comparison. */
1689 if (TREE_CODE (curr->op) != SSA_NAME)
1690 return false;
1691 def1 = SSA_NAME_DEF_STMT (curr->op);
1692 if (!is_gimple_assign (def1))
1693 return false;
1694 lcode = gimple_assign_rhs_code (def1);
1695 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1696 return false;
1697 op1 = gimple_assign_rhs1 (def1);
1698 op2 = gimple_assign_rhs2 (def1);
1700 /* Now look for a similar comparison in the remaining OPS. */
1701 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1703 tree t;
1705 if (TREE_CODE (oe->op) != SSA_NAME)
1706 continue;
1707 def2 = SSA_NAME_DEF_STMT (oe->op);
1708 if (!is_gimple_assign (def2))
1709 continue;
1710 rcode = gimple_assign_rhs_code (def2);
1711 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1712 continue;
1714 /* If we got here, we have a match. See if we can combine the
1715 two comparisons. */
1716 if (opcode == BIT_IOR_EXPR)
1717 t = maybe_fold_or_comparisons (lcode, op1, op2,
1718 rcode, gimple_assign_rhs1 (def2),
1719 gimple_assign_rhs2 (def2));
1720 else
1721 t = maybe_fold_and_comparisons (lcode, op1, op2,
1722 rcode, gimple_assign_rhs1 (def2),
1723 gimple_assign_rhs2 (def2));
1724 if (!t)
1725 continue;
1727 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1728 always give us a boolean_type_node value back. If the original
1729 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1730 we need to convert. */
1731 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1732 t = fold_convert (TREE_TYPE (curr->op), t);
1734 if (TREE_CODE (t) != INTEGER_CST
1735 && !operand_equal_p (t, curr->op, 0))
1737 enum tree_code subcode;
1738 tree newop1, newop2;
1739 if (!COMPARISON_CLASS_P (t))
1740 continue;
1741 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1742 STRIP_USELESS_TYPE_CONVERSION (newop1);
1743 STRIP_USELESS_TYPE_CONVERSION (newop2);
1744 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1745 continue;
1748 if (dump_file && (dump_flags & TDF_DETAILS))
1750 fprintf (dump_file, "Equivalence: ");
1751 print_generic_expr (dump_file, curr->op, 0);
1752 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1753 print_generic_expr (dump_file, oe->op, 0);
1754 fprintf (dump_file, " -> ");
1755 print_generic_expr (dump_file, t, 0);
1756 fprintf (dump_file, "\n");
1759 /* Now we can delete oe, as it has been subsumed by the new combined
1760 expression t. */
1761 ops->ordered_remove (i);
1762 reassociate_stats.ops_eliminated ++;
1764 /* If t is the same as curr->op, we're done. Otherwise we must
1765 replace curr->op with t. Special case is if we got a constant
1766 back, in which case we add it to the end instead of in place of
1767 the current entry. */
1768 if (TREE_CODE (t) == INTEGER_CST)
1770 ops->ordered_remove (currindex);
1771 add_to_ops_vec (ops, t);
1773 else if (!operand_equal_p (t, curr->op, 0))
1775 gimple *sum;
1776 enum tree_code subcode;
1777 tree newop1;
1778 tree newop2;
1779 gcc_assert (COMPARISON_CLASS_P (t));
1780 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1781 STRIP_USELESS_TYPE_CONVERSION (newop1);
1782 STRIP_USELESS_TYPE_CONVERSION (newop2);
1783 gcc_checking_assert (is_gimple_val (newop1)
1784 && is_gimple_val (newop2));
1785 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1786 curr->op = gimple_get_lhs (sum);
1788 return true;
1791 return false;
1795 /* Transform repeated addition of the same value into a multiplication
1796    by a constant.  */
1797 static bool
1798 transform_add_to_multiply (vec<operand_entry *> *ops)
1800 operand_entry *oe;
1801 tree op = NULL_TREE;
1802 int j;
1803 int i, start = -1, end = 0, count = 0;
1804 auto_vec<std::pair <int, int> > indxs;
1805 bool changed = false;
1807 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1808 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1809 || !flag_unsafe_math_optimizations))
1810 return false;
1812 /* Look for repeated operands. */
1813 FOR_EACH_VEC_ELT (*ops, i, oe)
1815 if (start == -1)
1817 count = 1;
1818 op = oe->op;
1819 start = i;
1821 else if (operand_equal_p (oe->op, op, 0))
1823 count++;
1824 end = i;
1826 else
1828 if (count > 1)
1829 indxs.safe_push (std::make_pair (start, end));
1830 count = 1;
1831 op = oe->op;
1832 start = i;
1836 if (count > 1)
1837 indxs.safe_push (std::make_pair (start, end));
1839 for (j = indxs.length () - 1; j >= 0; --j)
1841 /* Convert repeated operand addition to multiplication. */
1842 start = indxs[j].first;
1843 end = indxs[j].second;
1844 op = (*ops)[start]->op;
1845 count = end - start + 1;
1846 for (i = end; i >= start; --i)
1847 ops->unordered_remove (i);
1848 tree tmp = make_ssa_name (TREE_TYPE (op));
1849 tree cst = build_int_cst (integer_type_node, count);
1850 gassign *mul_stmt
1851 = gimple_build_assign (tmp, MULT_EXPR,
1852 op, fold_convert (TREE_TYPE (op), cst));
1853 gimple_set_visited (mul_stmt, true);
1854 add_to_ops_vec (ops, tmp, mul_stmt);
1855 changed = true;
1858 return changed;
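/* Illustrative sketch (not part of this pass): the rewrite performed above
   for a run of identical addends, in unsigned arithmetic.  */
static unsigned int
add_to_multiply_example (unsigned int x, unsigned int y)
{
  unsigned int repeated = x + x + x + y;   /* becomes 3 * x + y  */
  return repeated - (3u * x + y);          /* always 0           */
}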
1862 /* Perform various identities and other optimizations on the list of
1863 operand entries, stored in OPS. The tree code for the binary
1864 operation between all the operands is OPCODE. */
1866 static void
1867 optimize_ops_list (enum tree_code opcode,
1868 vec<operand_entry *> *ops)
1870 unsigned int length = ops->length ();
1871 unsigned int i;
1872 operand_entry *oe;
1873 operand_entry *oelast = NULL;
1874 bool iterate = false;
1876 if (length == 1)
1877 return;
1879 oelast = ops->last ();
1881 /* If the last two are constants, pop the constants off, merge them
1882 and try the next two. */
1883 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1885 operand_entry *oelm1 = (*ops)[length - 2];
1887 if (oelm1->rank == 0
1888 && is_gimple_min_invariant (oelm1->op)
1889 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1890 TREE_TYPE (oelast->op)))
1892 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1893 oelm1->op, oelast->op);
1895 if (folded && is_gimple_min_invariant (folded))
1897 if (dump_file && (dump_flags & TDF_DETAILS))
1898 fprintf (dump_file, "Merging constants\n");
1900 ops->pop ();
1901 ops->pop ();
1903 add_to_ops_vec (ops, folded);
1904 reassociate_stats.constants_eliminated++;
1906 optimize_ops_list (opcode, ops);
1907 return;
1912 eliminate_using_constants (opcode, ops);
1913 oelast = NULL;
1915 for (i = 0; ops->iterate (i, &oe);)
1917 bool done = false;
1919 if (eliminate_not_pairs (opcode, ops, i, oe))
1920 return;
1921 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1922 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1923 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1925 if (done)
1926 return;
1927 iterate = true;
1928 oelast = NULL;
1929 continue;
1931 oelast = oe;
1932 i++;
1935 length = ops->length ();
1936 oelast = ops->last ();
1938 if (iterate)
1939 optimize_ops_list (opcode, ops);
1942 /* The following functions are subroutines to optimize_range_tests and allow
1943 it to try to change a logical combination of comparisons into a range
1944 test.
1946 For example, both
1947      X == 2 || X == 5 || X == 3 || X == 4
1948    and
1949      X >= 2 && X <= 5
1950 are converted to
1951 (unsigned) (X - 2) <= 3
1953 For more information see comments above fold_test_range in fold-const.c,
1954 this implementation is for GIMPLE. */
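/* Illustrative sketch (not part of this pass): the range-test rewrite from
   the comment above, checked as plain C.  The subtraction is done in
   unsigned arithmetic so the whole test is one comparison.  */
static int
range_test_example (int x)
{
  int separate = (x == 2 || x == 5 || x == 3 || x == 4);
  int combined = ((unsigned) x - 2 <= 3);   /* the rewritten form  */
  return separate == combined;              /* always 1            */
}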
1956 struct range_entry
1958 tree exp;
1959 tree low;
1960 tree high;
1961 bool in_p;
1962 bool strict_overflow_p;
1963 unsigned int idx, next;
1966 /* This is similar to make_range in fold-const.c, but on top of
1967 GIMPLE instead of trees. If EXP is non-NULL, it should be
1968 an SSA_NAME and STMT argument is ignored, otherwise STMT
1969 argument should be a GIMPLE_COND. */
1971 static void
1972 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
1974 int in_p;
1975 tree low, high;
1976 bool is_bool, strict_overflow_p;
1978 r->exp = NULL_TREE;
1979 r->in_p = false;
1980 r->strict_overflow_p = false;
1981 r->low = NULL_TREE;
1982 r->high = NULL_TREE;
1983 if (exp != NULL_TREE
1984 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
1985 return;
1987 /* Start with simply saying "EXP != 0" and then look at the code of EXP
1988 and see if we can refine the range. Some of the cases below may not
1989 happen, but it doesn't seem worth worrying about this. We "continue"
1990 the outer loop when we've changed something; otherwise we "break"
1991 the switch, which will "break" the while. */
1992 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
1993 high = low;
1994 in_p = 0;
1995 strict_overflow_p = false;
1996 is_bool = false;
1997 if (exp == NULL_TREE)
1998 is_bool = true;
1999 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2001 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2002 is_bool = true;
2003 else
2004 return;
2006 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2007 is_bool = true;
2009 while (1)
2011 enum tree_code code;
2012 tree arg0, arg1, exp_type;
2013 tree nexp;
2014 location_t loc;
2016 if (exp != NULL_TREE)
2018 if (TREE_CODE (exp) != SSA_NAME
2019 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2020 break;
2022 stmt = SSA_NAME_DEF_STMT (exp);
2023 if (!is_gimple_assign (stmt))
2024 break;
2026 code = gimple_assign_rhs_code (stmt);
2027 arg0 = gimple_assign_rhs1 (stmt);
2028 arg1 = gimple_assign_rhs2 (stmt);
2029 exp_type = TREE_TYPE (exp);
2031 else
2033 code = gimple_cond_code (stmt);
2034 arg0 = gimple_cond_lhs (stmt);
2035 arg1 = gimple_cond_rhs (stmt);
2036 exp_type = boolean_type_node;
2039 if (TREE_CODE (arg0) != SSA_NAME)
2040 break;
2041 loc = gimple_location (stmt);
2042 switch (code)
2044 case BIT_NOT_EXPR:
2045 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2046 /* Ensure the range is either +[-,0], +[0,0],
2047 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2048 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2049 or similar expression of unconditional true or
2050 false, it should not be negated. */
2051 && ((high && integer_zerop (high))
2052 || (low && integer_onep (low))))
2054 in_p = !in_p;
2055 exp = arg0;
2056 continue;
2058 break;
2059 case SSA_NAME:
2060 exp = arg0;
2061 continue;
2062 CASE_CONVERT:
2063 if (is_bool)
2064 goto do_default;
2065 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2067 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2068 is_bool = true;
2069 else
2070 return;
2072 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2073 is_bool = true;
2074 goto do_default;
2075 case EQ_EXPR:
2076 case NE_EXPR:
2077 case LT_EXPR:
2078 case LE_EXPR:
2079 case GE_EXPR:
2080 case GT_EXPR:
2081 is_bool = true;
2082 /* FALLTHRU */
2083 default:
2084 if (!is_bool)
2085 return;
2086 do_default:
2087 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2088 &low, &high, &in_p,
2089 &strict_overflow_p);
2090 if (nexp != NULL_TREE)
2092 exp = nexp;
2093 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2094 continue;
2096 break;
2098 break;
2100 if (is_bool)
2102 r->exp = exp;
2103 r->in_p = in_p;
2104 r->low = low;
2105 r->high = high;
2106 r->strict_overflow_p = strict_overflow_p;
2110 /* Comparison function for qsort. Sort entries
2111 without SSA_NAME exp first, then with SSA_NAMEs sorted
2112 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2113 by increasing ->low and if ->low is the same, by increasing
2114 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2115 maximum. */
2117 static int
2118 range_entry_cmp (const void *a, const void *b)
2120 const struct range_entry *p = (const struct range_entry *) a;
2121 const struct range_entry *q = (const struct range_entry *) b;
2123 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2125 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2127 /* Group range_entries for the same SSA_NAME together. */
2128 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2129 return -1;
2130 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2131 return 1;
2132 /* If ->low is different, NULL low goes first, then by
2133 ascending low. */
2134 if (p->low != NULL_TREE)
2136 if (q->low != NULL_TREE)
2138 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2139 p->low, q->low);
2140 if (tem && integer_onep (tem))
2141 return -1;
2142 tem = fold_binary (GT_EXPR, boolean_type_node,
2143 p->low, q->low);
2144 if (tem && integer_onep (tem))
2145 return 1;
2147 else
2148 return 1;
2150 else if (q->low != NULL_TREE)
2151 return -1;
2152 /* If ->high is different, NULL high goes last, before that by
2153 ascending high. */
2154 if (p->high != NULL_TREE)
2156 if (q->high != NULL_TREE)
2158 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2159 p->high, q->high);
2160 if (tem && integer_onep (tem))
2161 return -1;
2162 tem = fold_binary (GT_EXPR, boolean_type_node,
2163 p->high, q->high);
2164 if (tem && integer_onep (tem))
2165 return 1;
2167 else
2168 return -1;
2170 else if (q->high != NULL_TREE)
2171 return 1;
2172 /* If both ranges are the same, sort below by ascending idx. */
2174 else
2175 return 1;
2177 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2178 return -1;
2180 if (p->idx < q->idx)
2181 return -1;
2182 else
2184 gcc_checking_assert (p->idx > q->idx);
2185 return 1;
2189 /* Helper routine of optimize_range_test.
2190 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2191 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2192 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2193 is NULL, OTHERRANGEP must not be NULL and points to
2194 an array of COUNT pointers to other ranges. Return
2195 true if the range merge has been successful.
2196 If OPCODE is ERROR_MARK, this is called from within
2197 maybe_optimize_range_tests and is performing inter-bb range optimization.
2198 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2199 oe->rank. */
2201 static bool
2202 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2203 struct range_entry **otherrangep,
2204 unsigned int count, enum tree_code opcode,
2205 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2206 bool in_p, tree low, tree high, bool strict_overflow_p)
2208 operand_entry *oe = (*ops)[range->idx];
2209 tree op = oe->op;
2210 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2211 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2212 location_t loc = gimple_location (stmt);
2213 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2214 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2215 in_p, low, high);
2216 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2217 gimple_stmt_iterator gsi;
2218 unsigned int i, uid;
2220 if (tem == NULL_TREE)
2221 return false;
2223 /* If op is default def SSA_NAME, there is no place to insert the
2224 new comparison. Give up, unless we can use OP itself as the
2225 range test. */
2226 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2228 if (op == range->exp
2229 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2230 || TREE_CODE (optype) == BOOLEAN_TYPE)
2231 && (op == tem
2232 || (TREE_CODE (tem) == EQ_EXPR
2233 && TREE_OPERAND (tem, 0) == op
2234 && integer_onep (TREE_OPERAND (tem, 1))))
2235 && opcode != BIT_IOR_EXPR
2236 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2238 stmt = NULL;
2239 tem = op;
2241 else
2242 return false;
2245 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2246 warning_at (loc, OPT_Wstrict_overflow,
2247 "assuming signed overflow does not occur "
2248 "when simplifying range test");
2250 if (dump_file && (dump_flags & TDF_DETAILS))
2252 struct range_entry *r;
2253 fprintf (dump_file, "Optimizing range tests ");
2254 print_generic_expr (dump_file, range->exp, 0);
2255 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2256 print_generic_expr (dump_file, range->low, 0);
2257 fprintf (dump_file, ", ");
2258 print_generic_expr (dump_file, range->high, 0);
2259 fprintf (dump_file, "]");
2260 for (i = 0; i < count; i++)
2262 if (otherrange)
2263 r = otherrange + i;
2264 else
2265 r = otherrangep[i];
2266 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2267 print_generic_expr (dump_file, r->low, 0);
2268 fprintf (dump_file, ", ");
2269 print_generic_expr (dump_file, r->high, 0);
2270 fprintf (dump_file, "]");
2272 fprintf (dump_file, "\n into ");
2273 print_generic_expr (dump_file, tem, 0);
2274 fprintf (dump_file, "\n");
2277 if (opcode == BIT_IOR_EXPR
2278 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2279 tem = invert_truthvalue_loc (loc, tem);
2281 tem = fold_convert_loc (loc, optype, tem);
2282 if (stmt)
2284 gsi = gsi_for_stmt (stmt);
2285 uid = gimple_uid (stmt);
2287 else
2289 gsi = gsi_none ();
2290 uid = 0;
2292 if (stmt == NULL)
2293 gcc_checking_assert (tem == op);
2294 /* In rare cases range->exp can be equal to lhs of stmt.
2295 In that case we have to insert after the stmt rather than before
2296 it. If stmt is a PHI, insert it at the start of the basic block. */
2297 else if (op != range->exp)
2299 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2300 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2301 GSI_SAME_STMT);
2302 gsi_prev (&gsi);
2304 else if (gimple_code (stmt) != GIMPLE_PHI)
2306 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2307 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2308 GSI_CONTINUE_LINKING);
2310 else
2312 gsi = gsi_after_labels (gimple_bb (stmt));
2313 if (!gsi_end_p (gsi))
2314 uid = gimple_uid (gsi_stmt (gsi));
2315 else
2317 gsi = gsi_start_bb (gimple_bb (stmt));
2318 uid = 1;
2319 while (!gsi_end_p (gsi))
2321 uid = gimple_uid (gsi_stmt (gsi));
2322 gsi_next (&gsi);
2325 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2326 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2327 GSI_SAME_STMT);
2328 if (gsi_end_p (gsi))
2329 gsi = gsi_last_bb (gimple_bb (stmt));
2330 else
2331 gsi_prev (&gsi);
2333 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2334 if (gimple_uid (gsi_stmt (gsi)))
2335 break;
2336 else
2337 gimple_set_uid (gsi_stmt (gsi), uid);
2339 oe->op = tem;
2340 range->exp = exp;
2341 range->low = low;
2342 range->high = high;
2343 range->in_p = in_p;
2344 range->strict_overflow_p = false;
2346 for (i = 0; i < count; i++)
2348 if (otherrange)
2349 range = otherrange + i;
2350 else
2351 range = otherrangep[i];
2352 oe = (*ops)[range->idx];
2353 /* Now change all the other range test immediate uses, so that
2354 those tests will be optimized away. */
2355 if (opcode == ERROR_MARK)
2357 if (oe->op)
2358 oe->op = build_int_cst (TREE_TYPE (oe->op),
2359 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2360 else
2361 oe->op = (oe->rank == BIT_IOR_EXPR
2362 ? boolean_false_node : boolean_true_node);
2364 else
2365 oe->op = error_mark_node;
2366 range->exp = NULL_TREE;
2368 return true;
2371 /* Optimize X == CST1 || X == CST2
2372 if popcount (CST1 ^ CST2) == 1 into
2373 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2374 Similarly for ranges. E.g.
2375 X != 2 && X != 3 && X != 10 && X != 11
2376 will be transformed by the previous optimization into
2377 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2378 and this loop can transform that into
2379 !(((X & ~8) - 2U) <= 1U). */
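/* Editor's sketch (not part of this pass): the XOR-based merge above,
   written out for the concrete ranges [2,3] and [10,11].  Since
   2 ^ 10 == 3 ^ 11 == 8, a single bit, clearing that bit maps both
   ranges onto [2,3], so one comparison suffices.  The function names
   are hypothetical.  */

static int
xor_merge_naive (unsigned int x)
{
  return (x - 2u) <= 1u || (x - 10u) <= 1u;
}

static int
xor_merge_folded (unsigned int x)
{
  /* lowi ^ lowj == highi ^ highj == 8 and popcount (8) == 1,
     so mask the differing bit away before the range check.  */
  return ((x & ~8u) - 2u) <= 1u;
}

/* Returns 1 if both forms agree on a small sample of values.  */
static int
xor_merge_check (void)
{
  unsigned int x;
  for (x = 0; x < 64; x++)
    if (xor_merge_naive (x) != xor_merge_folded (x))
      return 0;
  return 1;
}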
2381 static bool
2382 optimize_range_tests_xor (enum tree_code opcode, tree type,
2383 tree lowi, tree lowj, tree highi, tree highj,
2384 vec<operand_entry *> *ops,
2385 struct range_entry *rangei,
2386 struct range_entry *rangej)
2388 tree lowxor, highxor, tem, exp;
2389 /* Check lowi ^ lowj == highi ^ highj and
2390 popcount (lowi ^ lowj) == 1. */
2391 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2392 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2393 return false;
2394 if (!integer_pow2p (lowxor))
2395 return false;
2396 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2397 if (!tree_int_cst_equal (lowxor, highxor))
2398 return false;
2400 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2401 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2402 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2403 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2404 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2405 NULL, rangei->in_p, lowj, highj,
2406 rangei->strict_overflow_p
2407 || rangej->strict_overflow_p))
2408 return true;
2409 return false;
2412 /* Optimize X == CST1 || X == CST2
2413 if popcount (CST2 - CST1) == 1 into
2414 ((X - CST1) & ~(CST2 - CST1)) == 0.
2415 Similarly for ranges. E.g.
2416 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2417 || X == 75 || X == 45
2418 will be transformed by the previous optimization into
2419 (X - 43U) <= 3U || (X - 75U) <= 3U
2420 and this loop can transform that into
2421 ((X - 43U) & ~(75U - 43U)) <= 3U. */
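/* Editor's sketch (not part of this pass): the difference-based merge
   above with lowi = 43, lowj = 75.  Both ranges span four values and
   75 - 43 == 32 is a power of two, so masking that bit out of X - 43
   folds the two range checks into one.  The function names are
   hypothetical.  */

static int
diff_merge_naive (unsigned int x)
{
  return (x - 43u) <= 3u || (x - 75u) <= 3u;
}

static int
diff_merge_folded (unsigned int x)
{
  /* highi - lowi == highj - lowj == 3 and popcount (75 - 43) == 1.  */
  return ((x - 43u) & ~(75u - 43u)) <= 3u;
}

/* Returns 1 if both forms agree on a small sample of values.  */
static int
diff_merge_check (void)
{
  unsigned int x;
  for (x = 0; x < 128; x++)
    if (diff_merge_naive (x) != diff_merge_folded (x))
      return 0;
  return 1;
}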
2422 static bool
2423 optimize_range_tests_diff (enum tree_code opcode, tree type,
2424 tree lowi, tree lowj, tree highi, tree highj,
2425 vec<operand_entry *> *ops,
2426 struct range_entry *rangei,
2427 struct range_entry *rangej)
2429 tree tem1, tem2, mask;
2430 /* Check highi - lowi == highj - lowj. */
2431 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2432 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2433 return false;
2434 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2435 if (!tree_int_cst_equal (tem1, tem2))
2436 return false;
2437 /* Check popcount (lowj - lowi) == 1. */
2438 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2439 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2440 return false;
2441 if (!integer_pow2p (tem1))
2442 return false;
2444 type = unsigned_type_for (type);
2445 tem1 = fold_convert (type, tem1);
2446 tem2 = fold_convert (type, tem2);
2447 lowi = fold_convert (type, lowi);
2448 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2449 tem1 = fold_binary (MINUS_EXPR, type,
2450 fold_convert (type, rangei->exp), lowi);
2451 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2452 lowj = build_int_cst (type, 0);
2453 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2454 NULL, rangei->in_p, lowj, tem2,
2455 rangei->strict_overflow_p
2456 || rangej->strict_overflow_p))
2457 return true;
2458 return false;
2461 /* Do the checks that are common to optimize_range_tests_xor and
2462 optimize_range_tests_diff.
2463 If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
2464 otherwise call optimize_range_tests_diff. */
2466 static bool
2467 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2468 bool optimize_xor, vec<operand_entry *> *ops,
2469 struct range_entry *ranges)
2471 int i, j;
2472 bool any_changes = false;
2473 for (i = first; i < length; i++)
2475 tree lowi, highi, lowj, highj, type, tem;
2477 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2478 continue;
2479 type = TREE_TYPE (ranges[i].exp);
2480 if (!INTEGRAL_TYPE_P (type))
2481 continue;
2482 lowi = ranges[i].low;
2483 if (lowi == NULL_TREE)
2484 lowi = TYPE_MIN_VALUE (type);
2485 highi = ranges[i].high;
2486 if (highi == NULL_TREE)
2487 continue;
2488 for (j = i + 1; j < length && j < i + 64; j++)
2490 bool changes;
2491 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2492 continue;
2493 lowj = ranges[j].low;
2494 if (lowj == NULL_TREE)
2495 continue;
2496 highj = ranges[j].high;
2497 if (highj == NULL_TREE)
2498 highj = TYPE_MAX_VALUE (type);
2499 /* Check lowj > highi. */
2500 tem = fold_binary (GT_EXPR, boolean_type_node,
2501 lowj, highi);
2502 if (tem == NULL_TREE || !integer_onep (tem))
2503 continue;
2504 if (optimize_xor)
2505 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2506 highi, highj, ops,
2507 ranges + i, ranges + j);
2508 else
2509 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2510 highi, highj, ops,
2511 ranges + i, ranges + j);
2512 if (changes)
2514 any_changes = true;
2515 break;
2519 return any_changes;
2522 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2523 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2524 EXP on success, NULL otherwise. */
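/* Editor's sketch (not part of this pass): how a single [LOW, HIGH]
   range turns into a word-sized bit mask relative to TOTALLOW, in the
   spirit of the wide_int arithmetic below.  The helper is hypothetical,
   assumes a 64-bit word and assumes the whole span fits in that word.  */

static unsigned long long
bit_test_mask_for_range (unsigned int totallow, unsigned int low,
			 unsigned int high)
{
  unsigned long long width = (unsigned long long) (high - low) + 1;
  unsigned long long mask = (width >= 64
			     ? ~0ull
			     : (1ull << width) - 1);	/* HIGH - LOW + 1 ones.  */
  return mask << (low - totallow);			/* shifted by LOW - TOTALLOW.  */
}

/* For the ranges [43,46] and [75,78] used in the difference-merge sketch
   earlier, relative to 43 this gives bits 0-3 and 32-35, i.e.
   0xF0000000FULL.  Returns 1 if that holds.  */
static int
bit_test_mask_check (void)
{
  return (bit_test_mask_for_range (43, 43, 46)
	  | bit_test_mask_for_range (43, 75, 78)) == 0xF0000000Full;
}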
2526 static tree
2527 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2528 wide_int *mask, tree *totallowp)
2530 tree tem = int_const_binop (MINUS_EXPR, high, low);
2531 if (tem == NULL_TREE
2532 || TREE_CODE (tem) != INTEGER_CST
2533 || TREE_OVERFLOW (tem)
2534 || tree_int_cst_sgn (tem) == -1
2535 || compare_tree_int (tem, prec) != -1)
2536 return NULL_TREE;
2538 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2539 *mask = wi::shifted_mask (0, max, false, prec);
2540 if (TREE_CODE (exp) == BIT_AND_EXPR
2541 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2543 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2544 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2545 if (wi::popcount (msk) == 1
2546 && wi::ltu_p (msk, prec - max))
2548 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2549 max += msk.to_uhwi ();
2550 exp = TREE_OPERAND (exp, 0);
2551 if (integer_zerop (low)
2552 && TREE_CODE (exp) == PLUS_EXPR
2553 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2555 tree ret = TREE_OPERAND (exp, 0);
2556 STRIP_NOPS (ret);
2557 widest_int bias
2558 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2559 TYPE_PRECISION (TREE_TYPE (low))));
2560 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2561 if (totallowp)
2563 *totallowp = tbias;
2564 return ret;
2566 else if (!tree_int_cst_lt (totallow, tbias))
2567 return NULL_TREE;
2568 bias = wi::to_widest (tbias);
2569 bias -= wi::to_widest (totallow);
2570 if (bias >= 0 && bias < prec - max)
2572 *mask = wi::lshift (*mask, bias);
2573 return ret;
2578 if (totallowp)
2579 return exp;
2580 if (!tree_int_cst_lt (totallow, low))
2581 return exp;
2582 tem = int_const_binop (MINUS_EXPR, low, totallow);
2583 if (tem == NULL_TREE
2584 || TREE_CODE (tem) != INTEGER_CST
2585 || TREE_OVERFLOW (tem)
2586 || compare_tree_int (tem, prec - max) == 1)
2587 return NULL_TREE;
2589 *mask = wi::lshift (*mask, wi::to_widest (tem));
2590 return exp;
2593 /* Attempt to optimize small range tests using bit test.
2594 E.g.
2595 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2596 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2597 has been optimized by the earlier optimizations into:
2598 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2599 As the whole 43 through 82 range spans fewer than 64 values,
2600 for 64-bit word targets optimize that into:
2601 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
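/* Editor's sketch (not part of this pass): the single-bit-test shape the
   code below emits, for the set {43..46, 75..78} whose mask was derived
   in the sketch above.  The shift is only evaluated when the offset fits
   in the word, which is what the leading range check guarantees.  The
   function names are hypothetical.  */

static int
in_set_naive (unsigned int x)
{
  return (x >= 43 && x <= 46) || (x >= 75 && x <= 78);
}

static int
in_set_bit_test (unsigned int x)
{
  unsigned long long mask = 0xF0000000Full;	/* bits 0-3 and 32-35.  */
  unsigned int off = x - 43u;
  /* One subtraction, one comparison and one bit test instead of four
     range checks.  */
  return off <= 35u && ((1ull << off) & mask) != 0;
}

/* Returns 1 if both forms agree on a small sample of values.  */
static int
in_set_check (void)
{
  unsigned int x;
  for (x = 0; x < 256; x++)
    if (in_set_naive (x) != in_set_bit_test (x))
      return 0;
  return 1;
}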
2603 static bool
2604 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2605 vec<operand_entry *> *ops,
2606 struct range_entry *ranges)
2608 int i, j;
2609 bool any_changes = false;
2610 int prec = GET_MODE_BITSIZE (word_mode);
2611 auto_vec<struct range_entry *, 64> candidates;
2613 for (i = first; i < length - 2; i++)
2615 tree lowi, highi, lowj, highj, type;
2617 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2618 continue;
2619 type = TREE_TYPE (ranges[i].exp);
2620 if (!INTEGRAL_TYPE_P (type))
2621 continue;
2622 lowi = ranges[i].low;
2623 if (lowi == NULL_TREE)
2624 lowi = TYPE_MIN_VALUE (type);
2625 highi = ranges[i].high;
2626 if (highi == NULL_TREE)
2627 continue;
2628 wide_int mask;
2629 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2630 highi, &mask, &lowi);
2631 if (exp == NULL_TREE)
2632 continue;
2633 bool strict_overflow_p = ranges[i].strict_overflow_p;
2634 candidates.truncate (0);
2635 int end = MIN (i + 64, length);
2636 for (j = i + 1; j < end; j++)
2638 tree exp2;
2639 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2640 continue;
2641 if (ranges[j].exp == exp)
2643 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2645 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2646 if (exp2 == exp)
2648 else if (TREE_CODE (exp2) == PLUS_EXPR)
2650 exp2 = TREE_OPERAND (exp2, 0);
2651 STRIP_NOPS (exp2);
2652 if (exp2 != exp)
2653 continue;
2655 else
2656 continue;
2658 else
2659 continue;
2660 lowj = ranges[j].low;
2661 if (lowj == NULL_TREE)
2662 continue;
2663 highj = ranges[j].high;
2664 if (highj == NULL_TREE)
2665 highj = TYPE_MAX_VALUE (type);
2666 wide_int mask2;
2667 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2668 highj, &mask2, NULL);
2669 if (exp2 != exp)
2670 continue;
2671 mask |= mask2;
2672 strict_overflow_p |= ranges[j].strict_overflow_p;
2673 candidates.safe_push (&ranges[j]);
2676 /* If we would otherwise need 3 or more comparisons, use a bit test. */
2677 if (candidates.length () >= 2)
2679 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2680 wi::to_widest (lowi)
2681 + prec - 1 - wi::clz (mask));
2682 operand_entry *oe = (*ops)[ranges[i].idx];
2683 tree op = oe->op;
2684 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2685 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2686 location_t loc = gimple_location (stmt);
2687 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2689 /* See if it isn't cheaper to pretend the minimum value of the
2690 range is 0, if the maximum value is small enough.
2691 We can then avoid the subtraction of the minimum value, but the
2692 mask constant might be more expensive. */
2693 if (compare_tree_int (lowi, 0) > 0
2694 && compare_tree_int (high, prec) < 0)
2696 int cost_diff;
2697 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2698 rtx reg = gen_raw_REG (word_mode, 10000);
2699 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2700 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2701 GEN_INT (-m)), speed_p);
2702 rtx r = immed_wide_int_const (mask, word_mode);
2703 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2704 word_mode, speed_p);
2705 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2706 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2707 word_mode, speed_p);
2708 if (cost_diff > 0)
2710 mask = wi::lshift (mask, m);
2711 lowi = build_zero_cst (TREE_TYPE (lowi));
2715 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2716 false, lowi, high);
2717 if (tem == NULL_TREE || is_gimple_val (tem))
2718 continue;
2719 tree etype = unsigned_type_for (TREE_TYPE (exp));
2720 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2721 fold_convert_loc (loc, etype, exp),
2722 fold_convert_loc (loc, etype, lowi));
2723 exp = fold_convert_loc (loc, integer_type_node, exp);
2724 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2725 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2726 build_int_cst (word_type, 1), exp);
2727 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2728 wide_int_to_tree (word_type, mask));
2729 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2730 build_zero_cst (word_type));
2731 if (is_gimple_val (exp))
2732 continue;
2734 /* The shift might have undefined behavior if TEM is true,
2735 but reassociate_bb isn't prepared to have basic blocks
2736 split when it is running. So, temporarily emit code
2737 with BIT_IOR_EXPR instead of &&, and fix it up in
2738 branch_fixup. */
2739 gimple_seq seq;
2740 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2741 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2742 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2743 gimple_seq seq2;
2744 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2745 gimple_seq_add_seq_without_update (&seq, seq2);
2746 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2747 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2748 gimple *g = gimple_build_assign (make_ssa_name (optype),
2749 BIT_IOR_EXPR, tem, exp);
2750 gimple_set_location (g, loc);
2751 gimple_seq_add_stmt_without_update (&seq, g);
2752 exp = gimple_assign_lhs (g);
2753 tree val = build_zero_cst (optype);
2754 if (update_range_test (&ranges[i], NULL, candidates.address (),
2755 candidates.length (), opcode, ops, exp,
2756 seq, false, val, val, strict_overflow_p))
2758 any_changes = true;
2759 reassoc_branch_fixups.safe_push (tem);
2761 else
2762 gimple_seq_discard (seq);
2765 return any_changes;
2768 /* Optimize range tests, similarly to how fold_range_test optimizes
2769 them on trees. The tree code for the binary
2770 operation between all the operands is OPCODE.
2771 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2772 maybe_optimize_range_tests for inter-bb range optimization.
2773 In that case if oe->op is NULL, oe->id is bb->index whose
2774 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2775 the actual opcode. */
2777 static bool
2778 optimize_range_tests (enum tree_code opcode,
2779 vec<operand_entry *> *ops)
2781 unsigned int length = ops->length (), i, j, first;
2782 operand_entry *oe;
2783 struct range_entry *ranges;
2784 bool any_changes = false;
2786 if (length == 1)
2787 return false;
2789 ranges = XNEWVEC (struct range_entry, length);
2790 for (i = 0; i < length; i++)
2792 oe = (*ops)[i];
2793 ranges[i].idx = i;
2794 init_range_entry (ranges + i, oe->op,
2795 oe->op
2796 ? NULL
2797 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2798 /* For | invert it now, we will invert it again before emitting
2799 the optimized expression. */
2800 if (opcode == BIT_IOR_EXPR
2801 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2802 ranges[i].in_p = !ranges[i].in_p;
2805 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2806 for (i = 0; i < length; i++)
2807 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2808 break;
2810 /* Try to merge ranges. */
2811 for (first = i; i < length; i++)
2813 tree low = ranges[i].low;
2814 tree high = ranges[i].high;
2815 int in_p = ranges[i].in_p;
2816 bool strict_overflow_p = ranges[i].strict_overflow_p;
2817 int update_fail_count = 0;
2819 for (j = i + 1; j < length; j++)
2821 if (ranges[i].exp != ranges[j].exp)
2822 break;
2823 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2824 ranges[j].in_p, ranges[j].low, ranges[j].high))
2825 break;
2826 strict_overflow_p |= ranges[j].strict_overflow_p;
2829 if (j == i + 1)
2830 continue;
2832 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2833 opcode, ops, ranges[i].exp, NULL, in_p,
2834 low, high, strict_overflow_p))
2836 i = j - 1;
2837 any_changes = true;
2839 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2840 while update_range_test would fail. */
2841 else if (update_fail_count == 64)
2842 i = j - 1;
2843 else
2844 ++update_fail_count;
2847 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2848 ops, ranges);
2850 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2851 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2852 ops, ranges);
2853 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2854 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2855 ops, ranges);
2857 if (any_changes && opcode != ERROR_MARK)
2859 j = 0;
2860 FOR_EACH_VEC_ELT (*ops, i, oe)
2862 if (oe->op == error_mark_node)
2863 continue;
2864 else if (i != j)
2865 (*ops)[j] = oe;
2866 j++;
2868 ops->truncate (j);
2871 XDELETEVEC (ranges);
2872 return any_changes;
2875 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
2876 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
2877 otherwise the comparison code. */
2879 static tree_code
2880 ovce_extract_ops (tree var, gassign **rets, bool *reti)
2882 if (TREE_CODE (var) != SSA_NAME)
2883 return ERROR_MARK;
2885 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
2886 if (stmt == NULL)
2887 return ERROR_MARK;
2889 /* ??? If we start creating more COND_EXPR, we could perform
2890 this same optimization with them. For now, simplify. */
2891 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
2892 return ERROR_MARK;
2894 tree cond = gimple_assign_rhs1 (stmt);
2895 tree_code cmp = TREE_CODE (cond);
2896 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
2897 return ERROR_MARK;
2899 /* ??? For now, allow only canonical true and false result vectors.
2900 We could expand this to other constants should the need arise,
2901 but at the moment we don't create them. */
2902 tree t = gimple_assign_rhs2 (stmt);
2903 tree f = gimple_assign_rhs3 (stmt);
2904 bool inv;
2905 if (integer_all_onesp (t))
2906 inv = false;
2907 else if (integer_all_onesp (f))
2909 cmp = invert_tree_comparison (cmp, false);
2910 inv = true;
2912 else
2913 return ERROR_MARK;
2914 if (!integer_zerop (f))
2915 return ERROR_MARK;
2917 /* Success! */
2918 if (rets)
2919 *rets = stmt;
2920 if (reti)
2921 *reti = inv;
2922 return cmp;
2925 /* Optimize the condition of VEC_COND_EXPRs which have been combined
2926 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
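/* Editor's sketch (not part of this pass): a scalar analogue of the
   VEC_COND_EXPR folding below.  When two all-ones/all-zeros comparison
   results are combined with & or |, the pair of comparisons can often be
   folded into one, which is what maybe_fold_and_comparisons and
   maybe_fold_or_comparisons decide on trees.  The function names are
   hypothetical.  */

static int
vec_cond_and_naive (int x)
{
  int m1 = (x < 5) ? -1 : 0;	/* first comparison, all-ones mask form */
  int m2 = (x < 8) ? -1 : 0;	/* second comparison */
  return m1 & m2;
}

static int
vec_cond_and_folded (int x)
{
  /* x < 5 implies x < 8, so the AND of the two masks is just x < 5.  */
  return (x < 5) ? -1 : 0;
}

/* Returns 1 if both forms agree on a small sample of values.  */
static int
vec_cond_check (void)
{
  int x;
  for (x = -16; x < 16; x++)
    if (vec_cond_and_naive (x) != vec_cond_and_folded (x))
      return 0;
  return 1;
}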
2928 static bool
2929 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
2931 unsigned int length = ops->length (), i, j;
2932 bool any_changes = false;
2934 if (length == 1)
2935 return false;
2937 for (i = 0; i < length; ++i)
2939 tree elt0 = (*ops)[i]->op;
2941 gassign *stmt0;
2942 bool invert;
2943 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
2944 if (cmp0 == ERROR_MARK)
2945 continue;
2947 for (j = i + 1; j < length; ++j)
2949 tree &elt1 = (*ops)[j]->op;
2951 gassign *stmt1;
2952 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
2953 if (cmp1 == ERROR_MARK)
2954 continue;
2956 tree cond0 = gimple_assign_rhs1 (stmt0);
2957 tree x0 = TREE_OPERAND (cond0, 0);
2958 tree y0 = TREE_OPERAND (cond0, 1);
2960 tree cond1 = gimple_assign_rhs1 (stmt1);
2961 tree x1 = TREE_OPERAND (cond1, 0);
2962 tree y1 = TREE_OPERAND (cond1, 1);
2964 tree comb;
2965 if (opcode == BIT_AND_EXPR)
2966 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
2967 else if (opcode == BIT_IOR_EXPR)
2968 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
2969 else
2970 gcc_unreachable ();
2971 if (comb == NULL)
2972 continue;
2974 /* Success! */
2975 if (dump_file && (dump_flags & TDF_DETAILS))
2977 fprintf (dump_file, "Transforming ");
2978 print_generic_expr (dump_file, cond0, 0);
2979 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
2980 print_generic_expr (dump_file, cond1, 0);
2981 fprintf (dump_file, " into ");
2982 print_generic_expr (dump_file, comb, 0);
2983 fputc ('\n', dump_file);
2986 gimple_assign_set_rhs1 (stmt0, comb);
2987 if (invert)
2988 std::swap (*gimple_assign_rhs2_ptr (stmt0),
2989 *gimple_assign_rhs3_ptr (stmt0));
2990 update_stmt (stmt0);
2992 elt1 = error_mark_node;
2993 any_changes = true;
2997 if (any_changes)
2999 operand_entry *oe;
3000 j = 0;
3001 FOR_EACH_VEC_ELT (*ops, i, oe)
3003 if (oe->op == error_mark_node)
3004 continue;
3005 else if (i != j)
3006 (*ops)[j] = oe;
3007 j++;
3009 ops->truncate (j);
3012 return any_changes;
3015 /* Return true if STMT is a cast like:
3016 <bb N>:
3018 _123 = (int) _234;
3020 <bb M>:
3021 # _345 = PHI <_123(N), 1(...), 1(...)>
3022 where _234 has bool type, _123 has a single use, and
3023 bb N has a single successor M. This is commonly used in
3024 the last block of a range test.
3026 Also return true if STMT is a tcc_comparison statement like:
3027 <bb N>:
3029 _234 = a_2(D) == 2;
3031 <bb M>:
3032 # _345 = PHI <_234(N), 1(...), 1(...)>
3033 _346 = (int) _345;
3034 where _234 has bool type, a single use, and
3035 bb N has a single successor M. This is commonly used in
3036 the last block of a range test. */
3038 static bool
3039 final_range_test_p (gimple *stmt)
3041 basic_block bb, rhs_bb, lhs_bb;
3042 edge e;
3043 tree lhs, rhs;
3044 use_operand_p use_p;
3045 gimple *use_stmt;
3047 if (!gimple_assign_cast_p (stmt)
3048 && (!is_gimple_assign (stmt)
3049 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3050 != tcc_comparison)))
3051 return false;
3052 bb = gimple_bb (stmt);
3053 if (!single_succ_p (bb))
3054 return false;
3055 e = single_succ_edge (bb);
3056 if (e->flags & EDGE_COMPLEX)
3057 return false;
3059 lhs = gimple_assign_lhs (stmt);
3060 rhs = gimple_assign_rhs1 (stmt);
3061 if (gimple_assign_cast_p (stmt)
3062 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3063 || TREE_CODE (rhs) != SSA_NAME
3064 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
3065 return false;
3067 if (!gimple_assign_cast_p (stmt)
3068 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
3069 return false;
3071 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3072 if (!single_imm_use (lhs, &use_p, &use_stmt))
3073 return false;
3075 if (gimple_code (use_stmt) != GIMPLE_PHI
3076 || gimple_bb (use_stmt) != e->dest)
3077 return false;
3079 /* And that the rhs is defined in the same loop. */
3080 if (gimple_assign_cast_p (stmt))
3082 if (TREE_CODE (rhs) != SSA_NAME
3083 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
3084 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3085 return false;
3087 else
3089 if (TREE_CODE (lhs) != SSA_NAME
3090 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
3091 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
3092 return false;
3095 return true;
3098 /* Return true if BB is suitable basic block for inter-bb range test
3099 optimization. If BACKWARD is true, BB should be the only predecessor
3100 of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
3101 or is compared against to find the common basic block to which all
3102 conditions branch if true resp. false. If BACKWARD is false, TEST_BB should
3103 be the only predecessor of BB. */
3105 static bool
3106 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3107 bool backward)
3109 edge_iterator ei, ei2;
3110 edge e, e2;
3111 gimple *stmt;
3112 gphi_iterator gsi;
3113 bool other_edge_seen = false;
3114 bool is_cond;
3116 if (test_bb == bb)
3117 return false;
3118 /* Check last stmt first. */
3119 stmt = last_stmt (bb);
3120 if (stmt == NULL
3121 || (gimple_code (stmt) != GIMPLE_COND
3122 && (backward || !final_range_test_p (stmt)))
3123 || gimple_visited_p (stmt)
3124 || stmt_could_throw_p (stmt)
3125 || *other_bb == bb)
3126 return false;
3127 is_cond = gimple_code (stmt) == GIMPLE_COND;
3128 if (is_cond)
3130 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3131 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3132 to *OTHER_BB (if not set yet, try to find it out). */
3133 if (EDGE_COUNT (bb->succs) != 2)
3134 return false;
3135 FOR_EACH_EDGE (e, ei, bb->succs)
3137 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3138 return false;
3139 if (e->dest == test_bb)
3141 if (backward)
3142 continue;
3143 else
3144 return false;
3146 if (e->dest == bb)
3147 return false;
3148 if (*other_bb == NULL)
3150 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3151 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3152 return false;
3153 else if (e->dest == e2->dest)
3154 *other_bb = e->dest;
3155 if (*other_bb == NULL)
3156 return false;
3158 if (e->dest == *other_bb)
3159 other_edge_seen = true;
3160 else if (backward)
3161 return false;
3163 if (*other_bb == NULL || !other_edge_seen)
3164 return false;
3166 else if (single_succ (bb) != *other_bb)
3167 return false;
3169 /* Now check all PHIs of *OTHER_BB. */
3170 e = find_edge (bb, *other_bb);
3171 e2 = find_edge (test_bb, *other_bb);
3172 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3174 gphi *phi = gsi.phi ();
3175 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3176 corresponding to BB and TEST_BB predecessor must be the same. */
3177 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3178 gimple_phi_arg_def (phi, e2->dest_idx), 0))
3180 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3181 one of the PHIs should have the lhs of the last stmt in
3182 that block as PHI arg and that PHI should have 0 or 1
3183 corresponding to it in all other range test basic blocks
3184 considered. */
3185 if (!is_cond)
3187 if (gimple_phi_arg_def (phi, e->dest_idx)
3188 == gimple_assign_lhs (stmt)
3189 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3190 || integer_onep (gimple_phi_arg_def (phi,
3191 e2->dest_idx))))
3192 continue;
3194 else
3196 gimple *test_last = last_stmt (test_bb);
3197 if (gimple_code (test_last) != GIMPLE_COND
3198 && gimple_phi_arg_def (phi, e2->dest_idx)
3199 == gimple_assign_lhs (test_last)
3200 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3201 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3202 continue;
3205 return false;
3208 return true;
3211 /* Return true if BB doesn't have side-effects that would disallow
3212 range test optimization, all SSA_NAMEs set in the bb are consumed
3213 in the bb and there are no PHIs. */
3215 static bool
3216 no_side_effect_bb (basic_block bb)
3218 gimple_stmt_iterator gsi;
3219 gimple *last;
3221 if (!gimple_seq_empty_p (phi_nodes (bb)))
3222 return false;
3223 last = last_stmt (bb);
3224 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3226 gimple *stmt = gsi_stmt (gsi);
3227 tree lhs;
3228 imm_use_iterator imm_iter;
3229 use_operand_p use_p;
3231 if (is_gimple_debug (stmt))
3232 continue;
3233 if (gimple_has_side_effects (stmt))
3234 return false;
3235 if (stmt == last)
3236 return true;
3237 if (!is_gimple_assign (stmt))
3238 return false;
3239 lhs = gimple_assign_lhs (stmt);
3240 if (TREE_CODE (lhs) != SSA_NAME)
3241 return false;
3242 if (gimple_assign_rhs_could_trap_p (stmt))
3243 return false;
3244 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3246 gimple *use_stmt = USE_STMT (use_p);
3247 if (is_gimple_debug (use_stmt))
3248 continue;
3249 if (gimple_bb (use_stmt) != bb)
3250 return false;
3253 return false;
3256 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3257 return true and fill in *OPS recursively. */
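/* Editor's sketch (not part of this pass): the flattening get_ops
   performs, on a toy expression tree instead of GIMPLE.  A node is
   either a leaf or combines two subtrees with one opcode; the leaves of
   a chain of the requested opcode are pushed into OPS in order, e.g.
   (a | b) | (c | d) yields {a, b, c, d}.  The single-use requirement of
   the real routine is omitted here; all types and names are
   hypothetical.  */

struct toy_expr
{
  int is_leaf;
  int leaf_id;			/* meaningful when is_leaf */
  int code;			/* 0 = OR, 1 = AND, when !is_leaf */
  struct toy_expr *lhs, *rhs;
};

/* Collect the leaves of VAR as long as its opcode matches CODE; a
   subtree with a different opcode is itself a leaf of the chain.
   Returns the number of entries written to OPS so far.  */
static int
toy_get_ops (struct toy_expr *var, int code, struct toy_expr **ops, int n)
{
  if (var->is_leaf || var->code != code)
    {
      ops[n++] = var;
      return n;
    }
  n = toy_get_ops (var->lhs, code, ops, n);
  n = toy_get_ops (var->rhs, code, ops, n);
  return n;
}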
3259 static bool
3260 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3261 struct loop *loop)
3263 gimple *stmt = SSA_NAME_DEF_STMT (var);
3264 tree rhs[2];
3265 int i;
3267 if (!is_reassociable_op (stmt, code, loop))
3268 return false;
3270 rhs[0] = gimple_assign_rhs1 (stmt);
3271 rhs[1] = gimple_assign_rhs2 (stmt);
3272 gimple_set_visited (stmt, true);
3273 for (i = 0; i < 2; i++)
3274 if (TREE_CODE (rhs[i]) == SSA_NAME
3275 && !get_ops (rhs[i], code, ops, loop)
3276 && has_single_use (rhs[i]))
3278 operand_entry *oe = operand_entry_pool.allocate ();
3280 oe->op = rhs[i];
3281 oe->rank = code;
3282 oe->id = 0;
3283 oe->count = 1;
3284 oe->stmt_to_insert = NULL;
3285 ops->safe_push (oe);
3287 return true;
3290 /* Find the ops that were added by get_ops starting from VAR, see if
3291 they were changed during update_range_test and if yes, create new
3292 stmts. */
3294 static tree
3295 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3296 unsigned int *pidx, struct loop *loop)
3298 gimple *stmt = SSA_NAME_DEF_STMT (var);
3299 tree rhs[4];
3300 int i;
3302 if (!is_reassociable_op (stmt, code, loop))
3303 return NULL;
3305 rhs[0] = gimple_assign_rhs1 (stmt);
3306 rhs[1] = gimple_assign_rhs2 (stmt);
3307 rhs[2] = rhs[0];
3308 rhs[3] = rhs[1];
3309 for (i = 0; i < 2; i++)
3310 if (TREE_CODE (rhs[i]) == SSA_NAME)
3312 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3313 if (rhs[2 + i] == NULL_TREE)
3315 if (has_single_use (rhs[i]))
3316 rhs[2 + i] = ops[(*pidx)++]->op;
3317 else
3318 rhs[2 + i] = rhs[i];
3321 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3322 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3324 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3325 var = make_ssa_name (TREE_TYPE (var));
3326 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3327 rhs[2], rhs[3]);
3328 gimple_set_uid (g, gimple_uid (stmt));
3329 gimple_set_visited (g, true);
3330 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3332 return var;
3335 /* Structure to track the initial value passed to get_ops and
3336 the range in the ops vector for each basic block. */
3338 struct inter_bb_range_test_entry
3340 tree op;
3341 unsigned int first_idx, last_idx;
3344 /* Inter-bb range test optimization.
3346 Returns TRUE if a gimple conditional is optimized to a constant true/false,
3347 otherwise returns FALSE.
3349 This indicates to the caller that it should run a CFG cleanup pass
3350 once reassociation is completed. */
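/* Editor's sketch (not part of this pass): the kind of source-level
   control flow this inter-bb optimization targets.  Each comparison
   lives in its own basic block ending in a GIMPLE_COND; after the
   rewrite all the work is done by a single range test in the first
   block, the remaining conditions become constant, and the CFG cleanup
   requested through the return value removes them.  The function below
   is hypothetical.  */

static int
chained_conditions (int x)
{
  /* Before: four blocks, each testing one value.  */
  if (x == 2)
    return 1;
  if (x == 3)
    return 1;
  if (x == 4)
    return 1;
  if (x == 5)
    return 1;
  return 0;
  /* After (conceptually): return (unsigned int) (x - 2) <= 3;  */
}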
3352 static bool
3353 maybe_optimize_range_tests (gimple *stmt)
3355 basic_block first_bb = gimple_bb (stmt);
3356 basic_block last_bb = first_bb;
3357 basic_block other_bb = NULL;
3358 basic_block bb;
3359 edge_iterator ei;
3360 edge e;
3361 auto_vec<operand_entry *> ops;
3362 auto_vec<inter_bb_range_test_entry> bbinfo;
3363 bool any_changes = false;
3364 bool cfg_cleanup_needed = false;
3366 /* Consider only basic blocks that end with GIMPLE_COND or
3367 a cast statement satisfying final_range_test_p. All
3368 but the last bb in the first_bb .. last_bb range
3369 should end with GIMPLE_COND. */
3370 if (gimple_code (stmt) == GIMPLE_COND)
3372 if (EDGE_COUNT (first_bb->succs) != 2)
3373 return cfg_cleanup_needed;
3375 else if (final_range_test_p (stmt))
3376 other_bb = single_succ (first_bb);
3377 else
3378 return cfg_cleanup_needed;
3380 if (stmt_could_throw_p (stmt))
3381 return cfg_cleanup_needed;
3383 /* As relative ordering of post-dominator sons isn't fixed,
3384 maybe_optimize_range_tests can be called first on any
3385 bb in the range we want to optimize. So, start searching
3386 backwards, if first_bb can be set to a predecessor. */
3387 while (single_pred_p (first_bb))
3389 basic_block pred_bb = single_pred (first_bb);
3390 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3391 break;
3392 if (!no_side_effect_bb (first_bb))
3393 break;
3394 first_bb = pred_bb;
3396 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3397 Before starting forward search in last_bb successors, find
3398 out the other_bb. */
3399 if (first_bb == last_bb)
3401 other_bb = NULL;
3402 /* As non-GIMPLE_COND last stmt always terminates the range,
3403 if forward search didn't discover anything, just give up. */
3404 if (gimple_code (stmt) != GIMPLE_COND)
3405 return cfg_cleanup_needed;
3406 /* Look at both successors. Either it ends with a GIMPLE_COND
3407 and satisfies suitable_cond_bb, or ends with a cast and
3408 other_bb is that cast's successor. */
3409 FOR_EACH_EDGE (e, ei, first_bb->succs)
3410 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3411 || e->dest == first_bb)
3412 return cfg_cleanup_needed;
3413 else if (single_pred_p (e->dest))
3415 stmt = last_stmt (e->dest);
3416 if (stmt
3417 && gimple_code (stmt) == GIMPLE_COND
3418 && EDGE_COUNT (e->dest->succs) == 2)
3420 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3421 break;
3422 else
3423 other_bb = NULL;
3425 else if (stmt
3426 && final_range_test_p (stmt)
3427 && find_edge (first_bb, single_succ (e->dest)))
3429 other_bb = single_succ (e->dest);
3430 if (other_bb == first_bb)
3431 other_bb = NULL;
3434 if (other_bb == NULL)
3435 return cfg_cleanup_needed;
3437 /* Now do the forward search, moving last_bb to successor bbs
3438 that aren't other_bb. */
3439 while (EDGE_COUNT (last_bb->succs) == 2)
3441 FOR_EACH_EDGE (e, ei, last_bb->succs)
3442 if (e->dest != other_bb)
3443 break;
3444 if (e == NULL)
3445 break;
3446 if (!single_pred_p (e->dest))
3447 break;
3448 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3449 break;
3450 if (!no_side_effect_bb (e->dest))
3451 break;
3452 last_bb = e->dest;
3454 if (first_bb == last_bb)
3455 return cfg_cleanup_needed;
3456 /* Here basic blocks first_bb through last_bb's predecessor
3457 end with GIMPLE_COND, all of them have one of the edges to
3458 other_bb and another to another block in the range,
3459 all blocks except first_bb don't have side-effects and
3460 last_bb ends with either GIMPLE_COND, or cast satisfying
3461 final_range_test_p. */
3462 for (bb = last_bb; ; bb = single_pred (bb))
3464 enum tree_code code;
3465 tree lhs, rhs;
3466 inter_bb_range_test_entry bb_ent;
3468 bb_ent.op = NULL_TREE;
3469 bb_ent.first_idx = ops.length ();
3470 bb_ent.last_idx = bb_ent.first_idx;
3471 e = find_edge (bb, other_bb);
3472 stmt = last_stmt (bb);
3473 gimple_set_visited (stmt, true);
3474 if (gimple_code (stmt) != GIMPLE_COND)
3476 use_operand_p use_p;
3477 gimple *phi;
3478 edge e2;
3479 unsigned int d;
3481 lhs = gimple_assign_lhs (stmt);
3482 rhs = gimple_assign_rhs1 (stmt);
3483 gcc_assert (bb == last_bb);
3485 /* stmt is
3486 _123 = (int) _234;
3488 _234 = a_2(D) == 2;
3490 followed by:
3491 <bb M>:
3492 # _345 = PHI <_123(N), 1(...), 1(...)>
3494 or 0 instead of 1. If it is 0, the _234
3495 range test is anded together with all the
3496 other range tests, if it is 1, it is ored with
3497 them. */
3498 single_imm_use (lhs, &use_p, &phi);
3499 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3500 e2 = find_edge (first_bb, other_bb);
3501 d = e2->dest_idx;
3502 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3503 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3504 code = BIT_AND_EXPR;
3505 else
3507 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3508 code = BIT_IOR_EXPR;
3511 /* If _234 SSA_NAME_DEF_STMT is
3512 _234 = _567 | _789;
3513 (or &, corresponding to 1/0 in the phi arguments),
3514 push into ops the individual range test arguments
3515 of the bitwise or resp. and, recursively. */
3516 if (TREE_CODE (rhs) == SSA_NAME
3517 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3518 != tcc_comparison)
3519 && !get_ops (rhs, code, &ops,
3520 loop_containing_stmt (stmt))
3521 && has_single_use (rhs))
3523 /* Otherwise, push the _234 range test itself. */
3524 operand_entry *oe = operand_entry_pool.allocate ();
3526 oe->op = rhs;
3527 oe->rank = code;
3528 oe->id = 0;
3529 oe->count = 1;
3530 oe->stmt_to_insert = NULL;
3531 ops.safe_push (oe);
3532 bb_ent.last_idx++;
3533 bb_ent.op = rhs;
3535 else if (is_gimple_assign (stmt)
3536 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3537 == tcc_comparison)
3538 && !get_ops (lhs, code, &ops,
3539 loop_containing_stmt (stmt))
3540 && has_single_use (lhs))
3542 operand_entry *oe = operand_entry_pool.allocate ();
3543 oe->op = lhs;
3544 oe->rank = code;
3545 oe->id = 0;
3546 oe->count = 1;
3547 ops.safe_push (oe);
3548 bb_ent.last_idx++;
3549 bb_ent.op = lhs;
3551 else
3553 bb_ent.last_idx = ops.length ();
3554 bb_ent.op = rhs;
3556 bbinfo.safe_push (bb_ent);
3557 continue;
3559 /* Otherwise stmt is GIMPLE_COND. */
3560 code = gimple_cond_code (stmt);
3561 lhs = gimple_cond_lhs (stmt);
3562 rhs = gimple_cond_rhs (stmt);
3563 if (TREE_CODE (lhs) == SSA_NAME
3564 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3565 && ((code != EQ_EXPR && code != NE_EXPR)
3566 || rhs != boolean_false_node
3567 /* Either push into ops the individual bitwise
3568 or resp. and operands, depending on which
3569 edge is other_bb. */
3570 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3571 ^ (code == EQ_EXPR))
3572 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3573 loop_containing_stmt (stmt))))
3575 /* Or push the GIMPLE_COND stmt itself. */
3576 operand_entry *oe = operand_entry_pool.allocate ();
3578 oe->op = NULL;
3579 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3580 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3581 /* oe->op = NULL signs that there is no SSA_NAME
3582 for the range test, and oe->id instead is the
3583 basic block number, at which's end the GIMPLE_COND
3584 is. */
3585 oe->id = bb->index;
3586 oe->count = 1;
3587 oe->stmt_to_insert = NULL;
3588 ops.safe_push (oe);
3589 bb_ent.op = NULL;
3590 bb_ent.last_idx++;
3592 else if (ops.length () > bb_ent.first_idx)
3594 bb_ent.op = lhs;
3595 bb_ent.last_idx = ops.length ();
3597 bbinfo.safe_push (bb_ent);
3598 if (bb == first_bb)
3599 break;
3601 if (ops.length () > 1)
3602 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3603 if (any_changes)
3605 unsigned int idx, max_idx = 0;
3606 /* update_ops relies on has_single_use predicates returning the
3607 same values as they did during get_ops earlier. Additionally it
3608 never removes statements, only adds new ones and it should walk
3609 from the single imm use and check the predicate already before
3610 making those changes.
3611 On the other hand, the handling of GIMPLE_COND directly can turn
3612 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3613 it needs to be done in a separate loop afterwards. */
3614 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3616 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3617 && bbinfo[idx].op != NULL_TREE)
3619 tree new_op;
3621 max_idx = idx;
3622 stmt = last_stmt (bb);
3623 new_op = update_ops (bbinfo[idx].op,
3624 (enum tree_code)
3625 ops[bbinfo[idx].first_idx]->rank,
3626 ops, &bbinfo[idx].first_idx,
3627 loop_containing_stmt (stmt));
3628 if (new_op == NULL_TREE)
3630 gcc_assert (bb == last_bb);
3631 new_op = ops[bbinfo[idx].first_idx++]->op;
3633 if (bbinfo[idx].op != new_op)
3635 imm_use_iterator iter;
3636 use_operand_p use_p;
3637 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
3639 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3640 if (is_gimple_debug (use_stmt))
3641 continue;
3642 else if (gimple_code (use_stmt) == GIMPLE_COND
3643 || gimple_code (use_stmt) == GIMPLE_PHI)
3644 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3645 SET_USE (use_p, new_op);
3646 else if ((is_gimple_assign (use_stmt)
3647 && (TREE_CODE_CLASS
3648 (gimple_assign_rhs_code (use_stmt))
3649 == tcc_comparison)))
3650 cast_or_tcc_cmp_stmt = use_stmt;
3651 else if (gimple_assign_cast_p (use_stmt))
3652 cast_or_tcc_cmp_stmt = use_stmt;
3653 else
3654 gcc_unreachable ();
3656 if (cast_or_tcc_cmp_stmt)
3658 gcc_assert (bb == last_bb);
3659 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
3660 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3661 enum tree_code rhs_code
3662 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
3663 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
3664 : CONVERT_EXPR;
3665 gassign *g;
3666 if (is_gimple_min_invariant (new_op))
3668 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3669 g = gimple_build_assign (new_lhs, new_op);
3671 else
3672 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3673 gimple_stmt_iterator gsi
3674 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
3675 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
3676 gimple_set_visited (g, true);
3677 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3678 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3679 if (is_gimple_debug (use_stmt))
3680 continue;
3681 else if (gimple_code (use_stmt) == GIMPLE_COND
3682 || gimple_code (use_stmt) == GIMPLE_PHI)
3683 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3684 SET_USE (use_p, new_lhs);
3685 else
3686 gcc_unreachable ();
3690 if (bb == first_bb)
3691 break;
3693 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3695 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3696 && bbinfo[idx].op == NULL_TREE
3697 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3699 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3701 if (idx > max_idx)
3702 max_idx = idx;
3704 /* If we collapse the conditional to a true/false
3705 condition, then bubble that knowledge up to our caller. */
3706 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3708 gimple_cond_make_false (cond_stmt);
3709 cfg_cleanup_needed = true;
3711 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3713 gimple_cond_make_true (cond_stmt);
3714 cfg_cleanup_needed = true;
3716 else
3718 gimple_cond_set_code (cond_stmt, NE_EXPR);
3719 gimple_cond_set_lhs (cond_stmt,
3720 ops[bbinfo[idx].first_idx]->op);
3721 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3723 update_stmt (cond_stmt);
3725 if (bb == first_bb)
3726 break;
3729 /* The above changes could result in basic blocks after the first
3730 modified one, up to and including last_bb, being executed even if
3731 they would not be in the original program. If the value ranges of
3732 assignment lhs' in those bbs were dependent on the conditions
3733 guarding those basic blocks which now can change, the VRs might
3734 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
3735 are only used within the same bb, it should not be a big deal if
3736 we just reset all the VRs in those bbs. See PR68671. */
3737 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
3738 reset_flow_sensitive_info_in_bb (bb);
3740 return cfg_cleanup_needed;
3743 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3744 of STMT in its operands. This is also known as a "destructive
3745 update" operation. */
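/* Editor's sketch (not part of this pass): the "destructive update"
   shape is_phi_for_stmt looks for, written as source.  In the loop
   below the accumulator SUM is, in SSA form, defined by a PHI whose
   arguments include the result of the addition, i.e. the addition's lhs
   feeds back into the PHI that defines one of its operands.  The
   function is hypothetical.  */

static int
sum_reduction (const int *a, int n)
{
  int sum = 0;			/* sum_1 = PHI <0, sum_2> in SSA form */
  int i;
  for (i = 0; i < n; i++)
    sum = sum + a[i];		/* sum_2 = sum_1 + a[i]; feeds the PHI */
  return sum;
}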
3747 static bool
3748 is_phi_for_stmt (gimple *stmt, tree operand)
3750 gimple *def_stmt;
3751 gphi *def_phi;
3752 tree lhs;
3753 use_operand_p arg_p;
3754 ssa_op_iter i;
3756 if (TREE_CODE (operand) != SSA_NAME)
3757 return false;
3759 lhs = gimple_assign_lhs (stmt);
3761 def_stmt = SSA_NAME_DEF_STMT (operand);
3762 def_phi = dyn_cast <gphi *> (def_stmt);
3763 if (!def_phi)
3764 return false;
3766 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3767 if (lhs == USE_FROM_PTR (arg_p))
3768 return true;
3769 return false;
3772 /* Remove def stmt of VAR if VAR has zero uses and recurse
3773 on rhs1 operand if so. */
3775 static void
3776 remove_visited_stmt_chain (tree var)
3778 gimple *stmt;
3779 gimple_stmt_iterator gsi;
3781 while (1)
3783 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3784 return;
3785 stmt = SSA_NAME_DEF_STMT (var);
3786 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3788 var = gimple_assign_rhs1 (stmt);
3789 gsi = gsi_for_stmt (stmt);
3790 reassoc_remove_stmt (&gsi);
3791 release_defs (stmt);
3793 else
3794 return;
3798 /* This function checks three consecutive operands in
3799 the passed operands vector OPS starting from OPINDEX and
3800 swaps two operands if it is profitable for the binary operation
3801 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
3803 We pair ops with the same rank if possible.
3805 The alternative we try is to see if STMT is a destructive
3806 update style statement, which is like:
3807 b = phi (a, ...)
3808 a = c + b;
3809 In that case, we want to use the destructive update form to
3810 expose the possible vectorizer sum reduction opportunity.
3811 In that case, the third operand will be the phi node. This
3812 check is not performed if STMT is null.
3814 We could, of course, try to be better as noted above, and do a
3815 lot of work to try to find these opportunities in >3 operand
3816 cases, but it is unlikely to be worth it. */
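/* Editor's sketch (not part of this pass): the rank-pairing swap above,
   on a toy operand array.  If the first two of three entries share a
   rank while the third differs, the first and third are exchanged so
   that the equal-rank pair ends up in the two positions consumed by one
   binary statement.  The types and names are hypothetical; the PHI
   (destructive update) preference is omitted.  */

struct toy_op
{
  int op;
  unsigned int rank;
};

static void
toy_swap_for_pairing (struct toy_op *ops)
{
  if (ops[0].rank == ops[1].rank && ops[1].rank != ops[2].rank)
    {
      /* Pair the equal ranks: {a(r1), b(r1), c(r2)} -> {c(r2), b(r1), a(r1)}.  */
      struct toy_op tmp = ops[0];
      ops[0] = ops[2];
      ops[2] = tmp;
    }
  else if (ops[0].rank == ops[2].rank && ops[1].rank != ops[2].rank)
    {
      /* {a(r1), b(r2), c(r1)} -> {b(r2), a(r1), c(r1)}.  */
      struct toy_op tmp = ops[0];
      ops[0] = ops[1];
      ops[1] = tmp;
    }
}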
3818 static void
3819 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
3820 unsigned int opindex, gimple *stmt)
3822 operand_entry *oe1, *oe2, *oe3;
3824 oe1 = ops[opindex];
3825 oe2 = ops[opindex + 1];
3826 oe3 = ops[opindex + 2];
3828 if ((oe1->rank == oe2->rank
3829 && oe2->rank != oe3->rank)
3830 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3831 && !is_phi_for_stmt (stmt, oe1->op)
3832 && !is_phi_for_stmt (stmt, oe2->op)))
3833 std::swap (*oe1, *oe3);
3834 else if ((oe1->rank == oe3->rank
3835 && oe2->rank != oe3->rank)
3836 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3837 && !is_phi_for_stmt (stmt, oe1->op)
3838 && !is_phi_for_stmt (stmt, oe3->op)))
3839 std::swap (*oe1, *oe2);
3842 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3843 two definitions, otherwise return STMT. */
3845 static inline gimple *
3846 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
3848 if (TREE_CODE (rhs1) == SSA_NAME
3849 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3850 stmt = SSA_NAME_DEF_STMT (rhs1);
3851 if (TREE_CODE (rhs2) == SSA_NAME
3852 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3853 stmt = SSA_NAME_DEF_STMT (rhs2);
3854 return stmt;
3857 /* If the stmt that defines operand has to be inserted, insert it
3858 before the use. */
3859 static void
3860 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
3862 gcc_assert (is_gimple_assign (stmt_to_insert));
3863 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
3864 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
3865 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
3866 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
3867 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
3869 /* If the insert point is not stmt, then insert_point would be
3870 the point where operand rhs1 or rhs2 is defined. In this case,
3871 stmt_to_insert has to be inserted afterwards. This would
3872 only happen when the stmt insertion point is flexible. */
3873 if (stmt == insert_point)
3874 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
3875 else
3876 insert_stmt_after (stmt_to_insert, insert_point);
3880 /* Recursively rewrite our linearized statements so that the operators
3881 match those in OPS[OPINDEX], putting the computation in rank
3882 order. Return new lhs. */
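/* Editor's sketch (not part of this pass): what the rewrite does at the
   source level.  Suppose linearization produced the chain
   ((a + d) + c) + b and the optimized, rank-sorted operand vector is
   {a, b, c, d}; the rewrite walks the chain and re-emits it as
   ((a + b) + c) + d, creating new SSA names where an operand actually
   changed.  The function below is hypothetical and ignores signed
   overflow corner cases.  */

static int
rank_order_rewrite (int a, int b, int c, int d)
{
  int before = ((a + d) + c) + b;	/* chain as linearized */
  int after = ((a + b) + c) + d;	/* chain in rank order */
  return before == after;		/* same value, different association */
}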
3884 static tree
3885 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
3886 vec<operand_entry *> ops, bool changed)
3888 tree rhs1 = gimple_assign_rhs1 (stmt);
3889 tree rhs2 = gimple_assign_rhs2 (stmt);
3890 tree lhs = gimple_assign_lhs (stmt);
3891 operand_entry *oe;
3893 /* The final recursion case for this function is that you have
3894 exactly two operations left.
3895 If we had exactly one op in the entire list to start with, we
3896 would have never called this function, and the tail recursion
3897 rewrites them one at a time. */
3898 if (opindex + 2 == ops.length ())
3900 operand_entry *oe1, *oe2;
3902 oe1 = ops[opindex];
3903 oe2 = ops[opindex + 1];
3905 if (rhs1 != oe1->op || rhs2 != oe2->op)
3907 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3908 unsigned int uid = gimple_uid (stmt);
3910 if (dump_file && (dump_flags & TDF_DETAILS))
3912 fprintf (dump_file, "Transforming ");
3913 print_gimple_stmt (dump_file, stmt, 0, 0);
3916 /* If the stmt that defines operand has to be inserted, insert it
3917 before the use. */
3918 if (oe1->stmt_to_insert)
3919 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
3920 if (oe2->stmt_to_insert)
3921 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
3922 /* Even when changed is false, reassociation could have e.g. removed
3923 some redundant operations, so unless we are just swapping the
3924 arguments or unless there is no change at all (then we just
3925 return lhs), force creation of a new SSA_NAME. */
3926 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3928 gimple *insert_point
3929 = find_insert_point (stmt, oe1->op, oe2->op);
3930 lhs = make_ssa_name (TREE_TYPE (lhs));
3931 stmt
3932 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3933 oe1->op, oe2->op);
3934 gimple_set_uid (stmt, uid);
3935 gimple_set_visited (stmt, true);
3936 if (insert_point == gsi_stmt (gsi))
3937 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3938 else
3939 insert_stmt_after (stmt, insert_point);
3941 else
3943 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
3944 == stmt);
3945 gimple_assign_set_rhs1 (stmt, oe1->op);
3946 gimple_assign_set_rhs2 (stmt, oe2->op);
3947 update_stmt (stmt);
3950 if (rhs1 != oe1->op && rhs1 != oe2->op)
3951 remove_visited_stmt_chain (rhs1);
3953 if (dump_file && (dump_flags & TDF_DETAILS))
3955 fprintf (dump_file, " into ");
3956 print_gimple_stmt (dump_file, stmt, 0, 0);
3959 return lhs;
3962 /* If we hit here, we should have 3 or more ops left. */
3963 gcc_assert (opindex + 2 < ops.length ());
3965 /* Rewrite the next operator. */
3966 oe = ops[opindex];
3968 /* If the stmt that defines an operand has to be inserted, insert it
3969 before the use. */
3970 if (oe->stmt_to_insert)
3971 insert_stmt_before_use (stmt, oe->stmt_to_insert);
3973 /* Recurse on the LHS of the binary operator, which is guaranteed to
3974 be the non-leaf side. */
3975 tree new_rhs1
3976 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
3977 changed || oe->op != rhs2);
3979 if (oe->op != rhs2 || new_rhs1 != rhs1)
3981 if (dump_file && (dump_flags & TDF_DETAILS))
3983 fprintf (dump_file, "Transforming ");
3984 print_gimple_stmt (dump_file, stmt, 0, 0);
3987 /* If changed is false, this is either opindex == 0
3988 or all outer rhs2's were equal to corresponding oe->op,
3989 and powi_result is NULL.
3990 That means lhs is equivalent before and after reassociation.
3991 Otherwise ensure the old lhs SSA_NAME is not reused and
3992 create a new stmt as well, so that any debug stmts will be
3993 properly adjusted. */
3994 if (changed)
3996 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3997 unsigned int uid = gimple_uid (stmt);
3998 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
4000 lhs = make_ssa_name (TREE_TYPE (lhs));
4001 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4002 new_rhs1, oe->op);
4003 gimple_set_uid (stmt, uid);
4004 gimple_set_visited (stmt, true);
4005 if (insert_point == gsi_stmt (gsi))
4006 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4007 else
4008 insert_stmt_after (stmt, insert_point);
4010 else
4012 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
4013 == stmt);
4014 gimple_assign_set_rhs1 (stmt, new_rhs1);
4015 gimple_assign_set_rhs2 (stmt, oe->op);
4016 update_stmt (stmt);
4019 if (dump_file && (dump_flags & TDF_DETAILS))
4021 fprintf (dump_file, " into ");
4022 print_gimple_stmt (dump_file, stmt, 0, 0);
4025 return lhs;
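/* Illustrative walk-through (hypothetical names, a sketch rather than
   verbatim pass output): assume rank (a) <= rank (b) <= rank (c)
   <= rank (d), so that OPS sorted by sort_by_operand_rank is
   { d, c, b, a } (highest rank first, constants last).  For the
   linearized chain

     t1 = a + d;
     t2 = t1 + c;
     t3 = t2 + b;   <-- STMT passed in with OPINDEX 0

   the recursion walks down the rhs1 chain to t1, hands the two
   lowest-ranked operands to that deepest statement, and assigns one
   operand per level on the way back up:

     u1 = b + a;
     u2 = u1 + c;
     t3 = u2 + d;

   with fresh SSA names (u1, u2) created whenever CHANGED forces it, so
   that debug statements referring to the old names stay correct.  */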
4028 /* Find out how many cycles we need to compute a statement chain.
4029 OPS_NUM holds the number of statements in a chain. CPU_WIDTH is the
4030 maximum number of independent statements we may execute per cycle. */
4032 static int
4033 get_required_cycles (int ops_num, int cpu_width)
4035 int res;
4036 int elog;
4037 unsigned int rest;
4039 /* While we have more than 2 * cpu_width operands
4040 we may reduce the number of operands by cpu_width
4041 per cycle. */
4042 res = ops_num / (2 * cpu_width);
4044 /* The remaining operand count may be reduced twice per cycle
4045 until we have only one operand. */
4046 rest = (unsigned)(ops_num - res * cpu_width);
4047 elog = exact_log2 (rest);
4048 if (elog >= 0)
4049 res += elog;
4050 else
4051 res += floor_log2 (rest) + 1;
4053 return res;
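/* Worked example (illustrative): for ops_num = 10 and cpu_width = 2,
   res = 10 / (2 * 2) = 2 cycles are spent while more than four operands
   remain, rest = 10 - 2 * 2 = 6 operands are left, and because 6 is not
   a power of two another floor_log2 (6) + 1 = 3 cycles are added,
   giving 5 cycles in total.  */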
4056 /* Returns an optimal number of registers to use for computation of
4057 given statements. */
4059 static int
4060 get_reassociation_width (int ops_num, enum tree_code opc,
4061 machine_mode mode)
4063 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
4064 int width;
4065 int width_min;
4066 int cycles_best;
4068 if (param_width > 0)
4069 width = param_width;
4070 else
4071 width = targetm.sched.reassociation_width (opc, mode);
4073 if (width == 1)
4074 return width;
4076 /* Get the minimal time required for sequence computation. */
4077 cycles_best = get_required_cycles (ops_num, width);
4079 /* Check if we may use a smaller width and still compute the sequence
4080 in the same time. That allows us to reduce register usage.
4081 get_required_cycles does not decrease as the width decreases, so
4082 we can binary search for the minimal width that still results
4083 in the optimal cycle count. */
4084 width_min = 1;
4085 while (width > width_min)
4087 int width_mid = (width + width_min) / 2;
4089 if (get_required_cycles (ops_num, width_mid) == cycles_best)
4090 width = width_mid;
4091 else if (width_min < width_mid)
4092 width_min = width_mid;
4093 else
4094 break;
4097 return width;
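/* Worked example (illustrative): for ops_num = 6 and a target-provided
   width of 4, get_required_cycles (6, 4) is 3.  The search then tries
   width 2, for which get_required_cycles (6, 2) is also 3, so the width
   is lowered to 2; width 1 would need 5 cycles, so the loop stops and 2
   is returned, reducing register pressure at no cost in cycles.  */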
4100 /* Recursively rewrite our linearized statements so that the operators
4101 match those in OPS[OPINDEX], putting the computation in rank
4102 order and trying to allow operations to be executed in
4103 parallel. */
4105 static void
4106 rewrite_expr_tree_parallel (gassign *stmt, int width,
4107 vec<operand_entry *> ops)
4109 enum tree_code opcode = gimple_assign_rhs_code (stmt);
4110 int op_num = ops.length ();
4111 int stmt_num = op_num - 1;
4112 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
4113 int op_index = op_num - 1;
4114 int stmt_index = 0;
4115 int ready_stmts_end = 0;
4116 int i = 0;
4117 gimple *stmt1 = NULL, *stmt2 = NULL;
4118 tree last_rhs1 = gimple_assign_rhs1 (stmt);
4120 /* We start expression rewriting from the top statements.
4121 So, in this loop we create a full list of statements
4122 we will work with. */
4123 stmts[stmt_num - 1] = stmt;
4124 for (i = stmt_num - 2; i >= 0; i--)
4125 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
4127 for (i = 0; i < stmt_num; i++)
4129 tree op1, op2;
4131 /* Determine whether we should use results of
4132 already handled statements or not. */
4133 if (ready_stmts_end == 0
4134 && (i - stmt_index >= width || op_index < 1))
4135 ready_stmts_end = i;
4137 /* Now we choose operands for the next statement. A nonzero
4138 value in ready_stmts_end means that we should use the
4139 result of already generated statements as a new operand. */
4140 if (ready_stmts_end > 0)
4142 op1 = gimple_assign_lhs (stmts[stmt_index++]);
4143 if (ready_stmts_end > stmt_index)
4144 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4145 else if (op_index >= 0)
4147 operand_entry *oe = ops[op_index--];
4148 stmt2 = oe->stmt_to_insert;
4149 op2 = oe->op;
4151 else
4153 gcc_assert (stmt_index < i);
4154 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4157 if (stmt_index >= ready_stmts_end)
4158 ready_stmts_end = 0;
4160 else
4162 if (op_index > 1)
4163 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
4164 operand_entry *oe2 = ops[op_index--];
4165 operand_entry *oe1 = ops[op_index--];
4166 op2 = oe2->op;
4167 stmt2 = oe2->stmt_to_insert;
4168 op1 = oe1->op;
4169 stmt1 = oe1->stmt_to_insert;
4172 /* If we emit the last statement then we should put
4173 operands into the last statement. It will also
4174 break the loop. */
4175 if (op_index < 0 && stmt_index == i)
4176 i = stmt_num - 1;
4178 if (dump_file && (dump_flags & TDF_DETAILS))
4180 fprintf (dump_file, "Transforming ");
4181 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4184 /* If the stmt that defines an operand has to be inserted, insert it
4185 before the use. */
4186 if (stmt1)
4187 insert_stmt_before_use (stmts[i], stmt1);
4188 if (stmt2)
4189 insert_stmt_before_use (stmts[i], stmt2);
4190 stmt1 = stmt2 = NULL;
4192 /* We keep the original statement only for the last one. All
4193 others are recreated. */
4194 if (i == stmt_num - 1)
4196 gimple_assign_set_rhs1 (stmts[i], op1);
4197 gimple_assign_set_rhs2 (stmts[i], op2);
4198 update_stmt (stmts[i]);
4200 else
4202 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
4204 if (dump_file && (dump_flags & TDF_DETAILS))
4206 fprintf (dump_file, " into ");
4207 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4211 remove_visited_stmt_chain (last_rhs1);
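/* Illustrative example (hypothetical names, a sketch of the typical
   result): with WIDTH 2 and the four ranked operands { a, b, c, d },
   a serial chain such as

     t1 = a + d;
     t2 = t1 + b;
     t3 = t2 + c;

   is roughly rewritten into two independent additions plus a combining
   one,

     s0 = c + d;
     s1 = a + b;
     t3 = s0 + s1;

   so that s0 and s1 can issue in the same cycle.  */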
4214 /* Transform STMT, which is really (A + B) + (C + D), into the left
4215 linear form, ((A+B)+C)+D.
4216 Recurse on D if necessary. */
4218 static void
4219 linearize_expr (gimple *stmt)
4221 gimple_stmt_iterator gsi;
4222 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4223 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4224 gimple *oldbinrhs = binrhs;
4225 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4226 gimple *newbinrhs = NULL;
4227 struct loop *loop = loop_containing_stmt (stmt);
4228 tree lhs = gimple_assign_lhs (stmt);
4230 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4231 && is_reassociable_op (binrhs, rhscode, loop));
4233 gsi = gsi_for_stmt (stmt);
4235 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4236 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4237 gimple_assign_rhs_code (binrhs),
4238 gimple_assign_lhs (binlhs),
4239 gimple_assign_rhs2 (binrhs));
4240 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4241 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4242 gimple_set_uid (binrhs, gimple_uid (stmt));
4244 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4245 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4247 if (dump_file && (dump_flags & TDF_DETAILS))
4249 fprintf (dump_file, "Linearized: ");
4250 print_gimple_stmt (dump_file, stmt, 0, 0);
4253 reassociate_stats.linearized++;
4254 update_stmt (stmt);
4256 gsi = gsi_for_stmt (oldbinrhs);
4257 reassoc_remove_stmt (&gsi);
4258 release_defs (oldbinrhs);
4260 gimple_set_visited (stmt, true);
4261 gimple_set_visited (binlhs, true);
4262 gimple_set_visited (binrhs, true);
4264 /* Tail recurse on the new rhs if it still needs reassociation. */
4265 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4266 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4267 want to change the algorithm while converting to tuples. */
4268 linearize_expr (stmt);
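/* Illustrative GIMPLE-level sketch (hypothetical names): given

     x = a + b;
     y = c + d;
     s = x + y;

   the statement s is rewritten so that its right operand becomes a leaf
   and its left operand a freshly built addition still rooted at x:

     x = a + b;
     n = x + d;
     s = n + c;

   after which the now-dead definition of y is removed.  Which of c and
   d ends up where depends on the operand order of the removed
   statement.  */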
4271 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4272 it. Otherwise, return NULL. */
4274 static gimple *
4275 get_single_immediate_use (tree lhs)
4277 use_operand_p immuse;
4278 gimple *immusestmt;
4280 if (TREE_CODE (lhs) == SSA_NAME
4281 && single_imm_use (lhs, &immuse, &immusestmt)
4282 && is_gimple_assign (immusestmt))
4283 return immusestmt;
4285 return NULL;
4288 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4289 representing the negated value. Insertions of any necessary
4290 instructions go before GSI.
4291 This function is recursive in that, if you hand it "a_5" as the
4292 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4293 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
4295 static tree
4296 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4298 gimple *negatedefstmt = NULL;
4299 tree resultofnegate;
4300 gimple_stmt_iterator gsi;
4301 unsigned int uid;
4303 /* If we are trying to negate a name, defined by an add, negate the
4304 add operands instead. */
4305 if (TREE_CODE (tonegate) == SSA_NAME)
4306 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4307 if (TREE_CODE (tonegate) == SSA_NAME
4308 && is_gimple_assign (negatedefstmt)
4309 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4310 && has_single_use (gimple_assign_lhs (negatedefstmt))
4311 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4313 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4314 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4315 tree lhs = gimple_assign_lhs (negatedefstmt);
4316 gimple *g;
4318 gsi = gsi_for_stmt (negatedefstmt);
4319 rhs1 = negate_value (rhs1, &gsi);
4321 gsi = gsi_for_stmt (negatedefstmt);
4322 rhs2 = negate_value (rhs2, &gsi);
4324 gsi = gsi_for_stmt (negatedefstmt);
4325 lhs = make_ssa_name (TREE_TYPE (lhs));
4326 gimple_set_visited (negatedefstmt, true);
4327 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4328 gimple_set_uid (g, gimple_uid (negatedefstmt));
4329 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4330 return lhs;
4333 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4334 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4335 NULL_TREE, true, GSI_SAME_STMT);
4336 gsi = *gsip;
4337 uid = gimple_uid (gsi_stmt (gsi));
4338 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4340 gimple *stmt = gsi_stmt (gsi);
4341 if (gimple_uid (stmt) != 0)
4342 break;
4343 gimple_set_uid (stmt, uid);
4345 return resultofnegate;
4348 /* Return true if we should break up the subtract in STMT into an add
4349 with negate. This is true when the subtract operands are really
4350 adds, or the subtract itself is used in an add expression. In
4351 either case, breaking up the subtract into an add with negate
4352 exposes the adds to reassociation. */
4354 static bool
4355 should_break_up_subtract (gimple *stmt)
4357 tree lhs = gimple_assign_lhs (stmt);
4358 tree binlhs = gimple_assign_rhs1 (stmt);
4359 tree binrhs = gimple_assign_rhs2 (stmt);
4360 gimple *immusestmt;
4361 struct loop *loop = loop_containing_stmt (stmt);
4363 if (TREE_CODE (binlhs) == SSA_NAME
4364 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4365 return true;
4367 if (TREE_CODE (binrhs) == SSA_NAME
4368 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4369 return true;
4371 if (TREE_CODE (lhs) == SSA_NAME
4372 && (immusestmt = get_single_immediate_use (lhs))
4373 && is_gimple_assign (immusestmt)
4374 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4375 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4376 return true;
4377 return false;
4380 /* Transform STMT from A - B into A + -B. */
4382 static void
4383 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4385 tree rhs1 = gimple_assign_rhs1 (stmt);
4386 tree rhs2 = gimple_assign_rhs2 (stmt);
4388 if (dump_file && (dump_flags & TDF_DETAILS))
4390 fprintf (dump_file, "Breaking up subtract ");
4391 print_gimple_stmt (dump_file, stmt, 0, 0);
4394 rhs2 = negate_value (rhs2, gsip);
4395 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4396 update_stmt (stmt);
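/* Illustrative source-level example (hypothetical, for a type that can
   be reassociated, e.g. unsigned int):

     e = (a + b) - (a + c);

   is broken up into e = (a + b) + -(a + c); negate_value then
   distributes the negation into (-a) + (-c), so linearization sees the
   flat chain a + b + -a + -c.  The operand-list optimization can then
   cancel a against -a, leaving essentially b + -c, which later
   processing (e.g. repropagate_negates) can turn back into a single
   subtraction.  */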
4399 /* Determine whether STMT is a builtin call that raises an SSA name
4400 to an integer power and has only one use. If so, and this is early
4401 reassociation and unsafe math optimizations are permitted, place
4402 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4403 If any of these conditions does not hold, return FALSE. */
4405 static bool
4406 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
4408 tree arg1;
4409 REAL_VALUE_TYPE c, cint;
4411 switch (gimple_call_combined_fn (stmt))
4413 CASE_CFN_POW:
4414 if (flag_errno_math)
4415 return false;
4417 *base = gimple_call_arg (stmt, 0);
4418 arg1 = gimple_call_arg (stmt, 1);
4420 if (TREE_CODE (arg1) != REAL_CST)
4421 return false;
4423 c = TREE_REAL_CST (arg1);
4425 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4426 return false;
4428 *exponent = real_to_integer (&c);
4429 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4430 if (!real_identical (&c, &cint))
4431 return false;
4433 break;
4435 CASE_CFN_POWI:
4436 *base = gimple_call_arg (stmt, 0);
4437 arg1 = gimple_call_arg (stmt, 1);
4439 if (!tree_fits_shwi_p (arg1))
4440 return false;
4442 *exponent = tree_to_shwi (arg1);
4443 break;
4445 default:
4446 return false;
4449 /* Expanding negative exponents is generally unproductive, so we don't
4450 complicate matters with those. Exponents of zero and one should
4451 have been handled by expression folding. */
4452 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4453 return false;
4455 return true;
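/* Examples (illustrative): pow (x, 3.0) yields *BASE = x and
   *EXPONENT = 3 (provided -fno-math-errno), and __builtin_powi (x, 5)
   yields exponent 5.  pow (x, 2.5) is rejected because the exponent is
   not an exact integer, pow (x, 1.0) is rejected because the exponent
   is below 2, and calls whose exponent is not a constant or whose base
   is not an SSA name are rejected as well.  */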
4458 /* Try to derive and add an operand entry for OP to *OPS. Return false if
4459 unsuccessful. */
4461 static bool
4462 try_special_add_to_ops (vec<operand_entry *> *ops,
4463 enum tree_code code,
4464 tree op, gimple* def_stmt)
4466 tree base = NULL_TREE;
4467 HOST_WIDE_INT exponent = 0;
4469 if (TREE_CODE (op) != SSA_NAME
4470 || ! has_single_use (op))
4471 return false;
4473 if (code == MULT_EXPR
4474 && reassoc_insert_powi_p
4475 && flag_unsafe_math_optimizations
4476 && is_gimple_call (def_stmt)
4477 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
4479 add_repeat_to_ops_vec (ops, base, exponent);
4480 gimple_set_visited (def_stmt, true);
4481 return true;
4483 else if (code == MULT_EXPR
4484 && is_gimple_assign (def_stmt)
4485 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
4486 && !HONOR_SNANS (TREE_TYPE (op))
4487 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
4488 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
4490 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4491 tree cst = build_minus_one_cst (TREE_TYPE (op));
4492 add_to_ops_vec (ops, rhs1);
4493 add_to_ops_vec (ops, cst);
4494 gimple_set_visited (def_stmt, true);
4495 return true;
4498 return false;
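/* Illustrative example (hypothetical names): when linearizing the
   multiplication  t = x * n  where  n = -y  has a single use, the
   second case above replaces the operand n by the pair { y, -1 }, so
   the chain becomes x * y * -1; the trailing -1 can later be folded
   back into a single negation of the whole product (see the
   negate_result handling in reassociate_bb).  */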
4501 /* Recursively linearize a binary expression that is the RHS of STMT.
4502 Place the operands of the expression tree in the vector named OPS. */
4504 static void
4505 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
4506 bool is_associative, bool set_visited)
4508 tree binlhs = gimple_assign_rhs1 (stmt);
4509 tree binrhs = gimple_assign_rhs2 (stmt);
4510 gimple *binlhsdef = NULL, *binrhsdef = NULL;
4511 bool binlhsisreassoc = false;
4512 bool binrhsisreassoc = false;
4513 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4514 struct loop *loop = loop_containing_stmt (stmt);
4516 if (set_visited)
4517 gimple_set_visited (stmt, true);
4519 if (TREE_CODE (binlhs) == SSA_NAME)
4521 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4522 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4523 && !stmt_could_throw_p (binlhsdef));
4526 if (TREE_CODE (binrhs) == SSA_NAME)
4528 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4529 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4530 && !stmt_could_throw_p (binrhsdef));
4533 /* If the LHS is not reassociable, but the RHS is, we need to swap
4534 them. If neither is reassociable, there is nothing we can do, so
4535 just put them in the ops vector. If the LHS is reassociable,
4536 linearize it. If both are reassociable, then linearize the RHS
4537 and the LHS. */
4539 if (!binlhsisreassoc)
4541 /* If this is not an associative operation, such as a division, give up. */
4542 if (!is_associative)
4544 add_to_ops_vec (ops, binrhs);
4545 return;
4548 if (!binrhsisreassoc)
4550 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4551 add_to_ops_vec (ops, binrhs);
4553 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
4554 add_to_ops_vec (ops, binlhs);
4556 return;
4559 if (dump_file && (dump_flags & TDF_DETAILS))
4561 fprintf (dump_file, "swapping operands of ");
4562 print_gimple_stmt (dump_file, stmt, 0, 0);
4565 swap_ssa_operands (stmt,
4566 gimple_assign_rhs1_ptr (stmt),
4567 gimple_assign_rhs2_ptr (stmt));
4568 update_stmt (stmt);
4570 if (dump_file && (dump_flags & TDF_DETAILS))
4572 fprintf (dump_file, " is now ");
4573 print_gimple_stmt (dump_file, stmt, 0, 0);
4576 /* We want to make it so the lhs is always the reassociative op,
4577 so swap. */
4578 std::swap (binlhs, binrhs);
4580 else if (binrhsisreassoc)
4582 linearize_expr (stmt);
4583 binlhs = gimple_assign_rhs1 (stmt);
4584 binrhs = gimple_assign_rhs2 (stmt);
4587 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4588 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4589 rhscode, loop));
4590 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4591 is_associative, set_visited);
4593 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4594 add_to_ops_vec (ops, binrhs);
4597 /* Repropagate the negates back into subtracts, since no other pass
4598 currently does it. */
4600 static void
4601 repropagate_negates (void)
4603 unsigned int i = 0;
4604 tree negate;
4606 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4608 gimple *user = get_single_immediate_use (negate);
4610 if (!user || !is_gimple_assign (user))
4611 continue;
4613 /* The negate operand can be either operand of a PLUS_EXPR
4614 (it can be the LHS if the RHS is a constant for example).
4616 Force the negate operand to the RHS of the PLUS_EXPR, then
4617 transform the PLUS_EXPR into a MINUS_EXPR. */
4618 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4620 /* If the negated operand appears on the LHS of the
4621 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4622 to force the negated operand to the RHS of the PLUS_EXPR. */
4623 if (gimple_assign_rhs1 (user) == negate)
4625 swap_ssa_operands (user,
4626 gimple_assign_rhs1_ptr (user),
4627 gimple_assign_rhs2_ptr (user));
4630 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4631 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4632 if (gimple_assign_rhs2 (user) == negate)
4634 tree rhs1 = gimple_assign_rhs1 (user);
4635 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
4636 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4637 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4638 update_stmt (user);
4641 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4643 if (gimple_assign_rhs1 (user) == negate)
4645 /* We have
4646 x = -a
4647 y = x - b
4648 which we transform into
4649 x = a + b
4650 y = -x .
4651 This pushes down the negate which we possibly can merge
4652 into some other operation, hence insert it into the
4653 plus_negates vector. */
4654 gimple *feed = SSA_NAME_DEF_STMT (negate);
4655 tree a = gimple_assign_rhs1 (feed);
4656 tree b = gimple_assign_rhs2 (user);
4657 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4658 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4659 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4660 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
4661 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4662 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4663 user = gsi_stmt (gsi2);
4664 update_stmt (user);
4665 reassoc_remove_stmt (&gsi);
4666 release_defs (feed);
4667 plus_negates.safe_push (gimple_assign_lhs (user));
4669 else
4671 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4672 rid of one operation. */
4673 gimple *feed = SSA_NAME_DEF_STMT (negate);
4674 tree a = gimple_assign_rhs1 (feed);
4675 tree rhs1 = gimple_assign_rhs1 (user);
4676 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4677 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4678 update_stmt (gsi_stmt (gsi));
4684 /* Returns true if OP is of a type for which we can do reassociation.
4685 That is for integral or non-saturating fixed-point types, and for
4686 floating point types when associative-math is enabled. */
4688 static bool
4689 can_reassociate_p (tree op)
4691 tree type = TREE_TYPE (op);
4692 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4693 || NON_SAT_FIXED_POINT_TYPE_P (type)
4694 || (flag_associative_math && FLOAT_TYPE_P (type)))
4695 return true;
4696 return false;
4699 /* Break up subtract operations in block BB.
4701 We do this top down because we don't know whether the subtract is
4702 part of a possible chain of reassociation except at the top.
4704 IE given
4705 d = f + g
4706 c = a + e
4707 b = c - d
4708 q = b - r
4709 k = t - q
4711 we want to break up k = t - q, but we won't until we've transformed q
4712 = b - r, which won't be broken up until we transform b = c - d.
4714 En passant, clear the GIMPLE visited flag on every statement
4715 and set UIDs within each basic block. */
4717 static void
4718 break_up_subtract_bb (basic_block bb)
4720 gimple_stmt_iterator gsi;
4721 basic_block son;
4722 unsigned int uid = 1;
4724 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4726 gimple *stmt = gsi_stmt (gsi);
4727 gimple_set_visited (stmt, false);
4728 gimple_set_uid (stmt, uid++);
4730 if (!is_gimple_assign (stmt)
4731 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4732 continue;
4734 /* Look for simple gimple subtract operations. */
4735 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4737 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4738 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4739 continue;
4741 /* Check for a subtract used only in an addition. If this
4742 is the case, transform it into add of a negate for better
4743 reassociation. IE transform C = A-B into C = A + -B if C
4744 is only used in an addition. */
4745 if (should_break_up_subtract (stmt))
4746 break_up_subtract (stmt, &gsi);
4748 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4749 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4750 plus_negates.safe_push (gimple_assign_lhs (stmt));
4752 for (son = first_dom_son (CDI_DOMINATORS, bb);
4753 son;
4754 son = next_dom_son (CDI_DOMINATORS, son))
4755 break_up_subtract_bb (son);
4758 /* Used for repeated factor analysis. */
4759 struct repeat_factor
4761 /* An SSA name that occurs in a multiply chain. */
4762 tree factor;
4764 /* Cached rank of the factor. */
4765 unsigned rank;
4767 /* Number of occurrences of the factor in the chain. */
4768 HOST_WIDE_INT count;
4770 /* An SSA name representing the product of this factor and
4771 all factors appearing later in the repeated factor vector. */
4772 tree repr;
4776 static vec<repeat_factor> repeat_factor_vec;
4778 /* Used for sorting the repeat factor vector. Sort primarily by
4779 ascending occurrence count, secondarily by descending rank. */
4781 static int
4782 compare_repeat_factors (const void *x1, const void *x2)
4784 const repeat_factor *rf1 = (const repeat_factor *) x1;
4785 const repeat_factor *rf2 = (const repeat_factor *) x2;
4787 if (rf1->count != rf2->count)
4788 return rf1->count - rf2->count;
4790 return rf2->rank - rf1->rank;
4793 /* Look for repeated operands in OPS in the multiply tree rooted at
4794 STMT. Replace them with an optimal sequence of multiplies and powi
4795 builtin calls, and remove the used operands from OPS. Return an
4796 SSA name representing the value of the replacement sequence. */
4798 static tree
4799 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
4801 unsigned i, j, vec_len;
4802 int ii;
4803 operand_entry *oe;
4804 repeat_factor *rf1, *rf2;
4805 repeat_factor rfnew;
4806 tree result = NULL_TREE;
4807 tree target_ssa, iter_result;
4808 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4809 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4810 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4811 gimple *mul_stmt, *pow_stmt;
4813 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4814 target. */
4815 if (!powi_fndecl)
4816 return NULL_TREE;
4818 /* Allocate the repeated factor vector. */
4819 repeat_factor_vec.create (10);
4821 /* Scan the OPS vector for all SSA names in the product and build
4822 up a vector of occurrence counts for each factor. */
4823 FOR_EACH_VEC_ELT (*ops, i, oe)
4825 if (TREE_CODE (oe->op) == SSA_NAME)
4827 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4829 if (rf1->factor == oe->op)
4831 rf1->count += oe->count;
4832 break;
4836 if (j >= repeat_factor_vec.length ())
4838 rfnew.factor = oe->op;
4839 rfnew.rank = oe->rank;
4840 rfnew.count = oe->count;
4841 rfnew.repr = NULL_TREE;
4842 repeat_factor_vec.safe_push (rfnew);
4847 /* Sort the repeated factor vector by (a) increasing occurrence count,
4848 and (b) decreasing rank. */
4849 repeat_factor_vec.qsort (compare_repeat_factors);
4851 /* It is generally best to combine as many base factors as possible
4852 into a product before applying __builtin_powi to the result.
4853 However, the sort order chosen for the repeated factor vector
4854 allows us to cache partial results for the product of the base
4855 factors for subsequent use. When we already have a cached partial
4856 result from a previous iteration, it is best to make use of it
4857 before looking for another __builtin_powi opportunity.
4859 As an example, consider x * x * y * y * y * z * z * z * z.
4860 We want to first compose the product x * y * z, raise it to the
4861 second power, then multiply this by y * z, and finally multiply
4862 by z. This can be done in 5 multiplies provided we cache y * z
4863 for use in both expressions:
4865 t1 = y * z
4866 t2 = t1 * x
4867 t3 = t2 * t2
4868 t4 = t1 * t3
4869 result = t4 * z
4871 If we instead ignored the cached y * z and first multiplied by
4872 the __builtin_powi opportunity z * z, we would get the inferior:
4874 t1 = y * z
4875 t2 = t1 * x
4876 t3 = t2 * t2
4877 t4 = z * z
4878 t5 = t3 * t4
4879 result = t5 * y */
4881 vec_len = repeat_factor_vec.length ();
4883 /* Repeatedly look for opportunities to create a builtin_powi call. */
4884 while (true)
4886 HOST_WIDE_INT power;
4888 /* First look for the largest cached product of factors from
4889 preceding iterations. If found, create a builtin_powi for
4890 it if the minimum occurrence count for its factors is at
4891 least 2, or just use this cached product as our next
4892 multiplicand if the minimum occurrence count is 1. */
4893 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4895 if (rf1->repr && rf1->count > 0)
4896 break;
4899 if (j < vec_len)
4901 power = rf1->count;
4903 if (power == 1)
4905 iter_result = rf1->repr;
4907 if (dump_file && (dump_flags & TDF_DETAILS))
4909 unsigned elt;
4910 repeat_factor *rf;
4911 fputs ("Multiplying by cached product ", dump_file);
4912 for (elt = j; elt < vec_len; elt++)
4914 rf = &repeat_factor_vec[elt];
4915 print_generic_expr (dump_file, rf->factor, 0);
4916 if (elt < vec_len - 1)
4917 fputs (" * ", dump_file);
4919 fputs ("\n", dump_file);
4922 else
4924 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4925 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4926 build_int_cst (integer_type_node,
4927 power));
4928 gimple_call_set_lhs (pow_stmt, iter_result);
4929 gimple_set_location (pow_stmt, gimple_location (stmt));
4930 gimple_set_uid (pow_stmt, gimple_uid (stmt));
4931 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4933 if (dump_file && (dump_flags & TDF_DETAILS))
4935 unsigned elt;
4936 repeat_factor *rf;
4937 fputs ("Building __builtin_pow call for cached product (",
4938 dump_file);
4939 for (elt = j; elt < vec_len; elt++)
4941 rf = &repeat_factor_vec[elt];
4942 print_generic_expr (dump_file, rf->factor, 0);
4943 if (elt < vec_len - 1)
4944 fputs (" * ", dump_file);
4946 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
4947 power);
4951 else
4953 /* Otherwise, find the first factor in the repeated factor
4954 vector whose occurrence count is at least 2. If no such
4955 factor exists, there are no builtin_powi opportunities
4956 remaining. */
4957 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4959 if (rf1->count >= 2)
4960 break;
4963 if (j >= vec_len)
4964 break;
4966 power = rf1->count;
4968 if (dump_file && (dump_flags & TDF_DETAILS))
4970 unsigned elt;
4971 repeat_factor *rf;
4972 fputs ("Building __builtin_pow call for (", dump_file);
4973 for (elt = j; elt < vec_len; elt++)
4975 rf = &repeat_factor_vec[elt];
4976 print_generic_expr (dump_file, rf->factor, 0);
4977 if (elt < vec_len - 1)
4978 fputs (" * ", dump_file);
4980 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
4983 reassociate_stats.pows_created++;
4985 /* Visit each element of the vector in reverse order (so that
4986 high-occurrence elements are visited first, and within the
4987 same occurrence count, lower-ranked elements are visited
4988 first). Form a linear product of all elements in this order
4989 whose occurrence count is at least that of element J.
4990 Record the SSA name representing the product of each element
4991 with all subsequent elements in the vector. */
4992 if (j == vec_len - 1)
4993 rf1->repr = rf1->factor;
4994 else
4996 for (ii = vec_len - 2; ii >= (int)j; ii--)
4998 tree op1, op2;
5000 rf1 = &repeat_factor_vec[ii];
5001 rf2 = &repeat_factor_vec[ii + 1];
5003 /* Init the last factor's representative to be itself. */
5004 if (!rf2->repr)
5005 rf2->repr = rf2->factor;
5007 op1 = rf1->factor;
5008 op2 = rf2->repr;
5010 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
5011 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
5012 op1, op2);
5013 gimple_set_location (mul_stmt, gimple_location (stmt));
5014 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5015 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5016 rf1->repr = target_ssa;
5018 /* Don't reprocess the multiply we just introduced. */
5019 gimple_set_visited (mul_stmt, true);
5023 /* Form a call to __builtin_powi for the maximum product
5024 just formed, raised to the power obtained earlier. */
5025 rf1 = &repeat_factor_vec[j];
5026 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5027 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5028 build_int_cst (integer_type_node,
5029 power));
5030 gimple_call_set_lhs (pow_stmt, iter_result);
5031 gimple_set_location (pow_stmt, gimple_location (stmt));
5032 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5033 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5036 /* If we previously formed at least one other builtin_powi call,
5037 form the product of this one and those others. */
5038 if (result)
5040 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
5041 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
5042 result, iter_result);
5043 gimple_set_location (mul_stmt, gimple_location (stmt));
5044 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5045 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5046 gimple_set_visited (mul_stmt, true);
5047 result = new_result;
5049 else
5050 result = iter_result;
5052 /* Decrement the occurrence count of each element in the product
5053 by the count found above, and remove this many copies of each
5054 factor from OPS. */
5055 for (i = j; i < vec_len; i++)
5057 unsigned k = power;
5058 unsigned n;
5060 rf1 = &repeat_factor_vec[i];
5061 rf1->count -= power;
5063 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
5065 if (oe->op == rf1->factor)
5067 if (oe->count <= k)
5069 ops->ordered_remove (n);
5070 k -= oe->count;
5072 if (k == 0)
5073 break;
5075 else
5077 oe->count -= k;
5078 break;
5085 /* At this point all elements in the repeated factor vector have a
5086 remaining occurrence count of 0 or 1, and those with a count of 1
5087 don't have cached representatives. Re-sort the ops vector and
5088 clean up. */
5089 ops->qsort (sort_by_operand_rank);
5090 repeat_factor_vec.release ();
5092 /* Return the final product computed herein. Note that there may
5093 still be some elements with single occurrence count left in OPS;
5094 those will be handled by the normal reassociation logic. */
5095 return result;
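/* Source-level sketch of the effect (illustrative, assuming
   -funsafe-math-optimizations and a target providing __builtin_powi):

     double f (double x, double y) { return x * x * x * y * y * y; }

   has both factors occurring three times, so a single product of the
   bases is formed and raised to that power, roughly

     t = x * y;
     return __builtin_powi (t, 3);

   with any leftover single-occurrence operands handled by the normal
   reassociation rewrite.  */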
5098 /* Attempt to optimize
5099 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5100 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
5102 static void
5103 attempt_builtin_copysign (vec<operand_entry *> *ops)
5105 operand_entry *oe;
5106 unsigned int i;
5107 unsigned int length = ops->length ();
5108 tree cst = ops->last ()->op;
5110 if (length == 1 || TREE_CODE (cst) != REAL_CST)
5111 return;
5113 FOR_EACH_VEC_ELT (*ops, i, oe)
5115 if (TREE_CODE (oe->op) == SSA_NAME
5116 && has_single_use (oe->op))
5118 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5119 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
5121 tree arg0, arg1;
5122 switch (gimple_call_combined_fn (old_call))
5124 CASE_CFN_COPYSIGN:
5125 arg0 = gimple_call_arg (old_call, 0);
5126 arg1 = gimple_call_arg (old_call, 1);
5127 /* The first argument of copysign must be a constant,
5128 otherwise there's nothing to do. */
5129 if (TREE_CODE (arg0) == REAL_CST)
5131 tree type = TREE_TYPE (arg0);
5132 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
5133 /* If we couldn't fold to a single constant, skip it.
5134 That happens e.g. for inexact multiplication when
5135 -frounding-math. */
5136 if (mul == NULL_TREE)
5137 break;
5138 /* Instead of adjusting OLD_CALL, let's build a new
5139 call to not leak the LHS and prevent keeping bogus
5140 debug statements. DCE will clean up the old call. */
5141 gcall *new_call;
5142 if (gimple_call_internal_p (old_call))
5143 new_call = gimple_build_call_internal
5144 (IFN_COPYSIGN, 2, mul, arg1);
5145 else
5146 new_call = gimple_build_call
5147 (gimple_call_fndecl (old_call), 2, mul, arg1);
5148 tree lhs = make_ssa_name (type);
5149 gimple_call_set_lhs (new_call, lhs);
5150 gimple_set_location (new_call,
5151 gimple_location (old_call));
5152 insert_stmt_after (new_call, old_call);
5153 /* We've used the constant, get rid of it. */
5154 ops->pop ();
5155 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
5156 /* Handle the CST1 < 0 case by negating the result. */
5157 if (cst1_neg)
5159 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
5160 gimple *negate_stmt
5161 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
5162 insert_stmt_after (negate_stmt, new_call);
5163 oe->op = negrhs;
5165 else
5166 oe->op = lhs;
5167 if (dump_file && (dump_flags & TDF_DETAILS))
5169 fprintf (dump_file, "Optimizing copysign: ");
5170 print_generic_expr (dump_file, cst, 0);
5171 fprintf (dump_file, " * COPYSIGN (");
5172 print_generic_expr (dump_file, arg0, 0);
5173 fprintf (dump_file, ", ");
5174 print_generic_expr (dump_file, arg1, 0);
5175 fprintf (dump_file, ") into %sCOPYSIGN (",
5176 cst1_neg ? "-" : "");
5177 print_generic_expr (dump_file, mul, 0);
5178 fprintf (dump_file, ", ");
5179 print_generic_expr (dump_file, arg1, 0);
5180 fprintf (dump_file, ")\n");
5182 return;
5184 break;
5185 default:
5186 break;
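/* Illustrative examples: in the chain 2.0 * copysign (3.0, y) * z the
   call is rebuilt as copysign (6.0, y), the constant 2.0 is popped from
   OPS, and the chain multiplies the new call's result by z.  With a
   negative constant, -2.0 * copysign (3.0, y) * z, the rebuilt call
   copysign (-6.0, y) is additionally negated by a separate statement,
   matching the CST1 < 0 case described above.  */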
5193 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
5195 static void
5196 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
5198 tree rhs1;
5200 if (dump_file && (dump_flags & TDF_DETAILS))
5202 fprintf (dump_file, "Transforming ");
5203 print_gimple_stmt (dump_file, stmt, 0, 0);
5206 rhs1 = gimple_assign_rhs1 (stmt);
5207 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
5208 update_stmt (stmt);
5209 remove_visited_stmt_chain (rhs1);
5211 if (dump_file && (dump_flags & TDF_DETAILS))
5213 fprintf (dump_file, " into ");
5214 print_gimple_stmt (dump_file, stmt, 0, 0);
5218 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
5220 static void
5221 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
5222 tree rhs1, tree rhs2)
5224 if (dump_file && (dump_flags & TDF_DETAILS))
5226 fprintf (dump_file, "Transforming ");
5227 print_gimple_stmt (dump_file, stmt, 0, 0);
5230 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5231 update_stmt (gsi_stmt (*gsi));
5232 remove_visited_stmt_chain (rhs1);
5234 if (dump_file && (dump_flags & TDF_DETAILS))
5236 fprintf (dump_file, " into ");
5237 print_gimple_stmt (dump_file, stmt, 0, 0);
5241 /* Reassociate expressions in basic block BB and its children in
5242 the post-dominator tree.
5244 Bubble up the return status from maybe_optimize_range_tests. */
5246 static bool
5247 reassociate_bb (basic_block bb)
5249 gimple_stmt_iterator gsi;
5250 basic_block son;
5251 gimple *stmt = last_stmt (bb);
5252 bool cfg_cleanup_needed = false;
5254 if (stmt && !gimple_visited_p (stmt))
5255 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5257 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5259 stmt = gsi_stmt (gsi);
5261 if (is_gimple_assign (stmt)
5262 && !stmt_could_throw_p (stmt))
5264 tree lhs, rhs1, rhs2;
5265 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5267 /* If this is not a gimple binary expression, there is
5268 nothing for us to do with it. */
5269 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5270 continue;
5272 /* If this was part of an already processed statement,
5273 we don't need to touch it again. */
5274 if (gimple_visited_p (stmt))
5276 /* This statement might have become dead because of previous
5277 reassociations. */
5278 if (has_zero_uses (gimple_get_lhs (stmt)))
5280 reassoc_remove_stmt (&gsi);
5281 release_defs (stmt);
5282 /* We might end up removing the last stmt above, which
5283 places the iterator at the end of the sequence.
5284 Reset it to the last stmt in this case, which might
5285 again be the end of the sequence if we removed
5286 the last statement of the sequence, in which case
5287 we need to bail out. */
5288 if (gsi_end_p (gsi))
5290 gsi = gsi_last_bb (bb);
5291 if (gsi_end_p (gsi))
5292 break;
5295 continue;
5298 lhs = gimple_assign_lhs (stmt);
5299 rhs1 = gimple_assign_rhs1 (stmt);
5300 rhs2 = gimple_assign_rhs2 (stmt);
5302 /* For operations other than bitwise and min/max we can't
5303 associate all types. Verify that here. */
5304 if (rhs_code != BIT_IOR_EXPR
5305 && rhs_code != BIT_AND_EXPR
5306 && rhs_code != BIT_XOR_EXPR
5307 && rhs_code != MIN_EXPR
5308 && rhs_code != MAX_EXPR
5309 && (!can_reassociate_p (lhs)
5310 || !can_reassociate_p (rhs1)
5311 || !can_reassociate_p (rhs2)))
5312 continue;
5314 if (associative_tree_code (rhs_code))
5316 auto_vec<operand_entry *> ops;
5317 tree powi_result = NULL_TREE;
5318 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5320 /* There may be no immediate uses left by the time we
5321 get here because we may have eliminated them all. */
5322 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5323 continue;
5325 gimple_set_visited (stmt, true);
5326 linearize_expr_tree (&ops, stmt, true, true);
5327 ops.qsort (sort_by_operand_rank);
5328 optimize_ops_list (rhs_code, &ops);
5329 if (undistribute_ops_list (rhs_code, &ops,
5330 loop_containing_stmt (stmt)))
5332 ops.qsort (sort_by_operand_rank);
5333 optimize_ops_list (rhs_code, &ops);
5336 if (rhs_code == PLUS_EXPR
5337 && transform_add_to_multiply (&ops))
5338 ops.qsort (sort_by_operand_rank);
5340 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5342 if (is_vector)
5343 optimize_vec_cond_expr (rhs_code, &ops);
5344 else
5345 optimize_range_tests (rhs_code, &ops);
5348 if (rhs_code == MULT_EXPR && !is_vector)
5350 attempt_builtin_copysign (&ops);
5352 if (reassoc_insert_powi_p
5353 && flag_unsafe_math_optimizations)
5354 powi_result = attempt_builtin_powi (stmt, &ops);
5357 operand_entry *last;
5358 bool negate_result = false;
5359 if (ops.length () > 1
5360 && rhs_code == MULT_EXPR)
5362 last = ops.last ();
5363 if ((integer_minus_onep (last->op)
5364 || real_minus_onep (last->op))
5365 && !HONOR_SNANS (TREE_TYPE (lhs))
5366 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
5367 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
5369 ops.pop ();
5370 negate_result = true;
5374 tree new_lhs = lhs;
5375 /* If the operand vector is now empty, all operands were
5376 consumed by the __builtin_powi optimization. */
5377 if (ops.length () == 0)
5378 transform_stmt_to_copy (&gsi, stmt, powi_result);
5379 else if (ops.length () == 1)
5381 tree last_op = ops.last ()->op;
5383 /* If the stmt that defines an operand has to be inserted, insert it
5384 before the use. */
5385 if (ops.last ()->stmt_to_insert)
5386 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
5387 if (powi_result)
5388 transform_stmt_to_multiply (&gsi, stmt, last_op,
5389 powi_result);
5390 else
5391 transform_stmt_to_copy (&gsi, stmt, last_op);
5393 else
5395 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5396 int ops_num = ops.length ();
5397 int width = get_reassociation_width (ops_num, rhs_code, mode);
5399 if (dump_file && (dump_flags & TDF_DETAILS))
5400 fprintf (dump_file,
5401 "Width = %d was chosen for reassociation\n", width);
5403 if (width > 1
5404 && ops.length () > 3)
5405 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
5406 width, ops);
5407 else
5409 /* When there are three operands left, we want
5410 to make sure the ones that get the double
5411 binary op are chosen wisely. */
5412 int len = ops.length ();
5413 if (len >= 3)
5414 swap_ops_for_binary_stmt (ops, len - 3, stmt);
5416 new_lhs = rewrite_expr_tree (stmt, 0, ops,
5417 powi_result != NULL
5418 || negate_result);
5421 /* If we combined some repeated factors into a
5422 __builtin_powi call, multiply that result by the
5423 reassociated operands. */
5424 if (powi_result)
5426 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
5427 tree type = TREE_TYPE (lhs);
5428 tree target_ssa = make_temp_ssa_name (type, NULL,
5429 "reassocpow");
5430 gimple_set_lhs (lhs_stmt, target_ssa);
5431 update_stmt (lhs_stmt);
5432 if (lhs != new_lhs)
5434 target_ssa = new_lhs;
5435 new_lhs = lhs;
5437 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
5438 powi_result, target_ssa);
5439 gimple_set_location (mul_stmt, gimple_location (stmt));
5440 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5441 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
5445 if (negate_result)
5447 stmt = SSA_NAME_DEF_STMT (lhs);
5448 tree tmp = make_ssa_name (TREE_TYPE (lhs));
5449 gimple_set_lhs (stmt, tmp);
5450 if (lhs != new_lhs)
5451 tmp = new_lhs;
5452 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
5453 tmp);
5454 gimple_set_uid (neg_stmt, gimple_uid (stmt));
5455 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
5456 update_stmt (stmt);
5461 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
5462 son;
5463 son = next_dom_son (CDI_POST_DOMINATORS, son))
5464 cfg_cleanup_needed |= reassociate_bb (son);
5466 return cfg_cleanup_needed;
5469 /* Add jumps around shifts for range tests turned into bit tests.
5470 For each SSA_NAME VAR we have code like:
5471 VAR = ...; // final stmt of range comparison
5472 // bit test here...;
5473 OTHERVAR = ...; // final stmt of the bit test sequence
5474 RES = VAR | OTHERVAR;
5475 Turn the above into:
5476 VAR = ...;
5477 if (VAR != 0)
5478 goto <l3>;
5479 else
5480 goto <l2>;
5481 <l2>:
5482 // bit test here...;
5483 OTHERVAR = ...;
5484 <l3>:
5485 # RES = PHI<1(l1), OTHERVAR(l2)>; */
5487 static void
5488 branch_fixup (void)
5490 tree var;
5491 unsigned int i;
5493 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
5495 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
5496 gimple *use_stmt;
5497 use_operand_p use;
5498 bool ok = single_imm_use (var, &use, &use_stmt);
5499 gcc_assert (ok
5500 && is_gimple_assign (use_stmt)
5501 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
5502 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
5504 basic_block cond_bb = gimple_bb (def_stmt);
5505 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
5506 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
5508 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5509 gimple *g = gimple_build_cond (NE_EXPR, var,
5510 build_zero_cst (TREE_TYPE (var)),
5511 NULL_TREE, NULL_TREE);
5512 location_t loc = gimple_location (use_stmt);
5513 gimple_set_location (g, loc);
5514 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
5516 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
5517 etrue->probability = REG_BR_PROB_BASE / 2;
5518 etrue->count = cond_bb->count / 2;
5519 edge efalse = find_edge (cond_bb, then_bb);
5520 efalse->flags = EDGE_FALSE_VALUE;
5521 efalse->probability -= etrue->probability;
5522 efalse->count -= etrue->count;
5523 then_bb->count -= etrue->count;
5525 tree othervar = NULL_TREE;
5526 if (gimple_assign_rhs1 (use_stmt) == var)
5527 othervar = gimple_assign_rhs2 (use_stmt);
5528 else if (gimple_assign_rhs2 (use_stmt) == var)
5529 othervar = gimple_assign_rhs1 (use_stmt);
5530 else
5531 gcc_unreachable ();
5532 tree lhs = gimple_assign_lhs (use_stmt);
5533 gphi *phi = create_phi_node (lhs, merge_bb);
5534 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
5535 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
5536 gsi = gsi_for_stmt (use_stmt);
5537 gsi_remove (&gsi, true);
5539 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
5540 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
5542 reassoc_branch_fixups.release ();
5545 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
5546 void debug_ops_vector (vec<operand_entry *> ops);
5548 /* Dump the operand entry vector OPS to FILE. */
5550 void
5551 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
5553 operand_entry *oe;
5554 unsigned int i;
5556 FOR_EACH_VEC_ELT (ops, i, oe)
5558 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
5559 print_generic_expr (file, oe->op, 0);
5560 fprintf (file, "\n");
5564 /* Dump the operand entry vector OPS to STDERR. */
5566 DEBUG_FUNCTION void
5567 debug_ops_vector (vec<operand_entry *> ops)
5569 dump_ops_vector (stderr, ops);
5572 /* Bubble up return status from reassociate_bb. */
5574 static bool
5575 do_reassoc (void)
5577 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5578 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
5581 /* Initialize the reassociation pass. */
5583 static void
5584 init_reassoc (void)
5586 int i;
5587 long rank = 2;
5588 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
5590 /* Find the loops, so that we can prevent moving calculations in
5591 them. */
5592 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5594 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
5596 next_operand_entry_id = 0;
5598 /* RPO (Reverse Post Order) will give us an ordering in which
5599 deeper loops come later. */
5600 pre_and_rev_post_order_compute (NULL, bbs, false);
5601 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5602 operand_rank = new hash_map<tree, long>;
5604 /* Give each default definition a distinct rank. This includes
5605 parameters and the static chain. Walk backwards over all
5606 SSA names so that we get proper rank ordering according
5607 to tree_swap_operands_p. */
5608 for (i = num_ssa_names - 1; i > 0; --i)
5610 tree name = ssa_name (i);
5611 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5612 insert_operand_rank (name, ++rank);
5615 /* Set up rank for each BB */
5616 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5617 bb_rank[bbs[i]] = ++rank << 16;
5619 free (bbs);
5620 calculate_dominance_info (CDI_POST_DOMINATORS);
5621 plus_negates = vNULL;
5624 /* Cleanup after the reassociation pass, and print stats if
5625 requested. */
5627 static void
5628 fini_reassoc (void)
5630 statistics_counter_event (cfun, "Linearized",
5631 reassociate_stats.linearized);
5632 statistics_counter_event (cfun, "Constants eliminated",
5633 reassociate_stats.constants_eliminated);
5634 statistics_counter_event (cfun, "Ops eliminated",
5635 reassociate_stats.ops_eliminated);
5636 statistics_counter_event (cfun, "Statements rewritten",
5637 reassociate_stats.rewritten);
5638 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5639 reassociate_stats.pows_encountered);
5640 statistics_counter_event (cfun, "Built-in powi calls created",
5641 reassociate_stats.pows_created);
5643 delete operand_rank;
5644 operand_entry_pool.release ();
5645 free (bb_rank);
5646 plus_negates.release ();
5647 free_dominance_info (CDI_POST_DOMINATORS);
5648 loop_optimizer_finalize ();
5651 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
5652 insertion of __builtin_powi calls.
5654 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
5655 optimization of a gimple conditional. Otherwise returns zero. */
5657 static unsigned int
5658 execute_reassoc (bool insert_powi_p)
5660 reassoc_insert_powi_p = insert_powi_p;
5662 init_reassoc ();
5664 bool cfg_cleanup_needed;
5665 cfg_cleanup_needed = do_reassoc ();
5666 repropagate_negates ();
5667 branch_fixup ();
5669 fini_reassoc ();
5670 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
5673 namespace {
5675 const pass_data pass_data_reassoc =
5677 GIMPLE_PASS, /* type */
5678 "reassoc", /* name */
5679 OPTGROUP_NONE, /* optinfo_flags */
5680 TV_TREE_REASSOC, /* tv_id */
5681 ( PROP_cfg | PROP_ssa ), /* properties_required */
5682 0, /* properties_provided */
5683 0, /* properties_destroyed */
5684 0, /* todo_flags_start */
5685 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5688 class pass_reassoc : public gimple_opt_pass
5690 public:
5691 pass_reassoc (gcc::context *ctxt)
5692 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
5695 /* opt_pass methods: */
5696 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5697 void set_pass_param (unsigned int n, bool param)
5699 gcc_assert (n == 0);
5700 insert_powi_p = param;
5702 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5703 virtual unsigned int execute (function *)
5704 { return execute_reassoc (insert_powi_p); }
5706 private:
5707 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
5708 point 3a in the pass header comment. */
5709 bool insert_powi_p;
5710 }; // class pass_reassoc
5712 } // anon namespace
5714 gimple_opt_pass *
5715 make_pass_reassoc (gcc::context *ctxt)
5717 return new pass_reassoc (ctxt);