1 /* Reassociation for trees.
2 Copyright (C) 2005-2016 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "tm_p.h"
33 #include "ssa.h"
34 #include "optabs-tree.h"
35 #include "gimple-pretty-print.h"
36 #include "diagnostic-core.h"
37 #include "fold-const.h"
38 #include "stor-layout.h"
39 #include "cfganal.h"
40 #include "gimple-fold.h"
41 #include "tree-eh.h"
42 #include "gimple-iterator.h"
43 #include "gimplify-me.h"
44 #include "tree-cfg.h"
45 #include "tree-ssa-loop.h"
46 #include "flags.h"
47 #include "tree-ssa.h"
48 #include "langhooks.h"
49 #include "cfgloop.h"
50 #include "params.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
55 /* This is a simple global reassociation pass. It is, in part, based
56 on the LLVM pass of the same name (They do some things more/less
57 than we do, in different orders, etc).
59 It consists of five steps:
61 1. Breaking up subtract operations into addition + negate, where
62 it would promote the reassociation of adds.
64 2. Left linearization of the expression trees, so that (A+B)+(C+D)
65 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
66 During linearization, we place the operands of the binary
67 expressions into a vector of operand_entry_*
69 3. Optimization of the operand lists, eliminating things like a +
70 -a, a & a, etc.
72 3a. Combine repeated factors with the same occurrence counts
73 into a __builtin_powi call that will later be optimized into
74 an optimal number of multiplies.
76 4. Rewrite the expression trees we linearized and optimized so
77 they are in proper rank order.
79 5. Repropagate negates, as nothing else will clean it up ATM.
81 A bit of theory on #4, since nobody seems to write anything down
82 about why it makes sense to do it the way they do it:
84 We could do this much nicer theoretically, but don't (for reasons
85 explained after how to do it theoretically nice :P).
87 In order to promote the most redundancy elimination, you want
88 binary expressions whose operands are the same rank (or
89 preferably, the same value) exposed to the redundancy eliminator,
90 for possible elimination.
92 So the way to do this if we really cared, is to build the new op
93 tree from the leaves to the roots, merging as you go, and putting the
94 new op on the end of the worklist, until you are left with one
95 thing on the worklist.
97 IE if you have to rewrite the following set of operands (listed with
98 rank in parentheses), with opcode PLUS_EXPR:
100 a (1), b (1), c (1), d (2), e (2)
103 We start with our merge worklist empty, and the ops list with all of
104 those on it.
106 You want to first merge all leaves of the same rank, as much as
107 possible.
109 So first build a binary op of
111 mergetmp = a + b, and put "mergetmp" on the merge worklist.
113 Because there is no three operand form of PLUS_EXPR, c is not going to
114 be exposed to redundancy elimination as a rank 1 operand.
116 So you might as well throw it on the merge worklist (you could also
117 consider it to now be a rank two operand, and merge it with d and e,
118 but in this case, you then have evicted e from a binary op. So at
119 least in this situation, you can't win.)
121 Then build a binary op of d + e
122 mergetmp2 = d + e
124 and put mergetmp2 on the merge worklist.
126 so merge worklist = {mergetmp, c, mergetmp2}
128 Continue building binary ops of these operations until you have only
129 one operation left on the worklist.
131 So we have
133 build binary op
134 mergetmp3 = mergetmp + c
136 worklist = {mergetmp2, mergetmp3}
138 mergetmp4 = mergetmp2 + mergetmp3
140 worklist = {mergetmp4}
 142 Because we have one operation left, we can now just set the original
143 statement equal to the result of that operation.
145 This will at least expose a + b and d + e to redundancy elimination
146 as binary operations.
148 For extra points, you can reuse the old statements to build the
149 mergetmps, since you shouldn't run out.
151 So why don't we do this?
153 Because it's expensive, and rarely will help. Most trees we are
 154 reassociating have 3 or fewer ops. If they have 2 ops, they already
155 will be written into a nice single binary op. If you have 3 ops, a
156 single simple check suffices to tell you whether the first two are of the
157 same rank. If so, you know to order it
159 mergetmp = op1 + op2
160 newstmt = mergetmp + op3
162 instead of
163 mergetmp = op2 + op3
164 newstmt = mergetmp + op1
166 If all three are of the same rank, you can't expose them all in a
167 single binary operator anyway, so the above is *still* the best you
168 can do.
170 Thus, this is what we do. When we have three ops left, we check to see
171 what order to put them in, and call it a day. As a nod to vector sum
172 reduction, we check if any of the ops are really a phi node that is a
173 destructive update for the associating op, and keep the destructive
174 update together for vector sum reduction recognition. */
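/* A minimal stand-alone sketch (not part of the pass) of the
   leaves-to-roots merging strategy described above.  The operand names,
   ranks and the merge () helper are hypothetical; the real pass works on
   GIMPLE statements and operand_entry vectors rather than strings.  The
   code is guarded by #if 0 so it does not affect this translation unit;
   extract and compile it separately to see the merge order printed.  */
#if 0
#include <stdio.h>

struct toy_op { char name[16]; int rank; };

/* Merge OPS[I] and OPS[J] (I < J) into a new temporary, print the
   resulting statement and push the temporary onto the end of the
   worklist.  Return the new length of OPS.  */
static int
merge (struct toy_op *ops, int n, int i, int j, int *tmpno)
{
  struct toy_op t;
  snprintf (t.name, sizeof t.name, "mergetmp%d", ++*tmpno);
  t.rank = ops[i].rank > ops[j].rank ? ops[i].rank : ops[j].rank;
  printf ("%s = %s + %s\n", t.name, ops[i].name, ops[j].name);
  ops[j] = ops[--n];   /* Remove J by moving the last entry into it.  */
  ops[i] = ops[--n];   /* Likewise for I.  */
  ops[n++] = t;        /* Push the merge result.  */
  return n;
}

int
main (void)
{
  /* The operand list from the example above: a (1), b (1), c (1),
     d (2), e (2).  */
  struct toy_op ops[16] = { {"a", 1}, {"b", 1}, {"c", 1}, {"d", 2}, {"e", 2} };
  int n = 5, tmpno = 0;

  while (n > 1)
    {
      int i = 0, j = 1, a, b, found = 0;
      /* Prefer merging two operands of equal rank, as described above;
         if there is no such pair, take the first two on the worklist.  */
      for (a = 0; a < n && !found; a++)
        for (b = a + 1; b < n && !found; b++)
          if (ops[a].rank == ops[b].rank)
            {
              i = a;
              j = b;
              found = 1;
            }
      n = merge (ops, n, i, j, &tmpno);
    }
  return 0;
}
#endif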
176 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
177 point 3a in the pass header comment. */
178 static bool reassoc_insert_powi_p;
180 /* Statistics */
181 static struct
183 int linearized;
184 int constants_eliminated;
185 int ops_eliminated;
186 int rewritten;
187 int pows_encountered;
188 int pows_created;
189 } reassociate_stats;
191 /* Operator, rank pair. */
192 struct operand_entry
194 unsigned int rank;
195 int id;
196 tree op;
197 unsigned int count;
198 gimple *stmt_to_insert;
201 static object_allocator<operand_entry> operand_entry_pool
202 ("operand entry pool");
204 /* This is used to assign a unique ID to each struct operand_entry
205 so that qsort results are identical on different hosts. */
206 static int next_operand_entry_id;
208 /* Starting rank number for a given basic block, so that we can rank
209 operations using unmovable instructions in that BB based on the bb
210 depth. */
211 static long *bb_rank;
213 /* Operand->rank hashtable. */
214 static hash_map<tree, long> *operand_rank;
 216 /* Vector of SSA_NAMEs for which, after reassociate_bb is done with
 217 all basic blocks, the CFG should be adjusted: the basic block is
 218 split right after that SSA_NAME's definition statement and before
 219 the only use, which must be a bit ior. */
220 static vec<tree> reassoc_branch_fixups;
222 /* Forward decls. */
223 static long get_rank (tree);
224 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
226 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
227 possibly added by gsi_remove. */
229 bool
230 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
232 gimple *stmt = gsi_stmt (*gsi);
234 if (!MAY_HAVE_DEBUG_STMTS || gimple_code (stmt) == GIMPLE_PHI)
235 return gsi_remove (gsi, true);
237 gimple_stmt_iterator prev = *gsi;
238 gsi_prev (&prev);
239 unsigned uid = gimple_uid (stmt);
240 basic_block bb = gimple_bb (stmt);
241 bool ret = gsi_remove (gsi, true);
242 if (!gsi_end_p (prev))
243 gsi_next (&prev);
244 else
245 prev = gsi_start_bb (bb);
246 gimple *end_stmt = gsi_stmt (*gsi);
247 while ((stmt = gsi_stmt (prev)) != end_stmt)
249 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
250 gimple_set_uid (stmt, uid);
251 gsi_next (&prev);
253 return ret;
256 /* Bias amount for loop-carried phis. We want this to be larger than
257 the depth of any reassociation tree we can see, but not larger than
258 the rank difference between two blocks. */
259 #define PHI_LOOP_BIAS (1 << 15)
261 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
262 an innermost loop, and the phi has only a single use which is inside
263 the loop, then the rank is the block rank of the loop latch plus an
264 extra bias for the loop-carried dependence. This causes expressions
265 calculated into an accumulator variable to be independent for each
266 iteration of the loop. If STMT is some other phi, the rank is the
267 block rank of its containing block. */
268 static long
269 phi_rank (gimple *stmt)
271 basic_block bb = gimple_bb (stmt);
272 struct loop *father = bb->loop_father;
273 tree res;
274 unsigned i;
275 use_operand_p use;
276 gimple *use_stmt;
278 /* We only care about real loops (those with a latch). */
279 if (!father->latch)
280 return bb_rank[bb->index];
282 /* Interesting phis must be in headers of innermost loops. */
283 if (bb != father->header
284 || father->inner)
285 return bb_rank[bb->index];
287 /* Ignore virtual SSA_NAMEs. */
288 res = gimple_phi_result (stmt);
289 if (virtual_operand_p (res))
290 return bb_rank[bb->index];
292 /* The phi definition must have a single use, and that use must be
293 within the loop. Otherwise this isn't an accumulator pattern. */
294 if (!single_imm_use (res, &use, &use_stmt)
295 || gimple_bb (use_stmt)->loop_father != father)
296 return bb_rank[bb->index];
298 /* Look for phi arguments from within the loop. If found, bias this phi. */
299 for (i = 0; i < gimple_phi_num_args (stmt); i++)
301 tree arg = gimple_phi_arg_def (stmt, i);
302 if (TREE_CODE (arg) == SSA_NAME
303 && !SSA_NAME_IS_DEFAULT_DEF (arg))
305 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
306 if (gimple_bb (def_stmt)->loop_father == father)
307 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
311 /* Must be an uninteresting phi. */
312 return bb_rank[bb->index];
315 /* If EXP is an SSA_NAME defined by a PHI statement that represents a
316 loop-carried dependence of an innermost loop, return TRUE; else
317 return FALSE. */
318 static bool
319 loop_carried_phi (tree exp)
321 gimple *phi_stmt;
322 long block_rank;
324 if (TREE_CODE (exp) != SSA_NAME
325 || SSA_NAME_IS_DEFAULT_DEF (exp))
326 return false;
328 phi_stmt = SSA_NAME_DEF_STMT (exp);
330 if (gimple_code (SSA_NAME_DEF_STMT (exp)) != GIMPLE_PHI)
331 return false;
333 /* Non-loop-carried phis have block rank. Loop-carried phis have
334 an additional bias added in. If this phi doesn't have block rank,
335 it's biased and should not be propagated. */
336 block_rank = bb_rank[gimple_bb (phi_stmt)->index];
338 if (phi_rank (phi_stmt) != block_rank)
339 return true;
341 return false;
344 /* Return the maximum of RANK and the rank that should be propagated
345 from expression OP. For most operands, this is just the rank of OP.
346 For loop-carried phis, the value is zero to avoid undoing the bias
347 in favor of the phi. */
348 static long
349 propagate_rank (long rank, tree op)
351 long op_rank;
353 if (loop_carried_phi (op))
354 return rank;
356 op_rank = get_rank (op);
358 return MAX (rank, op_rank);
361 /* Look up the operand rank structure for expression E. */
363 static inline long
364 find_operand_rank (tree e)
366 long *slot = operand_rank->get (e);
367 return slot ? *slot : -1;
370 /* Insert {E,RANK} into the operand rank hashtable. */
372 static inline void
373 insert_operand_rank (tree e, long rank)
375 gcc_assert (rank > 0);
376 gcc_assert (!operand_rank->put (e, rank));
379 /* Given an expression E, return the rank of the expression. */
381 static long
382 get_rank (tree e)
 384 /* SSA_NAME's have the rank of the expression they are the result of.
386 For globals and uninitialized values, the rank is 0.
387 For function arguments, use the pre-setup rank.
388 For PHI nodes, stores, asm statements, etc, we use the rank of
389 the BB.
390 For simple operations, the rank is the maximum rank of any of
391 its operands, or the bb_rank, whichever is less.
 392 I make no claims that this is optimal; however, it gives good
393 results. */
395 /* We make an exception to the normal ranking system to break
396 dependences of accumulator variables in loops. Suppose we
397 have a simple one-block loop containing:
399 x_1 = phi(x_0, x_2)
400 b = a + x_1
401 c = b + d
402 x_2 = c + e
404 As shown, each iteration of the calculation into x is fully
405 dependent upon the iteration before it. We would prefer to
406 see this in the form:
408 x_1 = phi(x_0, x_2)
409 b = a + d
410 c = b + e
411 x_2 = c + x_1
413 If the loop is unrolled, the calculations of b and c from
414 different iterations can be interleaved.
416 To obtain this result during reassociation, we bias the rank
417 of the phi definition x_1 upward, when it is recognized as an
418 accumulator pattern. The artificial rank causes it to be
419 added last, providing the desired independence. */
421 if (TREE_CODE (e) == SSA_NAME)
423 ssa_op_iter iter;
424 gimple *stmt;
425 long rank;
426 tree op;
428 if (SSA_NAME_IS_DEFAULT_DEF (e))
429 return find_operand_rank (e);
431 stmt = SSA_NAME_DEF_STMT (e);
432 if (gimple_code (stmt) == GIMPLE_PHI)
433 return phi_rank (stmt);
435 if (!is_gimple_assign (stmt))
436 return bb_rank[gimple_bb (stmt)->index];
438 /* If we already have a rank for this expression, use that. */
439 rank = find_operand_rank (e);
440 if (rank != -1)
441 return rank;
443 /* Otherwise, find the maximum rank for the operands. As an
444 exception, remove the bias from loop-carried phis when propagating
445 the rank so that dependent operations are not also biased. */
 446 /* Simply walk over all SSA uses - this takes advantage of the
447 fact that non-SSA operands are is_gimple_min_invariant and
448 thus have rank 0. */
449 rank = 0;
450 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
451 rank = propagate_rank (rank, op);
453 if (dump_file && (dump_flags & TDF_DETAILS))
455 fprintf (dump_file, "Rank for ");
456 print_generic_expr (dump_file, e, 0);
457 fprintf (dump_file, " is %ld\n", (rank + 1));
460 /* Note the rank in the hashtable so we don't recompute it. */
461 insert_operand_rank (e, (rank + 1));
462 return (rank + 1);
465 /* Constants, globals, etc., are rank 0 */
466 return 0;
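/* A small source-level illustration (hypothetical function; assumes
   floating-point reassociation is enabled, e.g. -O2 -ffast-math): the
   loop-carried phi for SUM below matches the accumulator pattern
   described above, so its rank is biased by PHI_LOOP_BIAS and the
   reassociated chain adds SUM last, roughly
   sum = (a[i] + b[i] + c[i]) + sum.  */
#if 0
double
accumulator_example (const double *a, const double *b, const double *c, int n)
{
  double sum = 0.0;
  for (int i = 0; i < n; i++)
    sum = sum + a[i] + b[i] + c[i];
  return sum;
}
#endif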
 470 /* We want integer constants to end up last no matter what, since they are
471 the ones we can do the most with. */
472 #define INTEGER_CONST_TYPE 1 << 3
473 #define FLOAT_CONST_TYPE 1 << 2
474 #define OTHER_CONST_TYPE 1 << 1
476 /* Classify an invariant tree into integer, float, or other, so that
477 we can sort them to be near other constants of the same type. */
478 static inline int
479 constant_type (tree t)
481 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
482 return INTEGER_CONST_TYPE;
483 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
484 return FLOAT_CONST_TYPE;
485 else
486 return OTHER_CONST_TYPE;
489 /* qsort comparison function to sort operand entries PA and PB by rank
490 so that the sorted array is ordered by rank in decreasing order. */
491 static int
492 sort_by_operand_rank (const void *pa, const void *pb)
494 const operand_entry *oea = *(const operand_entry *const *)pa;
495 const operand_entry *oeb = *(const operand_entry *const *)pb;
497 /* It's nicer for optimize_expression if constants that are likely
 498 to fold when added/multiplied/whatever are put next to each
499 other. Since all constants have rank 0, order them by type. */
500 if (oeb->rank == 0 && oea->rank == 0)
502 if (constant_type (oeb->op) != constant_type (oea->op))
503 return constant_type (oeb->op) - constant_type (oea->op);
504 else
505 /* To make sorting result stable, we use unique IDs to determine
506 order. */
507 return oeb->id - oea->id;
510 /* Lastly, make sure the versions that are the same go next to each
511 other. */
512 if ((oeb->rank - oea->rank == 0)
513 && TREE_CODE (oea->op) == SSA_NAME
514 && TREE_CODE (oeb->op) == SSA_NAME)
 516 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
 517 versions of removed SSA_NAMEs, so if possible, prefer to sort
 518 based on the basic block and gimple_uid of the SSA_NAME_DEF_STMT.
519 See PR60418. */
520 if (!SSA_NAME_IS_DEFAULT_DEF (oea->op)
521 && !SSA_NAME_IS_DEFAULT_DEF (oeb->op)
522 && !oea->stmt_to_insert
523 && !oeb->stmt_to_insert
524 && SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
526 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
527 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
528 basic_block bba = gimple_bb (stmta);
529 basic_block bbb = gimple_bb (stmtb);
530 if (bbb != bba)
532 if (bb_rank[bbb->index] != bb_rank[bba->index])
533 return bb_rank[bbb->index] - bb_rank[bba->index];
535 else
537 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
538 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
539 if (da != db)
540 return da ? 1 : -1;
544 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
545 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);
546 else
547 return oeb->id - oea->id;
550 if (oeb->rank != oea->rank)
551 return oeb->rank - oea->rank;
552 else
553 return oeb->id - oea->id;
556 /* Add an operand entry to *OPS for the tree operand OP. */
558 static void
559 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
561 operand_entry *oe = operand_entry_pool.allocate ();
563 oe->op = op;
564 oe->rank = get_rank (op);
565 oe->id = next_operand_entry_id++;
566 oe->count = 1;
567 oe->stmt_to_insert = stmt_to_insert;
568 ops->safe_push (oe);
571 /* Add an operand entry to *OPS for the tree operand OP with repeat
572 count REPEAT. */
574 static void
575 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
576 HOST_WIDE_INT repeat)
578 operand_entry *oe = operand_entry_pool.allocate ();
580 oe->op = op;
581 oe->rank = get_rank (op);
582 oe->id = next_operand_entry_id++;
583 oe->count = repeat;
584 oe->stmt_to_insert = NULL;
585 ops->safe_push (oe);
587 reassociate_stats.pows_encountered++;
 590 /* Return true if STMT is a reassociable operation containing a binary
591 operation with tree code CODE, and is inside LOOP. */
593 static bool
594 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop)
596 basic_block bb = gimple_bb (stmt);
598 if (gimple_bb (stmt) == NULL)
599 return false;
601 if (!flow_bb_inside_loop_p (loop, bb))
602 return false;
604 if (is_gimple_assign (stmt)
605 && gimple_assign_rhs_code (stmt) == code
606 && has_single_use (gimple_assign_lhs (stmt)))
607 return true;
609 return false;
613 /* Return true if STMT is a nop-conversion. */
615 static bool
616 gimple_nop_conversion_p (gimple *stmt)
618 if (gassign *ass = dyn_cast <gassign *> (stmt))
620 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
621 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
622 TREE_TYPE (gimple_assign_rhs1 (ass))))
623 return true;
625 return false;
628 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
 629 operand of that operation. Otherwise, return NULL_TREE. */
631 static tree
632 get_unary_op (tree name, enum tree_code opcode)
634 gimple *stmt = SSA_NAME_DEF_STMT (name);
636 /* Look through nop conversions (sign changes). */
637 if (gimple_nop_conversion_p (stmt)
638 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
639 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
641 if (!is_gimple_assign (stmt))
642 return NULL_TREE;
644 if (gimple_assign_rhs_code (stmt) == opcode)
645 return gimple_assign_rhs1 (stmt);
646 return NULL_TREE;
649 /* Return true if OP1 and OP2 have the same value if casted to either type. */
651 static bool
652 ops_equal_values_p (tree op1, tree op2)
654 if (op1 == op2)
655 return true;
657 tree orig_op1 = op1;
658 if (TREE_CODE (op1) == SSA_NAME)
660 gimple *stmt = SSA_NAME_DEF_STMT (op1);
661 if (gimple_nop_conversion_p (stmt))
663 op1 = gimple_assign_rhs1 (stmt);
664 if (op1 == op2)
665 return true;
669 if (TREE_CODE (op2) == SSA_NAME)
671 gimple *stmt = SSA_NAME_DEF_STMT (op2);
672 if (gimple_nop_conversion_p (stmt))
674 op2 = gimple_assign_rhs1 (stmt);
675 if (op1 == op2
676 || orig_op1 == op2)
677 return true;
681 return false;
685 /* If CURR and LAST are a pair of ops that OPCODE allows us to
686 eliminate through equivalences, do so, remove them from OPS, and
687 return true. Otherwise, return false. */
689 static bool
690 eliminate_duplicate_pair (enum tree_code opcode,
691 vec<operand_entry *> *ops,
692 bool *all_done,
693 unsigned int i,
694 operand_entry *curr,
695 operand_entry *last)
 698 /* If we have two of the same op, and the opcode is &, |, min, or max,
699 we can eliminate one of them.
700 If we have two of the same op, and the opcode is ^, we can
701 eliminate both of them. */
703 if (last && last->op == curr->op)
705 switch (opcode)
707 case MAX_EXPR:
708 case MIN_EXPR:
709 case BIT_IOR_EXPR:
710 case BIT_AND_EXPR:
711 if (dump_file && (dump_flags & TDF_DETAILS))
713 fprintf (dump_file, "Equivalence: ");
714 print_generic_expr (dump_file, curr->op, 0);
715 fprintf (dump_file, " [&|minmax] ");
716 print_generic_expr (dump_file, last->op, 0);
717 fprintf (dump_file, " -> ");
718 print_generic_stmt (dump_file, last->op, 0);
721 ops->ordered_remove (i);
722 reassociate_stats.ops_eliminated ++;
724 return true;
726 case BIT_XOR_EXPR:
727 if (dump_file && (dump_flags & TDF_DETAILS))
729 fprintf (dump_file, "Equivalence: ");
730 print_generic_expr (dump_file, curr->op, 0);
731 fprintf (dump_file, " ^ ");
732 print_generic_expr (dump_file, last->op, 0);
733 fprintf (dump_file, " -> nothing\n");
736 reassociate_stats.ops_eliminated += 2;
738 if (ops->length () == 2)
740 ops->truncate (0);
741 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
742 *all_done = true;
744 else
746 ops->ordered_remove (i-1);
747 ops->ordered_remove (i-1);
750 return true;
752 default:
753 break;
756 return false;
759 static vec<tree> plus_negates;
 761 /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
762 expression, look in OPS for a corresponding positive operation to cancel
763 it out. If we find one, remove the other from OPS, replace
764 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
765 return false. */
767 static bool
768 eliminate_plus_minus_pair (enum tree_code opcode,
769 vec<operand_entry *> *ops,
770 unsigned int currindex,
771 operand_entry *curr)
773 tree negateop;
774 tree notop;
775 unsigned int i;
776 operand_entry *oe;
778 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
779 return false;
781 negateop = get_unary_op (curr->op, NEGATE_EXPR);
782 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
783 if (negateop == NULL_TREE && notop == NULL_TREE)
784 return false;
786 /* Any non-negated version will have a rank that is one less than
787 the current rank. So once we hit those ranks, if we don't find
788 one, we can stop. */
790 for (i = currindex + 1;
791 ops->iterate (i, &oe)
792 && oe->rank >= curr->rank - 1 ;
793 i++)
795 if (negateop
796 && ops_equal_values_p (oe->op, negateop))
798 if (dump_file && (dump_flags & TDF_DETAILS))
800 fprintf (dump_file, "Equivalence: ");
801 print_generic_expr (dump_file, negateop, 0);
802 fprintf (dump_file, " + -");
803 print_generic_expr (dump_file, oe->op, 0);
804 fprintf (dump_file, " -> 0\n");
807 ops->ordered_remove (i);
808 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
809 ops->ordered_remove (currindex);
810 reassociate_stats.ops_eliminated ++;
812 return true;
814 else if (notop
815 && ops_equal_values_p (oe->op, notop))
817 tree op_type = TREE_TYPE (oe->op);
819 if (dump_file && (dump_flags & TDF_DETAILS))
821 fprintf (dump_file, "Equivalence: ");
822 print_generic_expr (dump_file, notop, 0);
823 fprintf (dump_file, " + ~");
824 print_generic_expr (dump_file, oe->op, 0);
825 fprintf (dump_file, " -> -1\n");
828 ops->ordered_remove (i);
829 add_to_ops_vec (ops, build_all_ones_cst (op_type));
830 ops->ordered_remove (currindex);
831 reassociate_stats.ops_eliminated ++;
833 return true;
837 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
838 save it for later inspection in repropagate_negates(). */
839 if (negateop != NULL_TREE
840 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
841 plus_negates.safe_push (curr->op);
843 return false;
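/* Illustrative inputs (hypothetical functions): inside a single addition
   chain the pairs below cancel as described above, a + -a -> 0 and
   a + ~a -> -1, so the chains reduce to b and b - 1 respectively.  */
#if 0
int
plus_minus_pair_example (int a, int b)
{
  return b + a + -a;
}

int
plus_not_pair_example (int a, int b)
{
  return b + a + ~a;
}
#endif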
 846 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
847 bitwise not expression, look in OPS for a corresponding operand to
848 cancel it out. If we find one, remove the other from OPS, replace
849 OPS[CURRINDEX] with 0, and return true. Otherwise, return
850 false. */
852 static bool
853 eliminate_not_pairs (enum tree_code opcode,
854 vec<operand_entry *> *ops,
855 unsigned int currindex,
856 operand_entry *curr)
858 tree notop;
859 unsigned int i;
860 operand_entry *oe;
862 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
863 || TREE_CODE (curr->op) != SSA_NAME)
864 return false;
866 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
867 if (notop == NULL_TREE)
868 return false;
870 /* Any non-not version will have a rank that is one less than
871 the current rank. So once we hit those ranks, if we don't find
872 one, we can stop. */
874 for (i = currindex + 1;
875 ops->iterate (i, &oe)
876 && oe->rank >= curr->rank - 1;
877 i++)
879 if (oe->op == notop)
881 if (dump_file && (dump_flags & TDF_DETAILS))
883 fprintf (dump_file, "Equivalence: ");
884 print_generic_expr (dump_file, notop, 0);
885 if (opcode == BIT_AND_EXPR)
886 fprintf (dump_file, " & ~");
887 else if (opcode == BIT_IOR_EXPR)
888 fprintf (dump_file, " | ~");
889 print_generic_expr (dump_file, oe->op, 0);
890 if (opcode == BIT_AND_EXPR)
891 fprintf (dump_file, " -> 0\n");
892 else if (opcode == BIT_IOR_EXPR)
893 fprintf (dump_file, " -> -1\n");
896 if (opcode == BIT_AND_EXPR)
897 oe->op = build_zero_cst (TREE_TYPE (oe->op));
898 else if (opcode == BIT_IOR_EXPR)
899 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
901 reassociate_stats.ops_eliminated += ops->length () - 1;
902 ops->truncate (0);
903 ops->quick_push (oe);
904 return true;
908 return false;
911 /* Use constant value that may be present in OPS to try to eliminate
912 operands. Note that this function is only really used when we've
913 eliminated ops for other reasons, or merged constants. Across
914 single statements, fold already does all of this, plus more. There
915 is little point in duplicating logic, so I've only included the
916 identities that I could ever construct testcases to trigger. */
918 static void
919 eliminate_using_constants (enum tree_code opcode,
920 vec<operand_entry *> *ops)
922 operand_entry *oelast = ops->last ();
923 tree type = TREE_TYPE (oelast->op);
925 if (oelast->rank == 0
926 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
928 switch (opcode)
930 case BIT_AND_EXPR:
931 if (integer_zerop (oelast->op))
933 if (ops->length () != 1)
935 if (dump_file && (dump_flags & TDF_DETAILS))
936 fprintf (dump_file, "Found & 0, removing all other ops\n");
938 reassociate_stats.ops_eliminated += ops->length () - 1;
940 ops->truncate (0);
941 ops->quick_push (oelast);
942 return;
945 else if (integer_all_onesp (oelast->op))
947 if (ops->length () != 1)
949 if (dump_file && (dump_flags & TDF_DETAILS))
950 fprintf (dump_file, "Found & -1, removing\n");
951 ops->pop ();
952 reassociate_stats.ops_eliminated++;
955 break;
956 case BIT_IOR_EXPR:
957 if (integer_all_onesp (oelast->op))
959 if (ops->length () != 1)
961 if (dump_file && (dump_flags & TDF_DETAILS))
962 fprintf (dump_file, "Found | -1, removing all other ops\n");
964 reassociate_stats.ops_eliminated += ops->length () - 1;
966 ops->truncate (0);
967 ops->quick_push (oelast);
968 return;
971 else if (integer_zerop (oelast->op))
973 if (ops->length () != 1)
975 if (dump_file && (dump_flags & TDF_DETAILS))
976 fprintf (dump_file, "Found | 0, removing\n");
977 ops->pop ();
978 reassociate_stats.ops_eliminated++;
981 break;
982 case MULT_EXPR:
983 if (integer_zerop (oelast->op)
984 || (FLOAT_TYPE_P (type)
985 && !HONOR_NANS (type)
986 && !HONOR_SIGNED_ZEROS (type)
987 && real_zerop (oelast->op)))
989 if (ops->length () != 1)
991 if (dump_file && (dump_flags & TDF_DETAILS))
992 fprintf (dump_file, "Found * 0, removing all other ops\n");
994 reassociate_stats.ops_eliminated += ops->length () - 1;
995 ops->truncate (1);
996 ops->quick_push (oelast);
997 return;
1000 else if (integer_onep (oelast->op)
1001 || (FLOAT_TYPE_P (type)
1002 && !HONOR_SNANS (type)
1003 && real_onep (oelast->op)))
1005 if (ops->length () != 1)
1007 if (dump_file && (dump_flags & TDF_DETAILS))
1008 fprintf (dump_file, "Found * 1, removing\n");
1009 ops->pop ();
1010 reassociate_stats.ops_eliminated++;
1011 return;
1014 break;
1015 case BIT_XOR_EXPR:
1016 case PLUS_EXPR:
1017 case MINUS_EXPR:
1018 if (integer_zerop (oelast->op)
1019 || (FLOAT_TYPE_P (type)
1020 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1021 && fold_real_zero_addition_p (type, oelast->op,
1022 opcode == MINUS_EXPR)))
1024 if (ops->length () != 1)
1026 if (dump_file && (dump_flags & TDF_DETAILS))
1027 fprintf (dump_file, "Found [|^+] 0, removing\n");
1028 ops->pop ();
1029 reassociate_stats.ops_eliminated++;
1030 return;
1033 break;
1034 default:
1035 break;
1041 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1042 bool, bool);
1044 /* Structure for tracking and counting operands. */
1045 struct oecount {
1046 int cnt;
1047 int id;
1048 enum tree_code oecode;
1049 tree op;
1053 /* The heap for the oecount hashtable and the sorted list of operands. */
1054 static vec<oecount> cvec;
 1057 /* Oecount hashtable helpers. Entries are indices into CVEC biased by 42, so the values 0 and 1 remain free for int_hash's empty and deleted markers. */
1059 struct oecount_hasher : int_hash <int, 0, 1>
1061 static inline hashval_t hash (int);
1062 static inline bool equal (int, int);
1065 /* Hash function for oecount. */
1067 inline hashval_t
1068 oecount_hasher::hash (int p)
1070 const oecount *c = &cvec[p - 42];
1071 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1074 /* Comparison function for oecount. */
1076 inline bool
1077 oecount_hasher::equal (int p1, int p2)
1079 const oecount *c1 = &cvec[p1 - 42];
1080 const oecount *c2 = &cvec[p2 - 42];
1081 return (c1->oecode == c2->oecode
1082 && c1->op == c2->op);
1085 /* Comparison function for qsort sorting oecount elements by count. */
1087 static int
1088 oecount_cmp (const void *p1, const void *p2)
1090 const oecount *c1 = (const oecount *)p1;
1091 const oecount *c2 = (const oecount *)p2;
1092 if (c1->cnt != c2->cnt)
1093 return c1->cnt - c2->cnt;
1094 else
1095 /* If counts are identical, use unique IDs to stabilize qsort. */
1096 return c1->id - c2->id;
1099 /* Return TRUE iff STMT represents a builtin call that raises OP
1100 to some exponent. */
1102 static bool
1103 stmt_is_power_of_op (gimple *stmt, tree op)
1105 if (!is_gimple_call (stmt))
1106 return false;
1108 switch (gimple_call_combined_fn (stmt))
1110 CASE_CFN_POW:
1111 CASE_CFN_POWI:
1112 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1114 default:
1115 return false;
1119 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1120 in place and return the result. Assumes that stmt_is_power_of_op
1121 was previously called for STMT and returned TRUE. */
1123 static HOST_WIDE_INT
1124 decrement_power (gimple *stmt)
1126 REAL_VALUE_TYPE c, cint;
1127 HOST_WIDE_INT power;
1128 tree arg1;
1130 switch (gimple_call_combined_fn (stmt))
1132 CASE_CFN_POW:
1133 arg1 = gimple_call_arg (stmt, 1);
1134 c = TREE_REAL_CST (arg1);
1135 power = real_to_integer (&c) - 1;
1136 real_from_integer (&cint, VOIDmode, power, SIGNED);
1137 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1138 return power;
1140 CASE_CFN_POWI:
1141 arg1 = gimple_call_arg (stmt, 1);
1142 power = TREE_INT_CST_LOW (arg1) - 1;
1143 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1144 return power;
1146 default:
1147 gcc_unreachable ();
 1151 /* Replace the LHS of STMT with a new SSA name and update all uses of
 1152 the old LHS to use it. Return the new SSA name. */
1154 static tree
1155 make_new_ssa_for_def (gimple *stmt)
1157 gimple *use_stmt;
1158 use_operand_p use;
1159 imm_use_iterator iter;
1160 tree new_lhs;
1161 tree lhs = gimple_assign_lhs (stmt);
1163 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1164 gimple_set_lhs (stmt, new_lhs);
1166 /* Also need to update GIMPLE_DEBUGs. */
1167 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1169 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1170 SET_USE (use, new_lhs);
1171 update_stmt (use_stmt);
1173 return new_lhs;
 1176 /* Replace all SSAs defined in STMTS_TO_FIX and replace their
1177 uses with new SSAs. Also do this for the stmt that defines DEF
1178 if *DEF is not OP. */
1180 static void
1181 make_new_ssa_for_all_defs (tree *def, tree op,
1182 vec<gimple *> &stmts_to_fix)
1184 unsigned i;
1185 gimple *stmt;
1187 if (*def != op
1188 && TREE_CODE (*def) == SSA_NAME
1189 && (stmt = SSA_NAME_DEF_STMT (*def))
1190 && gimple_code (stmt) != GIMPLE_NOP)
1191 *def = make_new_ssa_for_def (stmt);
1193 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1194 make_new_ssa_for_def (stmt);
1197 /* Find the single immediate use of STMT's LHS, and replace it
1198 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1199 replace *DEF with OP as well. */
1201 static void
1202 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1204 tree lhs;
1205 gimple *use_stmt;
1206 use_operand_p use;
1207 gimple_stmt_iterator gsi;
1209 if (is_gimple_call (stmt))
1210 lhs = gimple_call_lhs (stmt);
1211 else
1212 lhs = gimple_assign_lhs (stmt);
1214 gcc_assert (has_single_use (lhs));
1215 single_imm_use (lhs, &use, &use_stmt);
1216 if (lhs == *def)
1217 *def = op;
1218 SET_USE (use, op);
1219 if (TREE_CODE (op) != SSA_NAME)
1220 update_stmt (use_stmt);
1221 gsi = gsi_for_stmt (stmt);
1222 unlink_stmt_vdef (stmt);
1223 reassoc_remove_stmt (&gsi);
1224 release_defs (stmt);
1227 /* Walks the linear chain with result *DEF searching for an operation
1228 with operand OP and code OPCODE removing that from the chain. *DEF
1229 is updated if there is only one operand but no operation left. */
1231 static void
1232 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1234 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1235 /* PR72835 - Record the stmt chain that has to be updated such that
 1236 we don't use the same LHS when the values computed are different. */
1237 auto_vec<gimple *, 64> stmts_to_fix;
1241 tree name;
1243 if (opcode == MULT_EXPR)
1245 if (stmt_is_power_of_op (stmt, op))
1247 if (decrement_power (stmt) == 1)
1249 if (stmts_to_fix.length () > 0)
1250 stmts_to_fix.pop ();
1251 propagate_op_to_single_use (op, stmt, def);
1253 break;
1255 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1257 if (gimple_assign_rhs1 (stmt) == op)
1259 tree cst = build_minus_one_cst (TREE_TYPE (op));
1260 if (stmts_to_fix.length () > 0)
1261 stmts_to_fix.pop ();
1262 propagate_op_to_single_use (cst, stmt, def);
1263 break;
1265 else if (integer_minus_onep (op)
1266 || real_minus_onep (op))
1268 gimple_assign_set_rhs_code
1269 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1270 break;
1275 name = gimple_assign_rhs1 (stmt);
1277 /* If this is the operation we look for and one of the operands
 1278 is ours, simply propagate the other operand into the stmt's
1279 single use. */
1280 if (gimple_assign_rhs_code (stmt) == opcode
1281 && (name == op
1282 || gimple_assign_rhs2 (stmt) == op))
1284 if (name == op)
1285 name = gimple_assign_rhs2 (stmt);
1286 if (stmts_to_fix.length () > 0)
1287 stmts_to_fix.pop ();
1288 propagate_op_to_single_use (name, stmt, def);
1289 break;
1292 /* We might have a multiply of two __builtin_pow* calls, and
1293 the operand might be hiding in the rightmost one. Likewise
1294 this can happen for a negate. */
1295 if (opcode == MULT_EXPR
1296 && gimple_assign_rhs_code (stmt) == opcode
1297 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1298 && has_single_use (gimple_assign_rhs2 (stmt)))
1300 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1301 if (stmt_is_power_of_op (stmt2, op))
1303 if (decrement_power (stmt2) == 1)
1304 propagate_op_to_single_use (op, stmt2, def);
1305 else
1306 stmts_to_fix.safe_push (stmt2);
1307 break;
1309 else if (is_gimple_assign (stmt2)
1310 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1312 if (gimple_assign_rhs1 (stmt2) == op)
1314 tree cst = build_minus_one_cst (TREE_TYPE (op));
1315 propagate_op_to_single_use (cst, stmt2, def);
1316 break;
1318 else if (integer_minus_onep (op)
1319 || real_minus_onep (op))
1321 stmts_to_fix.safe_push (stmt2);
1322 gimple_assign_set_rhs_code
1323 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1324 break;
1329 /* Continue walking the chain. */
1330 gcc_assert (name != op
1331 && TREE_CODE (name) == SSA_NAME);
1332 stmt = SSA_NAME_DEF_STMT (name);
1333 stmts_to_fix.safe_push (stmt);
1335 while (1);
1337 if (stmts_to_fix.length () > 0)
1338 make_new_ssa_for_all_defs (def, op, stmts_to_fix);
1341 /* Returns true if statement S1 dominates statement S2. Like
1342 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1344 static bool
1345 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1347 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1349 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
 1350 SSA_NAME. Assume it lives at the beginning of the function and
1351 thus dominates everything. */
1352 if (!bb1 || s1 == s2)
1353 return true;
1355 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1356 if (!bb2)
1357 return false;
1359 if (bb1 == bb2)
1361 /* PHIs in the same basic block are assumed to be
 1362 executed all in parallel; if only one stmt is a PHI,
1363 it dominates the other stmt in the same basic block. */
1364 if (gimple_code (s1) == GIMPLE_PHI)
1365 return true;
1367 if (gimple_code (s2) == GIMPLE_PHI)
1368 return false;
1370 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1372 if (gimple_uid (s1) < gimple_uid (s2))
1373 return true;
1375 if (gimple_uid (s1) > gimple_uid (s2))
1376 return false;
1378 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1379 unsigned int uid = gimple_uid (s1);
1380 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1382 gimple *s = gsi_stmt (gsi);
1383 if (gimple_uid (s) != uid)
1384 break;
1385 if (s == s2)
1386 return true;
1389 return false;
1392 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1395 /* Insert STMT after INSERT_POINT. */
1397 static void
1398 insert_stmt_after (gimple *stmt, gimple *insert_point)
1400 gimple_stmt_iterator gsi;
1401 basic_block bb;
1403 if (gimple_code (insert_point) == GIMPLE_PHI)
1404 bb = gimple_bb (insert_point);
1405 else if (!stmt_ends_bb_p (insert_point))
1407 gsi = gsi_for_stmt (insert_point);
1408 gimple_set_uid (stmt, gimple_uid (insert_point));
1409 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1410 return;
1412 else
1413 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1414 thus if it must end a basic block, it should be a call that can
1415 throw, or some assignment that can throw. If it throws, the LHS
1416 of it will not be initialized though, so only valid places using
1417 the SSA_NAME should be dominated by the fallthru edge. */
1418 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1419 gsi = gsi_after_labels (bb);
1420 if (gsi_end_p (gsi))
1422 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1423 gimple_set_uid (stmt,
1424 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1426 else
1427 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1428 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
 1431 /* Build one statement of type TYPE computing OP1 OPCODE OP2, placing
 1432 it after the definition of either OP1 or OP2. Return the new
 1433 statement. */
1435 static gimple *
1436 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1438 gimple *op1def = NULL, *op2def = NULL;
1439 gimple_stmt_iterator gsi;
1440 tree op;
1441 gassign *sum;
1443 /* Create the addition statement. */
1444 op = make_ssa_name (type);
1445 sum = gimple_build_assign (op, opcode, op1, op2);
1447 /* Find an insertion place and insert. */
1448 if (TREE_CODE (op1) == SSA_NAME)
1449 op1def = SSA_NAME_DEF_STMT (op1);
1450 if (TREE_CODE (op2) == SSA_NAME)
1451 op2def = SSA_NAME_DEF_STMT (op2);
1452 if ((!op1def || gimple_nop_p (op1def))
1453 && (!op2def || gimple_nop_p (op2def)))
1455 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1456 if (gsi_end_p (gsi))
1458 gimple_stmt_iterator gsi2
1459 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1460 gimple_set_uid (sum,
1461 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1463 else
1464 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1465 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1467 else
1469 gimple *insert_point;
1470 if ((!op1def || gimple_nop_p (op1def))
1471 || (op2def && !gimple_nop_p (op2def)
1472 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1473 insert_point = op2def;
1474 else
1475 insert_point = op1def;
1476 insert_stmt_after (sum, insert_point);
1478 update_stmt (sum);
1480 return sum;
1483 /* Perform un-distribution of divisions and multiplications.
1484 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1485 to (A + B) / X for real X.
1487 The algorithm is organized as follows.
1489 - First we walk the addition chain *OPS looking for summands that
1490 are defined by a multiplication or a real division. This results
1491 in the candidates bitmap with relevant indices into *OPS.
1493 - Second we build the chains of multiplications or divisions for
1494 these candidates, counting the number of occurrences of (operand, code)
1495 pairs in all of the candidates chains.
1497 - Third we sort the (operand, code) pairs by number of occurrence and
1498 process them starting with the pair with the most uses.
1500 * For each such pair we walk the candidates again to build a
1501 second candidate bitmap noting all multiplication/division chains
1502 that have at least one occurrence of (operand, code).
1504 * We build an alternate addition chain only covering these
1505 candidates with one (operand, code) operation removed from their
1506 multiplication/division chain.
1508 * The first candidate gets replaced by the alternate addition chain
1509 multiplied/divided by the operand.
1511 * All candidate chains get disabled for further processing and
1512 processing of (operand, code) pairs continues.
1514 The alternate addition chains built are re-processed by the main
1515 reassociation algorithm which allows optimizing a * x * y + b * y * x
 1516 to (a + b) * x * y in one invocation of the reassociation pass. */
1518 static bool
1519 undistribute_ops_list (enum tree_code opcode,
1520 vec<operand_entry *> *ops, struct loop *loop)
1522 unsigned int length = ops->length ();
1523 operand_entry *oe1;
1524 unsigned i, j;
1525 unsigned nr_candidates, nr_candidates2;
1526 sbitmap_iterator sbi0;
1527 vec<operand_entry *> *subops;
1528 bool changed = false;
1529 int next_oecount_id = 0;
1531 if (length <= 1
1532 || opcode != PLUS_EXPR)
1533 return false;
1535 /* Build a list of candidates to process. */
1536 auto_sbitmap candidates (length);
1537 bitmap_clear (candidates);
1538 nr_candidates = 0;
1539 FOR_EACH_VEC_ELT (*ops, i, oe1)
1541 enum tree_code dcode;
1542 gimple *oe1def;
1544 if (TREE_CODE (oe1->op) != SSA_NAME)
1545 continue;
1546 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1547 if (!is_gimple_assign (oe1def))
1548 continue;
1549 dcode = gimple_assign_rhs_code (oe1def);
1550 if ((dcode != MULT_EXPR
1551 && dcode != RDIV_EXPR)
1552 || !is_reassociable_op (oe1def, dcode, loop))
1553 continue;
1555 bitmap_set_bit (candidates, i);
1556 nr_candidates++;
1559 if (nr_candidates < 2)
1560 return false;
1562 if (dump_file && (dump_flags & TDF_DETAILS))
1564 fprintf (dump_file, "searching for un-distribute opportunities ");
1565 print_generic_expr (dump_file,
1566 (*ops)[bitmap_first_set_bit (candidates)]->op, 0);
1567 fprintf (dump_file, " %d\n", nr_candidates);
1570 /* Build linearized sub-operand lists and the counting table. */
1571 cvec.create (0);
1573 hash_table<oecount_hasher> ctable (15);
1575 /* ??? Macro arguments cannot have multi-argument template types in
 1576 them. This typedef is needed to work around that limitation. */
1577 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1578 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1579 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1581 gimple *oedef;
1582 enum tree_code oecode;
1583 unsigned j;
1585 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1586 oecode = gimple_assign_rhs_code (oedef);
1587 linearize_expr_tree (&subops[i], oedef,
1588 associative_tree_code (oecode), false);
1590 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1592 oecount c;
1593 int *slot;
1594 int idx;
1595 c.oecode = oecode;
1596 c.cnt = 1;
1597 c.id = next_oecount_id++;
1598 c.op = oe1->op;
1599 cvec.safe_push (c);
1600 idx = cvec.length () + 41;
1601 slot = ctable.find_slot (idx, INSERT);
1602 if (!*slot)
1604 *slot = idx;
1606 else
1608 cvec.pop ();
1609 cvec[*slot - 42].cnt++;
1614 /* Sort the counting table. */
1615 cvec.qsort (oecount_cmp);
1617 if (dump_file && (dump_flags & TDF_DETAILS))
1619 oecount *c;
1620 fprintf (dump_file, "Candidates:\n");
1621 FOR_EACH_VEC_ELT (cvec, j, c)
1623 fprintf (dump_file, " %u %s: ", c->cnt,
1624 c->oecode == MULT_EXPR
1625 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1626 print_generic_expr (dump_file, c->op, 0);
1627 fprintf (dump_file, "\n");
1631 /* Process the (operand, code) pairs in order of most occurrence. */
1632 auto_sbitmap candidates2 (length);
1633 while (!cvec.is_empty ())
1635 oecount *c = &cvec.last ();
1636 if (c->cnt < 2)
1637 break;
1639 /* Now collect the operands in the outer chain that contain
1640 the common operand in their inner chain. */
1641 bitmap_clear (candidates2);
1642 nr_candidates2 = 0;
1643 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1645 gimple *oedef;
1646 enum tree_code oecode;
1647 unsigned j;
1648 tree op = (*ops)[i]->op;
 1650 /* If we already undistributed in this chain, this may be
1651 a constant. */
1652 if (TREE_CODE (op) != SSA_NAME)
1653 continue;
1655 oedef = SSA_NAME_DEF_STMT (op);
1656 oecode = gimple_assign_rhs_code (oedef);
1657 if (oecode != c->oecode)
1658 continue;
1660 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1662 if (oe1->op == c->op)
1664 bitmap_set_bit (candidates2, i);
1665 ++nr_candidates2;
1666 break;
1671 if (nr_candidates2 >= 2)
1673 operand_entry *oe1, *oe2;
1674 gimple *prod;
1675 int first = bitmap_first_set_bit (candidates2);
1677 /* Build the new addition chain. */
1678 oe1 = (*ops)[first];
1679 if (dump_file && (dump_flags & TDF_DETAILS))
1681 fprintf (dump_file, "Building (");
1682 print_generic_expr (dump_file, oe1->op, 0);
1684 zero_one_operation (&oe1->op, c->oecode, c->op);
1685 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1687 gimple *sum;
1688 oe2 = (*ops)[i];
1689 if (dump_file && (dump_flags & TDF_DETAILS))
1691 fprintf (dump_file, " + ");
1692 print_generic_expr (dump_file, oe2->op, 0);
1694 zero_one_operation (&oe2->op, c->oecode, c->op);
1695 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1696 oe1->op, oe2->op, opcode);
1697 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1698 oe2->rank = 0;
1699 oe1->op = gimple_get_lhs (sum);
1702 /* Apply the multiplication/division. */
1703 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1704 oe1->op, c->op, c->oecode);
1705 if (dump_file && (dump_flags & TDF_DETAILS))
1707 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1708 print_generic_expr (dump_file, c->op, 0);
1709 fprintf (dump_file, "\n");
1712 /* Record it in the addition chain and disable further
1713 undistribution with this op. */
1714 oe1->op = gimple_assign_lhs (prod);
1715 oe1->rank = get_rank (oe1->op);
1716 subops[first].release ();
1718 changed = true;
1721 cvec.pop ();
1724 for (i = 0; i < ops->length (); ++i)
1725 subops[i].release ();
1726 free (subops);
1727 cvec.release ();
1729 return changed;
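/* Illustrative input (hypothetical function; assumes floating-point
   reassociation is enabled, e.g. -O2 -ffast-math): the addition chain
   below is the kind of candidate set the algorithm above handles,
   conceptually rewriting a * x + b * x + c * x into (a + b + c) * x.  */
#if 0
double
undistribute_example (double a, double b, double c, double x)
{
  return a * x + b * x + c * x;
}
#endif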
1732 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1733 expression, examine the other OPS to see if any of them are comparisons
1734 of the same values, which we may be able to combine or eliminate.
1735 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1737 static bool
1738 eliminate_redundant_comparison (enum tree_code opcode,
1739 vec<operand_entry *> *ops,
1740 unsigned int currindex,
1741 operand_entry *curr)
1743 tree op1, op2;
1744 enum tree_code lcode, rcode;
1745 gimple *def1, *def2;
1746 int i;
1747 operand_entry *oe;
1749 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
1750 return false;
1752 /* Check that CURR is a comparison. */
1753 if (TREE_CODE (curr->op) != SSA_NAME)
1754 return false;
1755 def1 = SSA_NAME_DEF_STMT (curr->op);
1756 if (!is_gimple_assign (def1))
1757 return false;
1758 lcode = gimple_assign_rhs_code (def1);
1759 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
1760 return false;
1761 op1 = gimple_assign_rhs1 (def1);
1762 op2 = gimple_assign_rhs2 (def1);
1764 /* Now look for a similar comparison in the remaining OPS. */
1765 for (i = currindex + 1; ops->iterate (i, &oe); i++)
1767 tree t;
1769 if (TREE_CODE (oe->op) != SSA_NAME)
1770 continue;
1771 def2 = SSA_NAME_DEF_STMT (oe->op);
1772 if (!is_gimple_assign (def2))
1773 continue;
1774 rcode = gimple_assign_rhs_code (def2);
1775 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1776 continue;
1778 /* If we got here, we have a match. See if we can combine the
1779 two comparisons. */
1780 if (opcode == BIT_IOR_EXPR)
1781 t = maybe_fold_or_comparisons (lcode, op1, op2,
1782 rcode, gimple_assign_rhs1 (def2),
1783 gimple_assign_rhs2 (def2));
1784 else
1785 t = maybe_fold_and_comparisons (lcode, op1, op2,
1786 rcode, gimple_assign_rhs1 (def2),
1787 gimple_assign_rhs2 (def2));
1788 if (!t)
1789 continue;
1791 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
1792 always give us a boolean_type_node value back. If the original
1793 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
1794 we need to convert. */
1795 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
1796 t = fold_convert (TREE_TYPE (curr->op), t);
1798 if (TREE_CODE (t) != INTEGER_CST
1799 && !operand_equal_p (t, curr->op, 0))
1801 enum tree_code subcode;
1802 tree newop1, newop2;
1803 if (!COMPARISON_CLASS_P (t))
1804 continue;
1805 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1806 STRIP_USELESS_TYPE_CONVERSION (newop1);
1807 STRIP_USELESS_TYPE_CONVERSION (newop2);
1808 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
1809 continue;
1812 if (dump_file && (dump_flags & TDF_DETAILS))
1814 fprintf (dump_file, "Equivalence: ");
1815 print_generic_expr (dump_file, curr->op, 0);
1816 fprintf (dump_file, " %s ", op_symbol_code (opcode));
1817 print_generic_expr (dump_file, oe->op, 0);
1818 fprintf (dump_file, " -> ");
1819 print_generic_expr (dump_file, t, 0);
1820 fprintf (dump_file, "\n");
1823 /* Now we can delete oe, as it has been subsumed by the new combined
1824 expression t. */
1825 ops->ordered_remove (i);
1826 reassociate_stats.ops_eliminated ++;
1828 /* If t is the same as curr->op, we're done. Otherwise we must
1829 replace curr->op with t. Special case is if we got a constant
1830 back, in which case we add it to the end instead of in place of
1831 the current entry. */
1832 if (TREE_CODE (t) == INTEGER_CST)
1834 ops->ordered_remove (currindex);
1835 add_to_ops_vec (ops, t);
1837 else if (!operand_equal_p (t, curr->op, 0))
1839 gimple *sum;
1840 enum tree_code subcode;
1841 tree newop1;
1842 tree newop2;
1843 gcc_assert (COMPARISON_CLASS_P (t));
1844 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
1845 STRIP_USELESS_TYPE_CONVERSION (newop1);
1846 STRIP_USELESS_TYPE_CONVERSION (newop2);
1847 gcc_checking_assert (is_gimple_val (newop1)
1848 && is_gimple_val (newop2));
1849 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
1850 curr->op = gimple_get_lhs (sum);
1852 return true;
1855 return false;
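/* Illustrative input (hypothetical function): the disjunction below is
   the documented example, conceptually combined into a single a <= b.  */
#if 0
int
redundant_comparison_example (int a, int b)
{
  return (a < b) | (a == b);
}
#endif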
 1859 /* Transform repeated addition of the same value into a multiplication
 1860 by a constant. */
1861 static bool
1862 transform_add_to_multiply (vec<operand_entry *> *ops)
1864 operand_entry *oe;
1865 tree op = NULL_TREE;
1866 int j;
1867 int i, start = -1, end = 0, count = 0;
1868 auto_vec<std::pair <int, int> > indxs;
1869 bool changed = false;
1871 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1872 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
1873 || !flag_unsafe_math_optimizations))
1874 return false;
1876 /* Look for repeated operands. */
1877 FOR_EACH_VEC_ELT (*ops, i, oe)
1879 if (start == -1)
1881 count = 1;
1882 op = oe->op;
1883 start = i;
1885 else if (operand_equal_p (oe->op, op, 0))
1887 count++;
1888 end = i;
1890 else
1892 if (count > 1)
1893 indxs.safe_push (std::make_pair (start, end));
1894 count = 1;
1895 op = oe->op;
1896 start = i;
1900 if (count > 1)
1901 indxs.safe_push (std::make_pair (start, end));
1903 for (j = indxs.length () - 1; j >= 0; --j)
1905 /* Convert repeated operand addition to multiplication. */
1906 start = indxs[j].first;
1907 end = indxs[j].second;
1908 op = (*ops)[start]->op;
1909 count = end - start + 1;
1910 for (i = end; i >= start; --i)
1911 ops->unordered_remove (i);
1912 tree tmp = make_ssa_name (TREE_TYPE (op));
1913 tree cst = build_int_cst (integer_type_node, count);
1914 gassign *mul_stmt
1915 = gimple_build_assign (tmp, MULT_EXPR,
1916 op, fold_convert (TREE_TYPE (op), cst));
1917 gimple_set_visited (mul_stmt, true);
1918 add_to_ops_vec (ops, tmp, mul_stmt);
1919 changed = true;
1922 return changed;
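/* Illustrative input (hypothetical function): repeated addends in the
   chain below are the kind of input this function rewrites, conceptually
   x + x + y + x + y -> x * 3 + y * 2.  */
#if 0
int
add_to_multiply_example (int x, int y)
{
  return x + x + y + x + y;
}
#endif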
1926 /* Perform various identities and other optimizations on the list of
1927 operand entries, stored in OPS. The tree code for the binary
1928 operation between all the operands is OPCODE. */
1930 static void
1931 optimize_ops_list (enum tree_code opcode,
1932 vec<operand_entry *> *ops)
1934 unsigned int length = ops->length ();
1935 unsigned int i;
1936 operand_entry *oe;
1937 operand_entry *oelast = NULL;
1938 bool iterate = false;
1940 if (length == 1)
1941 return;
1943 oelast = ops->last ();
1945 /* If the last two are constants, pop the constants off, merge them
1946 and try the next two. */
1947 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
1949 operand_entry *oelm1 = (*ops)[length - 2];
1951 if (oelm1->rank == 0
1952 && is_gimple_min_invariant (oelm1->op)
1953 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
1954 TREE_TYPE (oelast->op)))
1956 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
1957 oelm1->op, oelast->op);
1959 if (folded && is_gimple_min_invariant (folded))
1961 if (dump_file && (dump_flags & TDF_DETAILS))
1962 fprintf (dump_file, "Merging constants\n");
1964 ops->pop ();
1965 ops->pop ();
1967 add_to_ops_vec (ops, folded);
1968 reassociate_stats.constants_eliminated++;
1970 optimize_ops_list (opcode, ops);
1971 return;
1976 eliminate_using_constants (opcode, ops);
1977 oelast = NULL;
1979 for (i = 0; ops->iterate (i, &oe);)
1981 bool done = false;
1983 if (eliminate_not_pairs (opcode, ops, i, oe))
1984 return;
1985 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
1986 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
1987 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
1989 if (done)
1990 return;
1991 iterate = true;
1992 oelast = NULL;
1993 continue;
1995 oelast = oe;
1996 i++;
1999 length = ops->length ();
2000 oelast = ops->last ();
2002 if (iterate)
2003 optimize_ops_list (opcode, ops);
2006 /* The following functions are subroutines to optimize_range_tests and allow
2007 it to try to change a logical combination of comparisons into a range
2008 test.
 2010 For example, both
 2011 X == 2 || X == 5 || X == 3 || X == 4
 2012 and
 2013 X >= 2 && X <= 5
 2014 are converted to
 2015 (unsigned) (X - 2) <= 3
 2017 For more information see the comments above fold_range_test in fold-const.c;
 2018 this implementation is for GIMPLE. */
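/* Illustrative input (hypothetical function): the combined comparisons
   below are the documented example, conceptually turned into the single
   range test (unsigned) (x - 2) <= 3.  */
#if 0
int
range_test_example (int x)
{
  return x == 2 || x == 5 || x == 3 || x == 4;
}
#endif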
2020 struct range_entry
2022 tree exp;
2023 tree low;
2024 tree high;
2025 bool in_p;
2026 bool strict_overflow_p;
2027 unsigned int idx, next;
2030 /* This is similar to make_range in fold-const.c, but on top of
2031 GIMPLE instead of trees. If EXP is non-NULL, it should be
2032 an SSA_NAME and STMT argument is ignored, otherwise STMT
2033 argument should be a GIMPLE_COND. */
2035 static void
2036 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2038 int in_p;
2039 tree low, high;
2040 bool is_bool, strict_overflow_p;
2042 r->exp = NULL_TREE;
2043 r->in_p = false;
2044 r->strict_overflow_p = false;
2045 r->low = NULL_TREE;
2046 r->high = NULL_TREE;
2047 if (exp != NULL_TREE
2048 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2049 return;
2051 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2052 and see if we can refine the range. Some of the cases below may not
2053 happen, but it doesn't seem worth worrying about this. We "continue"
2054 the outer loop when we've changed something; otherwise we "break"
2055 the switch, which will "break" the while. */
2056 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2057 high = low;
2058 in_p = 0;
2059 strict_overflow_p = false;
2060 is_bool = false;
2061 if (exp == NULL_TREE)
2062 is_bool = true;
2063 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2065 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2066 is_bool = true;
2067 else
2068 return;
2070 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2071 is_bool = true;
2073 while (1)
2075 enum tree_code code;
2076 tree arg0, arg1, exp_type;
2077 tree nexp;
2078 location_t loc;
2080 if (exp != NULL_TREE)
2082 if (TREE_CODE (exp) != SSA_NAME
2083 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2084 break;
2086 stmt = SSA_NAME_DEF_STMT (exp);
2087 if (!is_gimple_assign (stmt))
2088 break;
2090 code = gimple_assign_rhs_code (stmt);
2091 arg0 = gimple_assign_rhs1 (stmt);
2092 arg1 = gimple_assign_rhs2 (stmt);
2093 exp_type = TREE_TYPE (exp);
2095 else
2097 code = gimple_cond_code (stmt);
2098 arg0 = gimple_cond_lhs (stmt);
2099 arg1 = gimple_cond_rhs (stmt);
2100 exp_type = boolean_type_node;
2103 if (TREE_CODE (arg0) != SSA_NAME)
2104 break;
2105 loc = gimple_location (stmt);
2106 switch (code)
2108 case BIT_NOT_EXPR:
2109 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2110 /* Ensure the range is either +[-,0], +[0,0],
2111 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2112 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2113 or similar expression of unconditional true or
2114 false, it should not be negated. */
2115 && ((high && integer_zerop (high))
2116 || (low && integer_onep (low))))
2118 in_p = !in_p;
2119 exp = arg0;
2120 continue;
2122 break;
2123 case SSA_NAME:
2124 exp = arg0;
2125 continue;
2126 CASE_CONVERT:
2127 if (is_bool)
2128 goto do_default;
2129 if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2131 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2132 is_bool = true;
2133 else
2134 return;
2136 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2137 is_bool = true;
2138 goto do_default;
2139 case EQ_EXPR:
2140 case NE_EXPR:
2141 case LT_EXPR:
2142 case LE_EXPR:
2143 case GE_EXPR:
2144 case GT_EXPR:
2145 is_bool = true;
2146 /* FALLTHRU */
2147 default:
2148 if (!is_bool)
2149 return;
2150 do_default:
2151 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2152 &low, &high, &in_p,
2153 &strict_overflow_p);
2154 if (nexp != NULL_TREE)
2156 exp = nexp;
2157 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2158 continue;
2160 break;
2162 break;
2164 if (is_bool)
2166 r->exp = exp;
2167 r->in_p = in_p;
2168 r->low = low;
2169 r->high = high;
2170 r->strict_overflow_p = strict_overflow_p;
2174 /* Comparison function for qsort. Sort entries
2175 without SSA_NAME exp first, then with SSA_NAMEs sorted
2176 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2177 by increasing ->low and if ->low is the same, by increasing
2178 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2179 maximum. */
2181 static int
2182 range_entry_cmp (const void *a, const void *b)
2184 const struct range_entry *p = (const struct range_entry *) a;
2185 const struct range_entry *q = (const struct range_entry *) b;
2187 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2189 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2191 /* Group range_entries for the same SSA_NAME together. */
2192 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2193 return -1;
2194 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2195 return 1;
2196 /* If ->low is different, NULL low goes first, then by
2197 ascending low. */
2198 if (p->low != NULL_TREE)
2200 if (q->low != NULL_TREE)
2202 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2203 p->low, q->low);
2204 if (tem && integer_onep (tem))
2205 return -1;
2206 tem = fold_binary (GT_EXPR, boolean_type_node,
2207 p->low, q->low);
2208 if (tem && integer_onep (tem))
2209 return 1;
2211 else
2212 return 1;
2214 else if (q->low != NULL_TREE)
2215 return -1;
2216 /* If ->high is different, NULL high goes last, before that by
2217 ascending high. */
2218 if (p->high != NULL_TREE)
2220 if (q->high != NULL_TREE)
2222 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2223 p->high, q->high);
2224 if (tem && integer_onep (tem))
2225 return -1;
2226 tem = fold_binary (GT_EXPR, boolean_type_node,
2227 p->high, q->high);
2228 if (tem && integer_onep (tem))
2229 return 1;
2231 else
2232 return -1;
2234 else if (q->high != NULL_TREE)
2235 return 1;
2236 /* If both ranges are the same, sort below by ascending idx. */
2238 else
2239 return 1;
2241 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2242 return -1;
2244 if (p->idx < q->idx)
2245 return -1;
2246 else
2248 gcc_checking_assert (p->idx > q->idx);
2249 return 1;
2253 /* Helper routine of optimize_range_test.
2254 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2255 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2256 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2257 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2258 an array of COUNT pointers to other ranges. Return
2259 true if the range merge has been successful.
2260 If OPCODE is ERROR_MARK, this is called from within
2261 maybe_optimize_range_tests and is performing inter-bb range optimization.
2262 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2263 oe->rank. */
2265 static bool
2266 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2267 struct range_entry **otherrangep,
2268 unsigned int count, enum tree_code opcode,
2269 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2270 bool in_p, tree low, tree high, bool strict_overflow_p)
2272 operand_entry *oe = (*ops)[range->idx];
2273 tree op = oe->op;
2274 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2275 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2276 location_t loc = gimple_location (stmt);
2277 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2278 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2279 in_p, low, high);
2280 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2281 gimple_stmt_iterator gsi;
2282 unsigned int i, uid;
2284 if (tem == NULL_TREE)
2285 return false;
2287 /* If op is default def SSA_NAME, there is no place to insert the
2288 new comparison. Give up, unless we can use OP itself as the
2289 range test. */
2290 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2292 if (op == range->exp
2293 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2294 || TREE_CODE (optype) == BOOLEAN_TYPE)
2295 && (op == tem
2296 || (TREE_CODE (tem) == EQ_EXPR
2297 && TREE_OPERAND (tem, 0) == op
2298 && integer_onep (TREE_OPERAND (tem, 1))))
2299 && opcode != BIT_IOR_EXPR
2300 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2302 stmt = NULL;
2303 tem = op;
2305 else
2306 return false;
2309 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2310 warning_at (loc, OPT_Wstrict_overflow,
2311 "assuming signed overflow does not occur "
2312 "when simplifying range test");
2314 if (dump_file && (dump_flags & TDF_DETAILS))
2316 struct range_entry *r;
2317 fprintf (dump_file, "Optimizing range tests ");
2318 print_generic_expr (dump_file, range->exp, 0);
2319 fprintf (dump_file, " %c[", range->in_p ? '+' : '-');
2320 print_generic_expr (dump_file, range->low, 0);
2321 fprintf (dump_file, ", ");
2322 print_generic_expr (dump_file, range->high, 0);
2323 fprintf (dump_file, "]");
2324 for (i = 0; i < count; i++)
2326 if (otherrange)
2327 r = otherrange + i;
2328 else
2329 r = otherrangep[i];
2330 fprintf (dump_file, " and %c[", r->in_p ? '+' : '-');
2331 print_generic_expr (dump_file, r->low, 0);
2332 fprintf (dump_file, ", ");
2333 print_generic_expr (dump_file, r->high, 0);
2334 fprintf (dump_file, "]");
2336 fprintf (dump_file, "\n into ");
2337 print_generic_expr (dump_file, tem, 0);
2338 fprintf (dump_file, "\n");
2341 if (opcode == BIT_IOR_EXPR
2342 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2343 tem = invert_truthvalue_loc (loc, tem);
2345 tem = fold_convert_loc (loc, optype, tem);
2346 if (stmt)
2348 gsi = gsi_for_stmt (stmt);
2349 uid = gimple_uid (stmt);
2351 else
2353 gsi = gsi_none ();
2354 uid = 0;
2356 if (stmt == NULL)
2357 gcc_checking_assert (tem == op);
2358 /* In rare cases range->exp can be equal to lhs of stmt.
2359 In that case we have to insert after the stmt rather then before
2360 it. If stmt is a PHI, insert it at the start of the basic block. */
2361 else if (op != range->exp)
2363 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2364 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2365 GSI_SAME_STMT);
2366 gsi_prev (&gsi);
2368 else if (gimple_code (stmt) != GIMPLE_PHI)
2370 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2371 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false,
2372 GSI_CONTINUE_LINKING);
2374 else
2376 gsi = gsi_after_labels (gimple_bb (stmt));
2377 if (!gsi_end_p (gsi))
2378 uid = gimple_uid (gsi_stmt (gsi));
2379 else
2381 gsi = gsi_start_bb (gimple_bb (stmt));
2382 uid = 1;
2383 while (!gsi_end_p (gsi))
2385 uid = gimple_uid (gsi_stmt (gsi));
2386 gsi_next (&gsi);
2389 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2390 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true,
2391 GSI_SAME_STMT);
2392 if (gsi_end_p (gsi))
2393 gsi = gsi_last_bb (gimple_bb (stmt));
2394 else
2395 gsi_prev (&gsi);
2397 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2398 if (gimple_uid (gsi_stmt (gsi)))
2399 break;
2400 else
2401 gimple_set_uid (gsi_stmt (gsi), uid);
2403 oe->op = tem;
2404 range->exp = exp;
2405 range->low = low;
2406 range->high = high;
2407 range->in_p = in_p;
2408 range->strict_overflow_p = false;
2410 for (i = 0; i < count; i++)
2412 if (otherrange)
2413 range = otherrange + i;
2414 else
2415 range = otherrangep[i];
2416 oe = (*ops)[range->idx];
2417 /* Now change all the other range test immediate uses, so that
2418 those tests will be optimized away. */
2419 if (opcode == ERROR_MARK)
2421 if (oe->op)
2422 oe->op = build_int_cst (TREE_TYPE (oe->op),
2423 oe->rank == BIT_IOR_EXPR ? 0 : 1);
2424 else
2425 oe->op = (oe->rank == BIT_IOR_EXPR
2426 ? boolean_false_node : boolean_true_node);
2428 else
2429 oe->op = error_mark_node;
2430 range->exp = NULL_TREE;
2432 return true;
2435 /* Optimize X == CST1 || X == CST2
2436 if popcount (CST1 ^ CST2) == 1 into
2437 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
2438 Similarly for ranges. E.g.
2439 X != 2 && X != 3 && X != 10 && X != 11
2440 will be transformed by the previous optimization into
2441 !((X - 2U) <= 1U || (X - 10U) <= 1U)
2442 and this loop can transform that into
2443 !(((X & ~8) - 2U) <= 1U). */
2445 static bool
2446 optimize_range_tests_xor (enum tree_code opcode, tree type,
2447 tree lowi, tree lowj, tree highi, tree highj,
2448 vec<operand_entry *> *ops,
2449 struct range_entry *rangei,
2450 struct range_entry *rangej)
2452 tree lowxor, highxor, tem, exp;
2453 /* Check lowi ^ lowj == highi ^ highj and
2454 popcount (lowi ^ lowj) == 1. */
2455 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
2456 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
2457 return false;
2458 if (!integer_pow2p (lowxor))
2459 return false;
2460 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2461 if (!tree_int_cst_equal (lowxor, highxor))
2462 return false;
2464 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2465 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem);
2466 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2467 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2468 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2469 NULL, rangei->in_p, lowj, highj,
2470 rangei->strict_overflow_p
2471 || rangej->strict_overflow_p))
2472 return true;
2473 return false;
2476 /* Optimize X == CST1 || X == CST2
2477 if popcount (CST2 - CST1) == 1 into
2478 ((X - CST1) & ~(CST2 - CST1)) == 0.
2479 Similarly for ranges. E.g.
2480 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
2481 || X == 75 || X == 45
2482 will be transformed by the previous optimization into
2483 (X - 43U) <= 3U || (X - 75U) <= 3U
2484 and this loop can transform that into
2485 ((X - 43U) & ~(75U - 43U)) <= 3U. */
2486 static bool
2487 optimize_range_tests_diff (enum tree_code opcode, tree type,
2488 tree lowi, tree lowj, tree highi, tree highj,
2489 vec<operand_entry *> *ops,
2490 struct range_entry *rangei,
2491 struct range_entry *rangej)
2493 tree tem1, tem2, mask;
2494 /* Check highi - lowi == highj - lowj. */
2495 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
2496 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2497 return false;
2498 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
2499 if (!tree_int_cst_equal (tem1, tem2))
2500 return false;
2501 /* Check popcount (lowj - lowi) == 1. */
2502 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
2503 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2504 return false;
2505 if (!integer_pow2p (tem1))
2506 return false;
2508 type = unsigned_type_for (type);
2509 tem1 = fold_convert (type, tem1);
2510 tem2 = fold_convert (type, tem2);
2511 lowi = fold_convert (type, lowi);
2512 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2513 tem1 = fold_binary (MINUS_EXPR, type,
2514 fold_convert (type, rangei->exp), lowi);
2515 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
2516 lowj = build_int_cst (type, 0);
2517 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
2518 NULL, rangei->in_p, lowj, tem2,
2519 rangei->strict_overflow_p
2520 || rangej->strict_overflow_p))
2521 return true;
2522 return false;
2525 /* It does some common checks for function optimize_range_tests_xor and
2526 optimize_range_tests_diff.
2527 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
2528 Else it calls optimize_range_tests_diff. */
2530 static bool
2531 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
2532 bool optimize_xor, vec<operand_entry *> *ops,
2533 struct range_entry *ranges)
2535 int i, j;
2536 bool any_changes = false;
2537 for (i = first; i < length; i++)
2539 tree lowi, highi, lowj, highj, type, tem;
2541 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2542 continue;
2543 type = TREE_TYPE (ranges[i].exp);
2544 if (!INTEGRAL_TYPE_P (type))
2545 continue;
2546 lowi = ranges[i].low;
2547 if (lowi == NULL_TREE)
2548 lowi = TYPE_MIN_VALUE (type);
2549 highi = ranges[i].high;
2550 if (highi == NULL_TREE)
2551 continue;
2552 for (j = i + 1; j < length && j < i + 64; j++)
2554 bool changes;
2555 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
2556 continue;
2557 lowj = ranges[j].low;
2558 if (lowj == NULL_TREE)
2559 continue;
2560 highj = ranges[j].high;
2561 if (highj == NULL_TREE)
2562 highj = TYPE_MAX_VALUE (type);
2563 /* Check lowj > highi. */
2564 tem = fold_binary (GT_EXPR, boolean_type_node,
2565 lowj, highi);
2566 if (tem == NULL_TREE || !integer_onep (tem))
2567 continue;
2568 if (optimize_xor)
2569 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
2570 highi, highj, ops,
2571 ranges + i, ranges + j);
2572 else
2573 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
2574 highi, highj, ops,
2575 ranges + i, ranges + j);
2576 if (changes)
2578 any_changes = true;
2579 break;
2583 return any_changes;
2586 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
2587 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
2588 EXP on success, NULL otherwise. */
2590 static tree
2591 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
2592 wide_int *mask, tree *totallowp)
2594 tree tem = int_const_binop (MINUS_EXPR, high, low);
2595 if (tem == NULL_TREE
2596 || TREE_CODE (tem) != INTEGER_CST
2597 || TREE_OVERFLOW (tem)
2598 || tree_int_cst_sgn (tem) == -1
2599 || compare_tree_int (tem, prec) != -1)
2600 return NULL_TREE;
2602 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
2603 *mask = wi::shifted_mask (0, max, false, prec);
2604 if (TREE_CODE (exp) == BIT_AND_EXPR
2605 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2607 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
2608 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
2609 if (wi::popcount (msk) == 1
2610 && wi::ltu_p (msk, prec - max))
2612 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
2613 max += msk.to_uhwi ();
2614 exp = TREE_OPERAND (exp, 0);
2615 if (integer_zerop (low)
2616 && TREE_CODE (exp) == PLUS_EXPR
2617 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
2619 tree ret = TREE_OPERAND (exp, 0);
2620 STRIP_NOPS (ret);
2621 widest_int bias
2622 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
2623 TYPE_PRECISION (TREE_TYPE (low))));
2624 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
2625 if (totallowp)
2627 *totallowp = tbias;
2628 return ret;
2630 else if (!tree_int_cst_lt (totallow, tbias))
2631 return NULL_TREE;
2632 bias = wi::to_widest (tbias);
2633 bias -= wi::to_widest (totallow);
2634 if (bias >= 0 && bias < prec - max)
2636 *mask = wi::lshift (*mask, bias);
2637 return ret;
2642 if (totallowp)
2643 return exp;
2644 if (!tree_int_cst_lt (totallow, low))
2645 return exp;
2646 tem = int_const_binop (MINUS_EXPR, low, totallow);
2647 if (tem == NULL_TREE
2648 || TREE_CODE (tem) != INTEGER_CST
2649 || TREE_OVERFLOW (tem)
2650 || compare_tree_int (tem, prec - max) == 1)
2651 return NULL_TREE;
2653 *mask = wi::lshift (*mask, wi::to_widest (tem));
2654 return exp;
2657 /* Attempt to optimize small range tests using bit test.
2658 E.g.
2659 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
2660 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
2661 has been by earlier optimizations optimized into:
2662 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
2663 As all the 43 through 82 range is less than 64 numbers,
2664 for 64-bit word targets optimize that into:
2665 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
2667 static bool
2668 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
2669 vec<operand_entry *> *ops,
2670 struct range_entry *ranges)
2672 int i, j;
2673 bool any_changes = false;
2674 int prec = GET_MODE_BITSIZE (word_mode);
2675 auto_vec<struct range_entry *, 64> candidates;
2677 for (i = first; i < length - 2; i++)
2679 tree lowi, highi, lowj, highj, type;
2681 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
2682 continue;
2683 type = TREE_TYPE (ranges[i].exp);
2684 if (!INTEGRAL_TYPE_P (type))
2685 continue;
2686 lowi = ranges[i].low;
2687 if (lowi == NULL_TREE)
2688 lowi = TYPE_MIN_VALUE (type);
2689 highi = ranges[i].high;
2690 if (highi == NULL_TREE)
2691 continue;
2692 wide_int mask;
2693 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
2694 highi, &mask, &lowi);
2695 if (exp == NULL_TREE)
2696 continue;
2697 bool strict_overflow_p = ranges[i].strict_overflow_p;
2698 candidates.truncate (0);
2699 int end = MIN (i + 64, length);
2700 for (j = i + 1; j < end; j++)
2702 tree exp2;
2703 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
2704 continue;
2705 if (ranges[j].exp == exp)
2707 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
2709 exp2 = TREE_OPERAND (ranges[j].exp, 0);
2710 if (exp2 == exp)
2712 else if (TREE_CODE (exp2) == PLUS_EXPR)
2714 exp2 = TREE_OPERAND (exp2, 0);
2715 STRIP_NOPS (exp2);
2716 if (exp2 != exp)
2717 continue;
2719 else
2720 continue;
2722 else
2723 continue;
2724 lowj = ranges[j].low;
2725 if (lowj == NULL_TREE)
2726 continue;
2727 highj = ranges[j].high;
2728 if (highj == NULL_TREE)
2729 highj = TYPE_MAX_VALUE (type);
2730 wide_int mask2;
2731 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
2732 highj, &mask2, NULL);
2733 if (exp2 != exp)
2734 continue;
2735 mask |= mask2;
2736 strict_overflow_p |= ranges[j].strict_overflow_p;
2737 candidates.safe_push (&ranges[j]);
2740 /* If we need otherwise 3 or more comparisons, use a bit test. */
2741 if (candidates.length () >= 2)
2743 tree high = wide_int_to_tree (TREE_TYPE (lowi),
2744 wi::to_widest (lowi)
2745 + prec - 1 - wi::clz (mask));
2746 operand_entry *oe = (*ops)[ranges[i].idx];
2747 tree op = oe->op;
2748 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2749 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2750 location_t loc = gimple_location (stmt);
2751 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2753 /* See if it isn't cheaper to pretend the minimum value of the
2754 range is 0, if maximum value is small enough.
2755 We can avoid then subtraction of the minimum value, but the
2756 mask constant could be perhaps more expensive. */
2757 if (compare_tree_int (lowi, 0) > 0
2758 && compare_tree_int (high, prec) < 0)
2760 int cost_diff;
2761 HOST_WIDE_INT m = tree_to_uhwi (lowi);
2762 rtx reg = gen_raw_REG (word_mode, 10000);
2763 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
2764 cost_diff = set_rtx_cost (gen_rtx_PLUS (word_mode, reg,
2765 GEN_INT (-m)), speed_p);
2766 rtx r = immed_wide_int_const (mask, word_mode);
2767 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
2768 word_mode, speed_p);
2769 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
2770 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
2771 word_mode, speed_p);
2772 if (cost_diff > 0)
2774 mask = wi::lshift (mask, m);
2775 lowi = build_zero_cst (TREE_TYPE (lowi));
2779 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2780 false, lowi, high);
2781 if (tem == NULL_TREE || is_gimple_val (tem))
2782 continue;
2783 tree etype = unsigned_type_for (TREE_TYPE (exp));
2784 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
2785 fold_convert_loc (loc, etype, exp),
2786 fold_convert_loc (loc, etype, lowi));
2787 exp = fold_convert_loc (loc, integer_type_node, exp);
2788 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
2789 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
2790 build_int_cst (word_type, 1), exp);
2791 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
2792 wide_int_to_tree (word_type, mask));
2793 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
2794 build_zero_cst (word_type));
2795 if (is_gimple_val (exp))
2796 continue;
2798 /* The shift might have undefined behavior if TEM is true,
2799 but reassociate_bb isn't prepared to have basic blocks
2800 split when it is running. So, temporarily emit a code
2801 with BIT_IOR_EXPR instead of &&, and fix it up in
2802 branch_fixup. */
2803 gimple_seq seq;
2804 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
2805 gcc_assert (TREE_CODE (tem) == SSA_NAME);
2806 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
2807 gimple_seq seq2;
2808 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
2809 gimple_seq_add_seq_without_update (&seq, seq2);
2810 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2811 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
2812 gimple *g = gimple_build_assign (make_ssa_name (optype),
2813 BIT_IOR_EXPR, tem, exp);
2814 gimple_set_location (g, loc);
2815 gimple_seq_add_stmt_without_update (&seq, g);
2816 exp = gimple_assign_lhs (g);
2817 tree val = build_zero_cst (optype);
2818 if (update_range_test (&ranges[i], NULL, candidates.address (),
2819 candidates.length (), opcode, ops, exp,
2820 seq, false, val, val, strict_overflow_p))
2822 any_changes = true;
2823 reassoc_branch_fixups.safe_push (tem);
2825 else
2826 gimple_seq_discard (seq);
2829 return any_changes;
2832 /* Optimize range tests, similarly how fold_range_test optimizes
2833 it on trees. The tree code for the binary
2834 operation between all the operands is OPCODE.
2835 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
2836 maybe_optimize_range_tests for inter-bb range optimization.
2837 In that case if oe->op is NULL, oe->id is bb->index whose
2838 GIMPLE_COND is && or ||ed into the test, and oe->rank says
2839 the actual opcode. */
2841 static bool
2842 optimize_range_tests (enum tree_code opcode,
2843 vec<operand_entry *> *ops)
2845 unsigned int length = ops->length (), i, j, first;
2846 operand_entry *oe;
2847 struct range_entry *ranges;
2848 bool any_changes = false;
2850 if (length == 1)
2851 return false;
2853 ranges = XNEWVEC (struct range_entry, length);
2854 for (i = 0; i < length; i++)
2856 oe = (*ops)[i];
2857 ranges[i].idx = i;
2858 init_range_entry (ranges + i, oe->op,
2859 oe->op
2860 ? NULL
2861 : last_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
2862 /* For | invert it now, we will invert it again before emitting
2863 the optimized expression. */
2864 if (opcode == BIT_IOR_EXPR
2865 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2866 ranges[i].in_p = !ranges[i].in_p;
2869 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
2870 for (i = 0; i < length; i++)
2871 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
2872 break;
2874 /* Try to merge ranges. */
2875 for (first = i; i < length; i++)
2877 tree low = ranges[i].low;
2878 tree high = ranges[i].high;
2879 int in_p = ranges[i].in_p;
2880 bool strict_overflow_p = ranges[i].strict_overflow_p;
2881 int update_fail_count = 0;
2883 for (j = i + 1; j < length; j++)
2885 if (ranges[i].exp != ranges[j].exp)
2886 break;
2887 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
2888 ranges[j].in_p, ranges[j].low, ranges[j].high))
2889 break;
2890 strict_overflow_p |= ranges[j].strict_overflow_p;
2893 if (j == i + 1)
2894 continue;
2896 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
2897 opcode, ops, ranges[i].exp, NULL, in_p,
2898 low, high, strict_overflow_p))
2900 i = j - 1;
2901 any_changes = true;
2903 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
2904 while update_range_test would fail. */
2905 else if (update_fail_count == 64)
2906 i = j - 1;
2907 else
2908 ++update_fail_count;
2911 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
2912 ops, ranges);
2914 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
2915 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
2916 ops, ranges);
2917 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
2918 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
2919 ops, ranges);
2921 if (any_changes && opcode != ERROR_MARK)
2923 j = 0;
2924 FOR_EACH_VEC_ELT (*ops, i, oe)
2926 if (oe->op == error_mark_node)
2927 continue;
2928 else if (i != j)
2929 (*ops)[j] = oe;
2930 j++;
2932 ops->truncate (j);
2935 XDELETEVEC (ranges);
2936 return any_changes;
2939 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
2940 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
2941 otherwise the comparison code. */
2943 static tree_code
2944 ovce_extract_ops (tree var, gassign **rets, bool *reti)
2946 if (TREE_CODE (var) != SSA_NAME)
2947 return ERROR_MARK;
2949 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
2950 if (stmt == NULL)
2951 return ERROR_MARK;
2953 /* ??? If we start creating more COND_EXPR, we could perform
2954 this same optimization with them. For now, simplify. */
2955 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
2956 return ERROR_MARK;
2958 tree cond = gimple_assign_rhs1 (stmt);
2959 tree_code cmp = TREE_CODE (cond);
2960 if (TREE_CODE_CLASS (cmp) != tcc_comparison)
2961 return ERROR_MARK;
2963 /* ??? For now, allow only canonical true and false result vectors.
2964 We could expand this to other constants should the need arise,
2965 but at the moment we don't create them. */
2966 tree t = gimple_assign_rhs2 (stmt);
2967 tree f = gimple_assign_rhs3 (stmt);
2968 bool inv;
2969 if (integer_all_onesp (t))
2970 inv = false;
2971 else if (integer_all_onesp (f))
2973 cmp = invert_tree_comparison (cmp, false);
2974 inv = true;
2976 else
2977 return ERROR_MARK;
2978 if (!integer_zerop (f))
2979 return ERROR_MARK;
2981 /* Success! */
2982 if (rets)
2983 *rets = stmt;
2984 if (reti)
2985 *reti = inv;
2986 return cmp;
2989 /* Optimize the condition of VEC_COND_EXPRs which have been combined
2990 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
2992 static bool
2993 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
2995 unsigned int length = ops->length (), i, j;
2996 bool any_changes = false;
2998 if (length == 1)
2999 return false;
3001 for (i = 0; i < length; ++i)
3003 tree elt0 = (*ops)[i]->op;
3005 gassign *stmt0;
3006 bool invert;
3007 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert);
3008 if (cmp0 == ERROR_MARK)
3009 continue;
3011 for (j = i + 1; j < length; ++j)
3013 tree &elt1 = (*ops)[j]->op;
3015 gassign *stmt1;
3016 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL);
3017 if (cmp1 == ERROR_MARK)
3018 continue;
3020 tree cond0 = gimple_assign_rhs1 (stmt0);
3021 tree x0 = TREE_OPERAND (cond0, 0);
3022 tree y0 = TREE_OPERAND (cond0, 1);
3024 tree cond1 = gimple_assign_rhs1 (stmt1);
3025 tree x1 = TREE_OPERAND (cond1, 0);
3026 tree y1 = TREE_OPERAND (cond1, 1);
3028 tree comb;
3029 if (opcode == BIT_AND_EXPR)
3030 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3031 else if (opcode == BIT_IOR_EXPR)
3032 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1);
3033 else
3034 gcc_unreachable ();
3035 if (comb == NULL)
3036 continue;
3038 /* Success! */
3039 if (dump_file && (dump_flags & TDF_DETAILS))
3041 fprintf (dump_file, "Transforming ");
3042 print_generic_expr (dump_file, cond0, 0);
3043 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
3044 print_generic_expr (dump_file, cond1, 0);
3045 fprintf (dump_file, " into ");
3046 print_generic_expr (dump_file, comb, 0);
3047 fputc ('\n', dump_file);
3050 gimple_assign_set_rhs1 (stmt0, comb);
3051 if (invert)
3052 std::swap (*gimple_assign_rhs2_ptr (stmt0),
3053 *gimple_assign_rhs3_ptr (stmt0));
3054 update_stmt (stmt0);
3056 elt1 = error_mark_node;
3057 any_changes = true;
3061 if (any_changes)
3063 operand_entry *oe;
3064 j = 0;
3065 FOR_EACH_VEC_ELT (*ops, i, oe)
3067 if (oe->op == error_mark_node)
3068 continue;
3069 else if (i != j)
3070 (*ops)[j] = oe;
3071 j++;
3073 ops->truncate (j);
3076 return any_changes;
3079 /* Return true if STMT is a cast like:
3080 <bb N>:
3082 _123 = (int) _234;
3084 <bb M>:
3085 # _345 = PHI <_123(N), 1(...), 1(...)>
3086 where _234 has bool type, _123 has single use and
3087 bb N has a single successor M. This is commonly used in
3088 the last block of a range test.
3090 Also Return true if STMT is tcc_compare like:
3091 <bb N>:
3093 _234 = a_2(D) == 2;
3095 <bb M>:
3096 # _345 = PHI <_234(N), 1(...), 1(...)>
3097 _346 = (int) _345;
3098 where _234 has booltype, single use and
3099 bb N has a single successor M. This is commonly used in
3100 the last block of a range test. */
3102 static bool
3103 final_range_test_p (gimple *stmt)
3105 basic_block bb, rhs_bb, lhs_bb;
3106 edge e;
3107 tree lhs, rhs;
3108 use_operand_p use_p;
3109 gimple *use_stmt;
3111 if (!gimple_assign_cast_p (stmt)
3112 && (!is_gimple_assign (stmt)
3113 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3114 != tcc_comparison)))
3115 return false;
3116 bb = gimple_bb (stmt);
3117 if (!single_succ_p (bb))
3118 return false;
3119 e = single_succ_edge (bb);
3120 if (e->flags & EDGE_COMPLEX)
3121 return false;
3123 lhs = gimple_assign_lhs (stmt);
3124 rhs = gimple_assign_rhs1 (stmt);
3125 if (gimple_assign_cast_p (stmt)
3126 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3127 || TREE_CODE (rhs) != SSA_NAME
3128 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
3129 return false;
3131 if (!gimple_assign_cast_p (stmt)
3132 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
3133 return false;
3135 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
3136 if (!single_imm_use (lhs, &use_p, &use_stmt))
3137 return false;
3139 if (gimple_code (use_stmt) != GIMPLE_PHI
3140 || gimple_bb (use_stmt) != e->dest)
3141 return false;
3143 /* And that the rhs is defined in the same loop. */
3144 if (gimple_assign_cast_p (stmt))
3146 if (TREE_CODE (rhs) != SSA_NAME
3147 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
3148 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
3149 return false;
3151 else
3153 if (TREE_CODE (lhs) != SSA_NAME
3154 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
3155 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
3156 return false;
3159 return true;
3162 /* Return true if BB is suitable basic block for inter-bb range test
3163 optimization. If BACKWARD is true, BB should be the only predecessor
3164 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
3165 or compared with to find a common basic block to which all conditions
3166 branch to if true resp. false. If BACKWARD is false, TEST_BB should
3167 be the only predecessor of BB. */
3169 static bool
3170 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
3171 bool backward)
3173 edge_iterator ei, ei2;
3174 edge e, e2;
3175 gimple *stmt;
3176 gphi_iterator gsi;
3177 bool other_edge_seen = false;
3178 bool is_cond;
3180 if (test_bb == bb)
3181 return false;
3182 /* Check last stmt first. */
3183 stmt = last_stmt (bb);
3184 if (stmt == NULL
3185 || (gimple_code (stmt) != GIMPLE_COND
3186 && (backward || !final_range_test_p (stmt)))
3187 || gimple_visited_p (stmt)
3188 || stmt_could_throw_p (stmt)
3189 || *other_bb == bb)
3190 return false;
3191 is_cond = gimple_code (stmt) == GIMPLE_COND;
3192 if (is_cond)
3194 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
3195 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
3196 to *OTHER_BB (if not set yet, try to find it out). */
3197 if (EDGE_COUNT (bb->succs) != 2)
3198 return false;
3199 FOR_EACH_EDGE (e, ei, bb->succs)
3201 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3202 return false;
3203 if (e->dest == test_bb)
3205 if (backward)
3206 continue;
3207 else
3208 return false;
3210 if (e->dest == bb)
3211 return false;
3212 if (*other_bb == NULL)
3214 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
3215 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
3216 return false;
3217 else if (e->dest == e2->dest)
3218 *other_bb = e->dest;
3219 if (*other_bb == NULL)
3220 return false;
3222 if (e->dest == *other_bb)
3223 other_edge_seen = true;
3224 else if (backward)
3225 return false;
3227 if (*other_bb == NULL || !other_edge_seen)
3228 return false;
3230 else if (single_succ (bb) != *other_bb)
3231 return false;
3233 /* Now check all PHIs of *OTHER_BB. */
3234 e = find_edge (bb, *other_bb);
3235 e2 = find_edge (test_bb, *other_bb);
3236 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3238 gphi *phi = gsi.phi ();
3239 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
3240 corresponding to BB and TEST_BB predecessor must be the same. */
3241 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
3242 gimple_phi_arg_def (phi, e2->dest_idx), 0))
3244 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
3245 one of the PHIs should have the lhs of the last stmt in
3246 that block as PHI arg and that PHI should have 0 or 1
3247 corresponding to it in all other range test basic blocks
3248 considered. */
3249 if (!is_cond)
3251 if (gimple_phi_arg_def (phi, e->dest_idx)
3252 == gimple_assign_lhs (stmt)
3253 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
3254 || integer_onep (gimple_phi_arg_def (phi,
3255 e2->dest_idx))))
3256 continue;
3258 else
3260 gimple *test_last = last_stmt (test_bb);
3261 if (gimple_code (test_last) != GIMPLE_COND
3262 && gimple_phi_arg_def (phi, e2->dest_idx)
3263 == gimple_assign_lhs (test_last)
3264 && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx))
3265 || integer_onep (gimple_phi_arg_def (phi, e->dest_idx))))
3266 continue;
3269 return false;
3272 return true;
3275 /* Return true if BB doesn't have side-effects that would disallow
3276 range test optimization, all SSA_NAMEs set in the bb are consumed
3277 in the bb and there are no PHIs. */
3279 static bool
3280 no_side_effect_bb (basic_block bb)
3282 gimple_stmt_iterator gsi;
3283 gimple *last;
3285 if (!gimple_seq_empty_p (phi_nodes (bb)))
3286 return false;
3287 last = last_stmt (bb);
3288 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3290 gimple *stmt = gsi_stmt (gsi);
3291 tree lhs;
3292 imm_use_iterator imm_iter;
3293 use_operand_p use_p;
3295 if (is_gimple_debug (stmt))
3296 continue;
3297 if (gimple_has_side_effects (stmt))
3298 return false;
3299 if (stmt == last)
3300 return true;
3301 if (!is_gimple_assign (stmt))
3302 return false;
3303 lhs = gimple_assign_lhs (stmt);
3304 if (TREE_CODE (lhs) != SSA_NAME)
3305 return false;
3306 if (gimple_assign_rhs_could_trap_p (stmt))
3307 return false;
3308 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
3310 gimple *use_stmt = USE_STMT (use_p);
3311 if (is_gimple_debug (use_stmt))
3312 continue;
3313 if (gimple_bb (use_stmt) != bb)
3314 return false;
3317 return false;
3320 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3321 return true and fill in *OPS recursively. */
3323 static bool
3324 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3325 struct loop *loop)
3327 gimple *stmt = SSA_NAME_DEF_STMT (var);
3328 tree rhs[2];
3329 int i;
3331 if (!is_reassociable_op (stmt, code, loop))
3332 return false;
3334 rhs[0] = gimple_assign_rhs1 (stmt);
3335 rhs[1] = gimple_assign_rhs2 (stmt);
3336 gimple_set_visited (stmt, true);
3337 for (i = 0; i < 2; i++)
3338 if (TREE_CODE (rhs[i]) == SSA_NAME
3339 && !get_ops (rhs[i], code, ops, loop)
3340 && has_single_use (rhs[i]))
3342 operand_entry *oe = operand_entry_pool.allocate ();
3344 oe->op = rhs[i];
3345 oe->rank = code;
3346 oe->id = 0;
3347 oe->count = 1;
3348 oe->stmt_to_insert = NULL;
3349 ops->safe_push (oe);
3351 return true;
3354 /* Find the ops that were added by get_ops starting from VAR, see if
3355 they were changed during update_range_test and if yes, create new
3356 stmts. */
3358 static tree
3359 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3360 unsigned int *pidx, struct loop *loop)
3362 gimple *stmt = SSA_NAME_DEF_STMT (var);
3363 tree rhs[4];
3364 int i;
3366 if (!is_reassociable_op (stmt, code, loop))
3367 return NULL;
3369 rhs[0] = gimple_assign_rhs1 (stmt);
3370 rhs[1] = gimple_assign_rhs2 (stmt);
3371 rhs[2] = rhs[0];
3372 rhs[3] = rhs[1];
3373 for (i = 0; i < 2; i++)
3374 if (TREE_CODE (rhs[i]) == SSA_NAME)
3376 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
3377 if (rhs[2 + i] == NULL_TREE)
3379 if (has_single_use (rhs[i]))
3380 rhs[2 + i] = ops[(*pidx)++]->op;
3381 else
3382 rhs[2 + i] = rhs[i];
3385 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
3386 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
3388 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3389 var = make_ssa_name (TREE_TYPE (var));
3390 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
3391 rhs[2], rhs[3]);
3392 gimple_set_uid (g, gimple_uid (stmt));
3393 gimple_set_visited (g, true);
3394 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3396 return var;
3399 /* Structure to track the initial value passed to get_ops and
3400 the range in the ops vector for each basic block. */
3402 struct inter_bb_range_test_entry
3404 tree op;
3405 unsigned int first_idx, last_idx;
3408 /* Inter-bb range test optimization.
3410 Returns TRUE if a gimple conditional is optimized to a true/false,
3411 otherwise return FALSE.
3413 This indicates to the caller that it should run a CFG cleanup pass
3414 once reassociation is completed. */
3416 static bool
3417 maybe_optimize_range_tests (gimple *stmt)
3419 basic_block first_bb = gimple_bb (stmt);
3420 basic_block last_bb = first_bb;
3421 basic_block other_bb = NULL;
3422 basic_block bb;
3423 edge_iterator ei;
3424 edge e;
3425 auto_vec<operand_entry *> ops;
3426 auto_vec<inter_bb_range_test_entry> bbinfo;
3427 bool any_changes = false;
3428 bool cfg_cleanup_needed = false;
3430 /* Consider only basic blocks that end with GIMPLE_COND or
3431 a cast statement satisfying final_range_test_p. All
3432 but the last bb in the first_bb .. last_bb range
3433 should end with GIMPLE_COND. */
3434 if (gimple_code (stmt) == GIMPLE_COND)
3436 if (EDGE_COUNT (first_bb->succs) != 2)
3437 return cfg_cleanup_needed;
3439 else if (final_range_test_p (stmt))
3440 other_bb = single_succ (first_bb);
3441 else
3442 return cfg_cleanup_needed;
3444 if (stmt_could_throw_p (stmt))
3445 return cfg_cleanup_needed;
3447 /* As relative ordering of post-dominator sons isn't fixed,
3448 maybe_optimize_range_tests can be called first on any
3449 bb in the range we want to optimize. So, start searching
3450 backwards, if first_bb can be set to a predecessor. */
3451 while (single_pred_p (first_bb))
3453 basic_block pred_bb = single_pred (first_bb);
3454 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true))
3455 break;
3456 if (!no_side_effect_bb (first_bb))
3457 break;
3458 first_bb = pred_bb;
3460 /* If first_bb is last_bb, other_bb hasn't been computed yet.
3461 Before starting forward search in last_bb successors, find
3462 out the other_bb. */
3463 if (first_bb == last_bb)
3465 other_bb = NULL;
3466 /* As non-GIMPLE_COND last stmt always terminates the range,
3467 if forward search didn't discover anything, just give up. */
3468 if (gimple_code (stmt) != GIMPLE_COND)
3469 return cfg_cleanup_needed;
3470 /* Look at both successors. Either it ends with a GIMPLE_COND
3471 and satisfies suitable_cond_bb, or ends with a cast and
3472 other_bb is that cast's successor. */
3473 FOR_EACH_EDGE (e, ei, first_bb->succs)
3474 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
3475 || e->dest == first_bb)
3476 return cfg_cleanup_needed;
3477 else if (single_pred_p (e->dest))
3479 stmt = last_stmt (e->dest);
3480 if (stmt
3481 && gimple_code (stmt) == GIMPLE_COND
3482 && EDGE_COUNT (e->dest->succs) == 2)
3484 if (suitable_cond_bb (first_bb, e->dest, &other_bb, true))
3485 break;
3486 else
3487 other_bb = NULL;
3489 else if (stmt
3490 && final_range_test_p (stmt)
3491 && find_edge (first_bb, single_succ (e->dest)))
3493 other_bb = single_succ (e->dest);
3494 if (other_bb == first_bb)
3495 other_bb = NULL;
3498 if (other_bb == NULL)
3499 return cfg_cleanup_needed;
3501 /* Now do the forward search, moving last_bb to successor bbs
3502 that aren't other_bb. */
3503 while (EDGE_COUNT (last_bb->succs) == 2)
3505 FOR_EACH_EDGE (e, ei, last_bb->succs)
3506 if (e->dest != other_bb)
3507 break;
3508 if (e == NULL)
3509 break;
3510 if (!single_pred_p (e->dest))
3511 break;
3512 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false))
3513 break;
3514 if (!no_side_effect_bb (e->dest))
3515 break;
3516 last_bb = e->dest;
3518 if (first_bb == last_bb)
3519 return cfg_cleanup_needed;
3520 /* Here basic blocks first_bb through last_bb's predecessor
3521 end with GIMPLE_COND, all of them have one of the edges to
3522 other_bb and another to another block in the range,
3523 all blocks except first_bb don't have side-effects and
3524 last_bb ends with either GIMPLE_COND, or cast satisfying
3525 final_range_test_p. */
3526 for (bb = last_bb; ; bb = single_pred (bb))
3528 enum tree_code code;
3529 tree lhs, rhs;
3530 inter_bb_range_test_entry bb_ent;
3532 bb_ent.op = NULL_TREE;
3533 bb_ent.first_idx = ops.length ();
3534 bb_ent.last_idx = bb_ent.first_idx;
3535 e = find_edge (bb, other_bb);
3536 stmt = last_stmt (bb);
3537 gimple_set_visited (stmt, true);
3538 if (gimple_code (stmt) != GIMPLE_COND)
3540 use_operand_p use_p;
3541 gimple *phi;
3542 edge e2;
3543 unsigned int d;
3545 lhs = gimple_assign_lhs (stmt);
3546 rhs = gimple_assign_rhs1 (stmt);
3547 gcc_assert (bb == last_bb);
3549 /* stmt is
3550 _123 = (int) _234;
3552 _234 = a_2(D) == 2;
3554 followed by:
3555 <bb M>:
3556 # _345 = PHI <_123(N), 1(...), 1(...)>
3558 or 0 instead of 1. If it is 0, the _234
3559 range test is anded together with all the
3560 other range tests, if it is 1, it is ored with
3561 them. */
3562 single_imm_use (lhs, &use_p, &phi);
3563 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
3564 e2 = find_edge (first_bb, other_bb);
3565 d = e2->dest_idx;
3566 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
3567 if (integer_zerop (gimple_phi_arg_def (phi, d)))
3568 code = BIT_AND_EXPR;
3569 else
3571 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
3572 code = BIT_IOR_EXPR;
3575 /* If _234 SSA_NAME_DEF_STMT is
3576 _234 = _567 | _789;
3577 (or &, corresponding to 1/0 in the phi arguments,
3578 push into ops the individual range test arguments
3579 of the bitwise or resp. and, recursively. */
3580 if (TREE_CODE (rhs) == SSA_NAME
3581 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3582 != tcc_comparison)
3583 && !get_ops (rhs, code, &ops,
3584 loop_containing_stmt (stmt))
3585 && has_single_use (rhs))
3587 /* Otherwise, push the _234 range test itself. */
3588 operand_entry *oe = operand_entry_pool.allocate ();
3590 oe->op = rhs;
3591 oe->rank = code;
3592 oe->id = 0;
3593 oe->count = 1;
3594 oe->stmt_to_insert = NULL;
3595 ops.safe_push (oe);
3596 bb_ent.last_idx++;
3597 bb_ent.op = rhs;
3599 else if (is_gimple_assign (stmt)
3600 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
3601 == tcc_comparison)
3602 && !get_ops (lhs, code, &ops,
3603 loop_containing_stmt (stmt))
3604 && has_single_use (lhs))
3606 operand_entry *oe = operand_entry_pool.allocate ();
3607 oe->op = lhs;
3608 oe->rank = code;
3609 oe->id = 0;
3610 oe->count = 1;
3611 ops.safe_push (oe);
3612 bb_ent.last_idx++;
3613 bb_ent.op = lhs;
3615 else
3617 bb_ent.last_idx = ops.length ();
3618 bb_ent.op = rhs;
3620 bbinfo.safe_push (bb_ent);
3621 continue;
3623 /* Otherwise stmt is GIMPLE_COND. */
3624 code = gimple_cond_code (stmt);
3625 lhs = gimple_cond_lhs (stmt);
3626 rhs = gimple_cond_rhs (stmt);
3627 if (TREE_CODE (lhs) == SSA_NAME
3628 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3629 && ((code != EQ_EXPR && code != NE_EXPR)
3630 || rhs != boolean_false_node
3631 /* Either push into ops the individual bitwise
3632 or resp. and operands, depending on which
3633 edge is other_bb. */
3634 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
3635 ^ (code == EQ_EXPR))
3636 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
3637 loop_containing_stmt (stmt))))
3639 /* Or push the GIMPLE_COND stmt itself. */
3640 operand_entry *oe = operand_entry_pool.allocate ();
3642 oe->op = NULL;
3643 oe->rank = (e->flags & EDGE_TRUE_VALUE)
3644 ? BIT_IOR_EXPR : BIT_AND_EXPR;
3645 /* oe->op = NULL signs that there is no SSA_NAME
3646 for the range test, and oe->id instead is the
3647 basic block number, at which's end the GIMPLE_COND
3648 is. */
3649 oe->id = bb->index;
3650 oe->count = 1;
3651 oe->stmt_to_insert = NULL;
3652 ops.safe_push (oe);
3653 bb_ent.op = NULL;
3654 bb_ent.last_idx++;
3656 else if (ops.length () > bb_ent.first_idx)
3658 bb_ent.op = lhs;
3659 bb_ent.last_idx = ops.length ();
3661 bbinfo.safe_push (bb_ent);
3662 if (bb == first_bb)
3663 break;
3665 if (ops.length () > 1)
3666 any_changes = optimize_range_tests (ERROR_MARK, &ops);
3667 if (any_changes)
3669 unsigned int idx, max_idx = 0;
3670 /* update_ops relies on has_single_use predicates returning the
3671 same values as it did during get_ops earlier. Additionally it
3672 never removes statements, only adds new ones and it should walk
3673 from the single imm use and check the predicate already before
3674 making those changes.
3675 On the other side, the handling of GIMPLE_COND directly can turn
3676 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
3677 it needs to be done in a separate loop afterwards. */
3678 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3680 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3681 && bbinfo[idx].op != NULL_TREE)
3683 tree new_op;
3685 max_idx = idx;
3686 stmt = last_stmt (bb);
3687 new_op = update_ops (bbinfo[idx].op,
3688 (enum tree_code)
3689 ops[bbinfo[idx].first_idx]->rank,
3690 ops, &bbinfo[idx].first_idx,
3691 loop_containing_stmt (stmt));
3692 if (new_op == NULL_TREE)
3694 gcc_assert (bb == last_bb);
3695 new_op = ops[bbinfo[idx].first_idx++]->op;
3697 if (bbinfo[idx].op != new_op)
3699 imm_use_iterator iter;
3700 use_operand_p use_p;
3701 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
3703 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
3704 if (is_gimple_debug (use_stmt))
3705 continue;
3706 else if (gimple_code (use_stmt) == GIMPLE_COND
3707 || gimple_code (use_stmt) == GIMPLE_PHI)
3708 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3709 SET_USE (use_p, new_op);
3710 else if ((is_gimple_assign (use_stmt)
3711 && (TREE_CODE_CLASS
3712 (gimple_assign_rhs_code (use_stmt))
3713 == tcc_comparison)))
3714 cast_or_tcc_cmp_stmt = use_stmt;
3715 else if (gimple_assign_cast_p (use_stmt))
3716 cast_or_tcc_cmp_stmt = use_stmt;
3717 else
3718 gcc_unreachable ();
3720 if (cast_or_tcc_cmp_stmt)
3722 gcc_assert (bb == last_bb);
3723 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
3724 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
3725 enum tree_code rhs_code
3726 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
3727 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
3728 : CONVERT_EXPR;
3729 gassign *g;
3730 if (is_gimple_min_invariant (new_op))
3732 new_op = fold_convert (TREE_TYPE (lhs), new_op);
3733 g = gimple_build_assign (new_lhs, new_op);
3735 else
3736 g = gimple_build_assign (new_lhs, rhs_code, new_op);
3737 gimple_stmt_iterator gsi
3738 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
3739 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
3740 gimple_set_visited (g, true);
3741 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3742 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
3743 if (is_gimple_debug (use_stmt))
3744 continue;
3745 else if (gimple_code (use_stmt) == GIMPLE_COND
3746 || gimple_code (use_stmt) == GIMPLE_PHI)
3747 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
3748 SET_USE (use_p, new_lhs);
3749 else
3750 gcc_unreachable ();
3754 if (bb == first_bb)
3755 break;
3757 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
3759 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
3760 && bbinfo[idx].op == NULL_TREE
3761 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
3763 gcond *cond_stmt = as_a <gcond *> (last_stmt (bb));
3765 if (idx > max_idx)
3766 max_idx = idx;
3768 /* If we collapse the conditional to a true/false
3769 condition, then bubble that knowledge up to our caller. */
3770 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
3772 gimple_cond_make_false (cond_stmt);
3773 cfg_cleanup_needed = true;
3775 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
3777 gimple_cond_make_true (cond_stmt);
3778 cfg_cleanup_needed = true;
3780 else
3782 gimple_cond_set_code (cond_stmt, NE_EXPR);
3783 gimple_cond_set_lhs (cond_stmt,
3784 ops[bbinfo[idx].first_idx]->op);
3785 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
3787 update_stmt (cond_stmt);
3789 if (bb == first_bb)
3790 break;
3793 /* The above changes could result in basic blocks after the first
3794 modified one, up to and including last_bb, to be executed even if
3795 they would not be in the original program. If the value ranges of
3796 assignment lhs' in those bbs were dependent on the conditions
3797 guarding those basic blocks which now can change, the VRs might
3798 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
3799 are only used within the same bb, it should be not a big deal if
3800 we just reset all the VRs in those bbs. See PR68671. */
3801 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
3802 reset_flow_sensitive_info_in_bb (bb);
3804 return cfg_cleanup_needed;
3807 /* Return true if OPERAND is defined by a PHI node which uses the LHS
3808 of STMT in it's operands. This is also known as a "destructive
3809 update" operation. */
3811 static bool
3812 is_phi_for_stmt (gimple *stmt, tree operand)
3814 gimple *def_stmt;
3815 gphi *def_phi;
3816 tree lhs;
3817 use_operand_p arg_p;
3818 ssa_op_iter i;
3820 if (TREE_CODE (operand) != SSA_NAME)
3821 return false;
3823 lhs = gimple_assign_lhs (stmt);
3825 def_stmt = SSA_NAME_DEF_STMT (operand);
3826 def_phi = dyn_cast <gphi *> (def_stmt);
3827 if (!def_phi)
3828 return false;
3830 FOR_EACH_PHI_ARG (arg_p, def_phi, i, SSA_OP_USE)
3831 if (lhs == USE_FROM_PTR (arg_p))
3832 return true;
3833 return false;
3836 /* Remove def stmt of VAR if VAR has zero uses and recurse
3837 on rhs1 operand if so. */
3839 static void
3840 remove_visited_stmt_chain (tree var)
3842 gimple *stmt;
3843 gimple_stmt_iterator gsi;
3845 while (1)
3847 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
3848 return;
3849 stmt = SSA_NAME_DEF_STMT (var);
3850 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
3852 var = gimple_assign_rhs1 (stmt);
3853 gsi = gsi_for_stmt (stmt);
3854 reassoc_remove_stmt (&gsi);
3855 release_defs (stmt);
3857 else
3858 return;
3862 /* This function checks three consequtive operands in
3863 passed operands vector OPS starting from OPINDEX and
3864 swaps two operands if it is profitable for binary operation
3865 consuming OPINDEX + 1 abnd OPINDEX + 2 operands.
3867 We pair ops with the same rank if possible.
3869 The alternative we try is to see if STMT is a destructive
3870 update style statement, which is like:
3871 b = phi (a, ...)
3872 a = c + b;
3873 In that case, we want to use the destructive update form to
3874 expose the possible vectorizer sum reduction opportunity.
3875 In that case, the third operand will be the phi node. This
3876 check is not performed if STMT is null.
3878 We could, of course, try to be better as noted above, and do a
3879 lot of work to try to find these opportunities in >3 operand
3880 cases, but it is unlikely to be worth it. */
3882 static void
3883 swap_ops_for_binary_stmt (vec<operand_entry *> ops,
3884 unsigned int opindex, gimple *stmt)
3886 operand_entry *oe1, *oe2, *oe3;
3888 oe1 = ops[opindex];
3889 oe2 = ops[opindex + 1];
3890 oe3 = ops[opindex + 2];
3892 if ((oe1->rank == oe2->rank
3893 && oe2->rank != oe3->rank)
3894 || (stmt && is_phi_for_stmt (stmt, oe3->op)
3895 && !is_phi_for_stmt (stmt, oe1->op)
3896 && !is_phi_for_stmt (stmt, oe2->op)))
3897 std::swap (*oe1, *oe3);
3898 else if ((oe1->rank == oe3->rank
3899 && oe2->rank != oe3->rank)
3900 || (stmt && is_phi_for_stmt (stmt, oe2->op)
3901 && !is_phi_for_stmt (stmt, oe1->op)
3902 && !is_phi_for_stmt (stmt, oe3->op)))
3903 std::swap (*oe1, *oe2);
3906 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
3907 two definitions, otherwise return STMT. */
3909 static inline gimple *
3910 find_insert_point (gimple *stmt, tree rhs1, tree rhs2)
3912 if (TREE_CODE (rhs1) == SSA_NAME
3913 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
3914 stmt = SSA_NAME_DEF_STMT (rhs1);
3915 if (TREE_CODE (rhs2) == SSA_NAME
3916 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
3917 stmt = SSA_NAME_DEF_STMT (rhs2);
3918 return stmt;
3921 /* If the stmt that defines operand has to be inserted, insert it
3922 before the use. */
3923 static void
3924 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
3926 gcc_assert (is_gimple_assign (stmt_to_insert));
3927 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
3928 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
3929 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2);
3930 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
3931 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
3933 /* If the insert point is not stmt, then insert_point would be
3934 the point where operand rhs1 or rhs2 is defined. In this case,
3935 stmt_to_insert has to be inserted afterwards. This would
3936 only happen when the stmt insertion point is flexible. */
3937 if (stmt == insert_point)
3938 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
3939 else
3940 insert_stmt_after (stmt_to_insert, insert_point);
3944 /* Recursively rewrite our linearized statements so that the operators
3945 match those in OPS[OPINDEX], putting the computation in rank
3946 order. Return new lhs. */
3948 static tree
3949 rewrite_expr_tree (gimple *stmt, unsigned int opindex,
3950 vec<operand_entry *> ops, bool changed)
3952 tree rhs1 = gimple_assign_rhs1 (stmt);
3953 tree rhs2 = gimple_assign_rhs2 (stmt);
3954 tree lhs = gimple_assign_lhs (stmt);
3955 operand_entry *oe;
3957 /* The final recursion case for this function is that you have
3958 exactly two operations left.
3959 If we had exactly one op in the entire list to start with, we
3960 would have never called this function, and the tail recursion
3961 rewrites them one at a time. */
3962 if (opindex + 2 == ops.length ())
3964 operand_entry *oe1, *oe2;
3966 oe1 = ops[opindex];
3967 oe2 = ops[opindex + 1];
3969 if (rhs1 != oe1->op || rhs2 != oe2->op)
3971 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3972 unsigned int uid = gimple_uid (stmt);
3974 if (dump_file && (dump_flags & TDF_DETAILS))
3976 fprintf (dump_file, "Transforming ");
3977 print_gimple_stmt (dump_file, stmt, 0, 0);
3980 /* If the stmt that defines operand has to be inserted, insert it
3981 before the use. */
3982 if (oe1->stmt_to_insert)
3983 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
3984 if (oe2->stmt_to_insert)
3985 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
3986 /* Even when changed is false, reassociation could have e.g. removed
3987 some redundant operations, so unless we are just swapping the
3988 arguments or unless there is no change at all (then we just
3989 return lhs), force creation of a new SSA_NAME. */
3990 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
3992 gimple *insert_point
3993 = find_insert_point (stmt, oe1->op, oe2->op);
3994 lhs = make_ssa_name (TREE_TYPE (lhs));
3995 stmt
3996 = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
3997 oe1->op, oe2->op);
3998 gimple_set_uid (stmt, uid);
3999 gimple_set_visited (stmt, true);
4000 if (insert_point == gsi_stmt (gsi))
4001 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4002 else
4003 insert_stmt_after (stmt, insert_point);
4005 else
4007 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op)
4008 == stmt);
4009 gimple_assign_set_rhs1 (stmt, oe1->op);
4010 gimple_assign_set_rhs2 (stmt, oe2->op);
4011 update_stmt (stmt);
4014 if (rhs1 != oe1->op && rhs1 != oe2->op)
4015 remove_visited_stmt_chain (rhs1);
4017 if (dump_file && (dump_flags & TDF_DETAILS))
4019 fprintf (dump_file, " into ");
4020 print_gimple_stmt (dump_file, stmt, 0, 0);
4023 return lhs;
4026 /* If we hit here, we should have 3 or more ops left. */
4027 gcc_assert (opindex + 2 < ops.length ());
4029 /* Rewrite the next operator. */
4030 oe = ops[opindex];
4032 /* If the stmt that defines operand has to be inserted, insert it
4033 before the use. */
4034 if (oe->stmt_to_insert)
4035 insert_stmt_before_use (stmt, oe->stmt_to_insert);
4037 /* Recurse on the LHS of the binary operator, which is guaranteed to
4038 be the non-leaf side. */
4039 tree new_rhs1
4040 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops,
4041 changed || oe->op != rhs2);
4043 if (oe->op != rhs2 || new_rhs1 != rhs1)
4045 if (dump_file && (dump_flags & TDF_DETAILS))
4047 fprintf (dump_file, "Transforming ");
4048 print_gimple_stmt (dump_file, stmt, 0, 0);
4051 /* If changed is false, this is either opindex == 0
4052 or all outer rhs2's were equal to corresponding oe->op,
4053 and powi_result is NULL.
4054 That means lhs is equivalent before and after reassociation.
4055 Otherwise ensure the old lhs SSA_NAME is not reused and
4056 create a new stmt as well, so that any debug stmts will be
4057 properly adjusted. */
4058 if (changed)
4060 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4061 unsigned int uid = gimple_uid (stmt);
4062 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op);
4064 lhs = make_ssa_name (TREE_TYPE (lhs));
4065 stmt = gimple_build_assign (lhs, gimple_assign_rhs_code (stmt),
4066 new_rhs1, oe->op);
4067 gimple_set_uid (stmt, uid);
4068 gimple_set_visited (stmt, true);
4069 if (insert_point == gsi_stmt (gsi))
4070 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4071 else
4072 insert_stmt_after (stmt, insert_point);
4074 else
4076 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op)
4077 == stmt);
4078 gimple_assign_set_rhs1 (stmt, new_rhs1);
4079 gimple_assign_set_rhs2 (stmt, oe->op);
4080 update_stmt (stmt);
4083 if (dump_file && (dump_flags & TDF_DETAILS))
4085 fprintf (dump_file, " into ");
4086 print_gimple_stmt (dump_file, stmt, 0, 0);
4089 return lhs;
4092 /* Find out how many cycles we need to compute statements chain.
4093 OPS_NUM holds the number of statements in a chain. CPU_WIDTH is the
4094 maximum number of independent statements we may execute per cycle. */
4096 static int
4097 get_required_cycles (int ops_num, int cpu_width)
4099 int res;
4100 int elog;
4101 unsigned int rest;
4103 /* While we have more than 2 * cpu_width operands
4104 we may reduce the number of operands by cpu_width
4105 per cycle. */
4106 res = ops_num / (2 * cpu_width);
4108 /* The remaining operand count may be halved each cycle
4109 until we have only one operand. */
4110 rest = (unsigned)(ops_num - res * cpu_width);
4111 elog = exact_log2 (rest);
4112 if (elog >= 0)
4113 res += elog;
4114 else
4115 res += floor_log2 (rest) + 1;
4117 return res;
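/* Illustrative sketch (not part of the pass): the cycle estimate above,
   modelled as a hypothetical standalone helper and worked for 12 operands
   on a width-2 unit: 12 / (2 * 2) = 3 width-limited cycles, leaving
   12 - 3 * 2 = 6 operands; 6 is not a power of two, so a further
   floor_log2 (6) + 1 = 3 cycles are needed, for 6 cycles total.  */

static int
toy_required_cycles (int ops_num, int cpu_width)
{
  int res = ops_num / (2 * cpu_width);
  int rest = ops_num - res * cpu_width;
  int log = 0, pow2 = 1;
  /* ceil (log2 (rest)) by hand; equals exact_log2 for powers of two.  */
  while (pow2 < rest)
    {
      pow2 <<= 1;
      log++;
    }
  return res + log;
}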
4120 /* Returns an optimal number of registers to use for computation of
4121 given statements. */
4123 static int
4124 get_reassociation_width (int ops_num, enum tree_code opc,
4125 machine_mode mode)
4127 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH);
4128 int width;
4129 int width_min;
4130 int cycles_best;
4132 if (param_width > 0)
4133 width = param_width;
4134 else
4135 width = targetm.sched.reassociation_width (opc, mode);
4137 if (width == 1)
4138 return width;
4140 /* Get the minimal time required for sequence computation. */
4141 cycles_best = get_required_cycles (ops_num, width);
4143 /* Check if we may use less width and still compute sequence for
4144 the same time. It will allow us to reduce registers usage.
4145 get_required_cycles is monotonically increasing with lower width
4146 so we can perform a binary search for the minimal width that still
4147 results in the optimal cycle count. */
4148 width_min = 1;
4149 while (width > width_min)
4151 int width_mid = (width + width_min) / 2;
4153 if (get_required_cycles (ops_num, width_mid) == cycles_best)
4154 width = width_mid;
4155 else if (width_min < width_mid)
4156 width_min = width_mid;
4157 else
4158 break;
4161 return width;
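/* Illustrative sketch (not part of the pass): the width narrowing above,
   reusing the hypothetical toy_required_cycles model from the previous
   sketch.  Starting from an assumed target width, keep the smallest width
   that still achieves the same cycle count, to reduce register pressure.  */

static int
toy_reassociation_width (int ops_num, int start_width)
{
  int width = start_width;
  int width_min = 1;
  int cycles_best = toy_required_cycles (ops_num, width);

  while (width > width_min)
    {
      int width_mid = (width + width_min) / 2;
      if (toy_required_cycles (ops_num, width_mid) == cycles_best)
	width = width_mid;
      else if (width_min < width_mid)
	width_min = width_mid;
      else
	break;
    }
  return width;
}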
4164 /* Recursively rewrite our linearized statements so that the operators
4165 match those in OPS[OPINDEX], putting the computation in rank
4166 order and trying to allow operations to be executed in
4167 parallel. */
4169 static void
4170 rewrite_expr_tree_parallel (gassign *stmt, int width,
4171 vec<operand_entry *> ops)
4173 enum tree_code opcode = gimple_assign_rhs_code (stmt);
4174 int op_num = ops.length ();
4175 int stmt_num = op_num - 1;
4176 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
4177 int op_index = op_num - 1;
4178 int stmt_index = 0;
4179 int ready_stmts_end = 0;
4180 int i = 0;
4181 gimple *stmt1 = NULL, *stmt2 = NULL;
4182 tree last_rhs1 = gimple_assign_rhs1 (stmt);
4184 /* We start expression rewriting from the top statements.
4185 So, in this loop we create a full list of statements
4186 we will work with. */
4187 stmts[stmt_num - 1] = stmt;
4188 for (i = stmt_num - 2; i >= 0; i--)
4189 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
4191 for (i = 0; i < stmt_num; i++)
4193 tree op1, op2;
4195 /* Determine whether we should use results of
4196 already handled statements or not. */
4197 if (ready_stmts_end == 0
4198 && (i - stmt_index >= width || op_index < 1))
4199 ready_stmts_end = i;
4201 /* Now we choose operands for the next statement. A nonzero
4202 value in ready_stmts_end means that we should use
4203 the result of already generated statements as a new operand. */
4204 if (ready_stmts_end > 0)
4206 op1 = gimple_assign_lhs (stmts[stmt_index++]);
4207 if (ready_stmts_end > stmt_index)
4208 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4209 else if (op_index >= 0)
4211 operand_entry *oe = ops[op_index--];
4212 stmt2 = oe->stmt_to_insert;
4213 op2 = oe->op;
4215 else
4217 gcc_assert (stmt_index < i);
4218 op2 = gimple_assign_lhs (stmts[stmt_index++]);
4221 if (stmt_index >= ready_stmts_end)
4222 ready_stmts_end = 0;
4224 else
4226 if (op_index > 1)
4227 swap_ops_for_binary_stmt (ops, op_index - 2, NULL);
4228 operand_entry *oe2 = ops[op_index--];
4229 operand_entry *oe1 = ops[op_index--];
4230 op2 = oe2->op;
4231 stmt2 = oe2->stmt_to_insert;
4232 op1 = oe1->op;
4233 stmt1 = oe1->stmt_to_insert;
4236 /* If we emit the last statement then we should put
4237 operands into the last statement. It will also
4238 break the loop. */
4239 if (op_index < 0 && stmt_index == i)
4240 i = stmt_num - 1;
4242 if (dump_file && (dump_flags & TDF_DETAILS))
4244 fprintf (dump_file, "Transforming ");
4245 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4248 /* If the stmt that defines operand has to be inserted, insert it
4249 before the use. */
4250 if (stmt1)
4251 insert_stmt_before_use (stmts[i], stmt1);
4252 if (stmt2)
4253 insert_stmt_before_use (stmts[i], stmt2);
4254 stmt1 = stmt2 = NULL;
4256 /* We keep original statement only for the last one. All
4257 others are recreated. */
4258 if (i == stmt_num - 1)
4260 gimple_assign_set_rhs1 (stmts[i], op1);
4261 gimple_assign_set_rhs2 (stmts[i], op2);
4262 update_stmt (stmts[i]);
4264 else
4266 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
4268 if (dump_file && (dump_flags & TDF_DETAILS))
4270 fprintf (dump_file, " into ");
4271 print_gimple_stmt (dump_file, stmts[i], 0, 0);
4275 remove_visited_stmt_chain (last_rhs1);
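/* Illustrative sketch (not part of the pass): a much-simplified model of
   the idea behind the parallel rewrite -- combining operands pairwise so
   the dependence chain has logarithmic rather than linear depth.  The
   real code above interleaves leaf operands and intermediate results
   based on WIDTH; this toy version just performs a full pairwise
   reduction of a sum.  */

static double
toy_pairwise_sum (double *ops, int n)
{
  while (n > 1)
    {
      int half = n / 2;
      for (int i = 0; i < half; i++)
	ops[i] = ops[2 * i] + ops[2 * i + 1];	/* independent adds */
      if (n & 1)
	{
	  ops[half] = ops[n - 1];	/* carry the odd operand forward */
	  half++;
	}
      n = half;
    }
  return ops[0];
}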
4278 /* Transform STMT, which is really (A + B) + (C + D), into the left
4279 linear form, ((A + B) + C) + D.
4280 Recurse on D if necessary. */
4282 static void
4283 linearize_expr (gimple *stmt)
4285 gimple_stmt_iterator gsi;
4286 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4287 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4288 gimple *oldbinrhs = binrhs;
4289 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4290 gimple *newbinrhs = NULL;
4291 struct loop *loop = loop_containing_stmt (stmt);
4292 tree lhs = gimple_assign_lhs (stmt);
4294 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4295 && is_reassociable_op (binrhs, rhscode, loop));
4297 gsi = gsi_for_stmt (stmt);
4299 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
4300 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
4301 gimple_assign_rhs_code (binrhs),
4302 gimple_assign_lhs (binlhs),
4303 gimple_assign_rhs2 (binrhs));
4304 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
4305 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
4306 gimple_set_uid (binrhs, gimple_uid (stmt));
4308 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
4309 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4311 if (dump_file && (dump_flags & TDF_DETAILS))
4313 fprintf (dump_file, "Linearized: ");
4314 print_gimple_stmt (dump_file, stmt, 0, 0);
4317 reassociate_stats.linearized++;
4318 update_stmt (stmt);
4320 gsi = gsi_for_stmt (oldbinrhs);
4321 reassoc_remove_stmt (&gsi);
4322 release_defs (oldbinrhs);
4324 gimple_set_visited (stmt, true);
4325 gimple_set_visited (binlhs, true);
4326 gimple_set_visited (binrhs, true);
4328 /* Tail recurse on the new rhs if it still needs reassociation. */
4329 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
4330 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
4331 want to change the algorithm while converting to tuples. */
4332 linearize_expr (stmt);
4335 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
4336 it. Otherwise, return NULL. */
4338 static gimple *
4339 get_single_immediate_use (tree lhs)
4341 use_operand_p immuse;
4342 gimple *immusestmt;
4344 if (TREE_CODE (lhs) == SSA_NAME
4345 && single_imm_use (lhs, &immuse, &immusestmt)
4346 && is_gimple_assign (immusestmt))
4347 return immusestmt;
4349 return NULL;
4352 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
4353 representing the negated value. Insertions of any necessary
4354 instructions go before GSI.
4355 This function is recursive in that, if you hand it "a_5" as the
4356 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
4357 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
4359 static tree
4360 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
4362 gimple *negatedefstmt = NULL;
4363 tree resultofnegate;
4364 gimple_stmt_iterator gsi;
4365 unsigned int uid;
4367 /* If we are trying to negate a name, defined by an add, negate the
4368 add operands instead. */
4369 if (TREE_CODE (tonegate) == SSA_NAME)
4370 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
4371 if (TREE_CODE (tonegate) == SSA_NAME
4372 && is_gimple_assign (negatedefstmt)
4373 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
4374 && has_single_use (gimple_assign_lhs (negatedefstmt))
4375 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
4377 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
4378 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
4379 tree lhs = gimple_assign_lhs (negatedefstmt);
4380 gimple *g;
4382 gsi = gsi_for_stmt (negatedefstmt);
4383 rhs1 = negate_value (rhs1, &gsi);
4385 gsi = gsi_for_stmt (negatedefstmt);
4386 rhs2 = negate_value (rhs2, &gsi);
4388 gsi = gsi_for_stmt (negatedefstmt);
4389 lhs = make_ssa_name (TREE_TYPE (lhs));
4390 gimple_set_visited (negatedefstmt, true);
4391 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
4392 gimple_set_uid (g, gimple_uid (negatedefstmt));
4393 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4394 return lhs;
4397 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
4398 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
4399 NULL_TREE, true, GSI_SAME_STMT);
4400 gsi = *gsip;
4401 uid = gimple_uid (gsi_stmt (gsi));
4402 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
4404 gimple *stmt = gsi_stmt (gsi);
4405 if (gimple_uid (stmt) != 0)
4406 break;
4407 gimple_set_uid (stmt, uid);
4409 return resultofnegate;
4412 /* Return true if we should break up the subtract in STMT into an add
4413 with negate. This is true when the subtract operands are really
4414 adds, or the subtract itself is used in an add expression. In
4415 either case, breaking up the subtract into an add with negate
4416 exposes the adds to reassociation. */
4418 static bool
4419 should_break_up_subtract (gimple *stmt)
4421 tree lhs = gimple_assign_lhs (stmt);
4422 tree binlhs = gimple_assign_rhs1 (stmt);
4423 tree binrhs = gimple_assign_rhs2 (stmt);
4424 gimple *immusestmt;
4425 struct loop *loop = loop_containing_stmt (stmt);
4427 if (TREE_CODE (binlhs) == SSA_NAME
4428 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4429 return true;
4431 if (TREE_CODE (binrhs) == SSA_NAME
4432 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
4433 return true;
4435 if (TREE_CODE (lhs) == SSA_NAME
4436 && (immusestmt = get_single_immediate_use (lhs))
4437 && is_gimple_assign (immusestmt)
4438 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
4439 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
4440 return true;
4441 return false;
4444 /* Transform STMT from A - B into A + -B. */
4446 static void
4447 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
4449 tree rhs1 = gimple_assign_rhs1 (stmt);
4450 tree rhs2 = gimple_assign_rhs2 (stmt);
4452 if (dump_file && (dump_flags & TDF_DETAILS))
4454 fprintf (dump_file, "Breaking up subtract ");
4455 print_gimple_stmt (dump_file, stmt, 0, 0);
4458 rhs2 = negate_value (rhs2, gsip);
4459 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
4460 update_stmt (stmt);
4463 /* Determine whether STMT is a builtin call that raises an SSA name
4464 to an integer power and has only one use. If so, and this is early
4465 reassociation and unsafe math optimizations are permitted, place
4466 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
4467 If any of these conditions does not hold, return FALSE. */
4469 static bool
4470 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
4472 tree arg1;
4473 REAL_VALUE_TYPE c, cint;
4475 switch (gimple_call_combined_fn (stmt))
4477 CASE_CFN_POW:
4478 if (flag_errno_math)
4479 return false;
4481 *base = gimple_call_arg (stmt, 0);
4482 arg1 = gimple_call_arg (stmt, 1);
4484 if (TREE_CODE (arg1) != REAL_CST)
4485 return false;
4487 c = TREE_REAL_CST (arg1);
4489 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
4490 return false;
4492 *exponent = real_to_integer (&c);
4493 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
4494 if (!real_identical (&c, &cint))
4495 return false;
4497 break;
4499 CASE_CFN_POWI:
4500 *base = gimple_call_arg (stmt, 0);
4501 arg1 = gimple_call_arg (stmt, 1);
4503 if (!tree_fits_shwi_p (arg1))
4504 return false;
4506 *exponent = tree_to_shwi (arg1);
4507 break;
4509 default:
4510 return false;
4513 /* Expanding negative exponents is generally unproductive, so we don't
4514 complicate matters with those. Exponents of zero and one should
4515 have been handled by expression folding. */
4516 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
4517 return false;
4519 return true;
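/* Illustrative sketch (not part of the pass): the shape of the exponent
   test above, modelled on plain doubles instead of REAL_VALUE_TYPE.  A
   pow () call only qualifies when its exponent constant is an exact
   integer of at least 2 that fits a host-wide integer.  */

#include <math.h>

static int
toy_acceptable_exponent (double exp_cst, long *exponent)
{
  double rounded = floor (exp_cst);
  if (rounded != exp_cst)	/* not an exact integral value */
    return 0;
  /* Conservative bound (assumption of this sketch) so the cast below
     is exact; the real code checks REAL_EXP against the host width.  */
  if (rounded < 2.0 || rounded > 1e15)
    return 0;
  *exponent = (long) rounded;
  return 1;
}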
4522 /* Try to derive and add operand entry for OP to *OPS. Return false if
4523 unsuccessful. */
4525 static bool
4526 try_special_add_to_ops (vec<operand_entry *> *ops,
4527 enum tree_code code,
4528 tree op, gimple* def_stmt)
4530 tree base = NULL_TREE;
4531 HOST_WIDE_INT exponent = 0;
4533 if (TREE_CODE (op) != SSA_NAME
4534 || ! has_single_use (op))
4535 return false;
4537 if (code == MULT_EXPR
4538 && reassoc_insert_powi_p
4539 && flag_unsafe_math_optimizations
4540 && is_gimple_call (def_stmt)
4541 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
4543 add_repeat_to_ops_vec (ops, base, exponent);
4544 gimple_set_visited (def_stmt, true);
4545 return true;
4547 else if (code == MULT_EXPR
4548 && is_gimple_assign (def_stmt)
4549 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
4550 && !HONOR_SNANS (TREE_TYPE (op))
4551 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
4552 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op))))
4554 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4555 tree cst = build_minus_one_cst (TREE_TYPE (op));
4556 add_to_ops_vec (ops, rhs1);
4557 add_to_ops_vec (ops, cst);
4558 gimple_set_visited (def_stmt, true);
4559 return true;
4562 return false;
4565 /* Recursively linearize a binary expression that is the RHS of STMT.
4566 Place the operands of the expression tree in the vector named OPS. */
4568 static void
4569 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
4570 bool is_associative, bool set_visited)
4572 tree binlhs = gimple_assign_rhs1 (stmt);
4573 tree binrhs = gimple_assign_rhs2 (stmt);
4574 gimple *binlhsdef = NULL, *binrhsdef = NULL;
4575 bool binlhsisreassoc = false;
4576 bool binrhsisreassoc = false;
4577 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4578 struct loop *loop = loop_containing_stmt (stmt);
4580 if (set_visited)
4581 gimple_set_visited (stmt, true);
4583 if (TREE_CODE (binlhs) == SSA_NAME)
4585 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
4586 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
4587 && !stmt_could_throw_p (binlhsdef));
4590 if (TREE_CODE (binrhs) == SSA_NAME)
4592 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
4593 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
4594 && !stmt_could_throw_p (binrhsdef));
4597 /* If the LHS is not reassociable, but the RHS is, we need to swap
4598 them. If neither is reassociable, there is nothing we can do, so
4599 just put them in the ops vector. If the LHS is reassociable,
4600 linearize it. If both are reassociable, then linearize the RHS
4601 and the LHS. */
4603 if (!binlhsisreassoc)
4605 /* If this is not an associative operation (e.g. division), give up. */
4606 if (!is_associative)
4608 add_to_ops_vec (ops, binrhs);
4609 return;
4612 if (!binrhsisreassoc)
4614 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4615 add_to_ops_vec (ops, binrhs);
4617 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
4618 add_to_ops_vec (ops, binlhs);
4620 return;
4623 if (dump_file && (dump_flags & TDF_DETAILS))
4625 fprintf (dump_file, "swapping operands of ");
4626 print_gimple_stmt (dump_file, stmt, 0, 0);
4629 swap_ssa_operands (stmt,
4630 gimple_assign_rhs1_ptr (stmt),
4631 gimple_assign_rhs2_ptr (stmt));
4632 update_stmt (stmt);
4634 if (dump_file && (dump_flags & TDF_DETAILS))
4636 fprintf (dump_file, " is now ");
4637 print_gimple_stmt (dump_file, stmt, 0, 0);
4640 /* We want to make it so the lhs is always the reassociative op,
4641 so swap. */
4642 std::swap (binlhs, binrhs);
4644 else if (binrhsisreassoc)
4646 linearize_expr (stmt);
4647 binlhs = gimple_assign_rhs1 (stmt);
4648 binrhs = gimple_assign_rhs2 (stmt);
4651 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
4652 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
4653 rhscode, loop));
4654 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
4655 is_associative, set_visited);
4657 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
4658 add_to_ops_vec (ops, binrhs);
4661 /* Repropagate the negates back into subtracts, since no other pass
4662 currently does it. */
4664 static void
4665 repropagate_negates (void)
4667 unsigned int i = 0;
4668 tree negate;
4670 FOR_EACH_VEC_ELT (plus_negates, i, negate)
4672 gimple *user = get_single_immediate_use (negate);
4674 if (!user || !is_gimple_assign (user))
4675 continue;
4677 /* The negate operand can be either operand of a PLUS_EXPR
4678 (it can be the LHS if the RHS is a constant for example).
4680 Force the negate operand to the RHS of the PLUS_EXPR, then
4681 transform the PLUS_EXPR into a MINUS_EXPR. */
4682 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
4684 /* If the negated operand appears on the LHS of the
4685 PLUS_EXPR, exchange the operands of the PLUS_EXPR
4686 to force the negated operand to the RHS of the PLUS_EXPR. */
4687 if (gimple_assign_rhs1 (user) == negate)
4689 swap_ssa_operands (user,
4690 gimple_assign_rhs1_ptr (user),
4691 gimple_assign_rhs2_ptr (user));
4694 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
4695 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
4696 if (gimple_assign_rhs2 (user) == negate)
4698 tree rhs1 = gimple_assign_rhs1 (user);
4699 tree rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
4700 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4701 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
4702 update_stmt (user);
4705 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
4707 if (gimple_assign_rhs1 (user) == negate)
4709 /* We have
4710 x = -a
4711 y = x - b
4712 which we transform into
4713 x = a + b
4714 y = -x .
4715 This pushes down the negate which we possibly can merge
4716 into some other operation, hence insert it into the
4717 plus_negates vector. */
4718 gimple *feed = SSA_NAME_DEF_STMT (negate);
4719 tree a = gimple_assign_rhs1 (feed);
4720 tree b = gimple_assign_rhs2 (user);
4721 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
4722 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
4723 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
4724 gimple *g = gimple_build_assign (x, PLUS_EXPR, a, b);
4725 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4726 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
4727 user = gsi_stmt (gsi2);
4728 update_stmt (user);
4729 reassoc_remove_stmt (&gsi);
4730 release_defs (feed);
4731 plus_negates.safe_push (gimple_assign_lhs (user));
4733 else
4735 /* Transform "x = -a; y = b - x" into "y = b + a", getting
4736 rid of one operation. */
4737 gimple *feed = SSA_NAME_DEF_STMT (negate);
4738 tree a = gimple_assign_rhs1 (feed);
4739 tree rhs1 = gimple_assign_rhs1 (user);
4740 gimple_stmt_iterator gsi = gsi_for_stmt (user);
4741 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, a);
4742 update_stmt (gsi_stmt (gsi));
4748 /* Returns true if OP is of a type for which we can do reassociation.
4749 That is for integral or non-saturating fixed-point types, and for
4750 floating point types when associative-math is enabled. */
4752 static bool
4753 can_reassociate_p (tree op)
4755 tree type = TREE_TYPE (op);
4756 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
4757 || NON_SAT_FIXED_POINT_TYPE_P (type)
4758 || (flag_associative_math && FLOAT_TYPE_P (type)))
4759 return true;
4760 return false;
4763 /* Break up subtract operations in block BB.
4765 We do this top down because we don't know whether the subtract is
4766 part of a possible chain of reassociation except at the top.
4768 IE given
4769 d = f + g
4770 c = a + e
4771 b = c - d
4772 q = b - r
4773 k = t - q
4775 we want to break up k = t - q, but we won't until we've transformed q
4776 = b - r, which won't be broken up until we transform b = c - d.
4778 En passant, clear the GIMPLE visited flag on every statement
4779 and set UIDs within each basic block. */
4781 static void
4782 break_up_subtract_bb (basic_block bb)
4784 gimple_stmt_iterator gsi;
4785 basic_block son;
4786 unsigned int uid = 1;
4788 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4790 gimple *stmt = gsi_stmt (gsi);
4791 gimple_set_visited (stmt, false);
4792 gimple_set_uid (stmt, uid++);
4794 if (!is_gimple_assign (stmt)
4795 || !can_reassociate_p (gimple_assign_lhs (stmt)))
4796 continue;
4798 /* Look for simple gimple subtract operations. */
4799 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
4801 if (!can_reassociate_p (gimple_assign_rhs1 (stmt))
4802 || !can_reassociate_p (gimple_assign_rhs2 (stmt)))
4803 continue;
4805 /* Check for a subtract used only in an addition. If this
4806 is the case, transform it into add of a negate for better
4807 reassociation. IE transform C = A-B into C = A + -B if C
4808 is only used in an addition. */
4809 if (should_break_up_subtract (stmt))
4810 break_up_subtract (stmt, &gsi);
4812 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
4813 && can_reassociate_p (gimple_assign_rhs1 (stmt)))
4814 plus_negates.safe_push (gimple_assign_lhs (stmt));
4816 for (son = first_dom_son (CDI_DOMINATORS, bb);
4817 son;
4818 son = next_dom_son (CDI_DOMINATORS, son))
4819 break_up_subtract_bb (son);
4822 /* Used for repeated factor analysis. */
4823 struct repeat_factor
4825 /* An SSA name that occurs in a multiply chain. */
4826 tree factor;
4828 /* Cached rank of the factor. */
4829 unsigned rank;
4831 /* Number of occurrences of the factor in the chain. */
4832 HOST_WIDE_INT count;
4834 /* An SSA name representing the product of this factor and
4835 all factors appearing later in the repeated factor vector. */
4836 tree repr;
4840 static vec<repeat_factor> repeat_factor_vec;
4842 /* Used for sorting the repeat factor vector. Sort primarily by
4843 ascending occurrence count, secondarily by descending rank. */
4845 static int
4846 compare_repeat_factors (const void *x1, const void *x2)
4848 const repeat_factor *rf1 = (const repeat_factor *) x1;
4849 const repeat_factor *rf2 = (const repeat_factor *) x2;
4851 if (rf1->count != rf2->count)
4852 return rf1->count - rf2->count;
4854 return rf2->rank - rf1->rank;
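/* Illustrative sketch (not part of the pass): how the comparator above
   orders a hypothetical factor table -- primarily by ascending occurrence
   count, then by descending rank -- when handed to qsort.  */

#include <stdlib.h>

struct toy_factor { const char *name; unsigned rank; long count; };

static int
toy_compare_factors (const void *x1, const void *x2)
{
  const struct toy_factor *f1 = (const struct toy_factor *) x1;
  const struct toy_factor *f2 = (const struct toy_factor *) x2;
  if (f1->count != f2->count)
    return f1->count < f2->count ? -1 : 1;
  return (int) f2->rank - (int) f1->rank;
}

/* For { {"x", 5, 2}, {"y", 9, 3}, {"z", 1, 3} },
   qsort (table, 3, sizeof (table[0]), toy_compare_factors) yields
   x (count 2), then y (count 3, rank 9), then z (count 3, rank 1).  */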
4857 /* Look for repeated operands in OPS in the multiply tree rooted at
4858 STMT. Replace them with an optimal sequence of multiplies and powi
4859 builtin calls, and remove the used operands from OPS. Return an
4860 SSA name representing the value of the replacement sequence. */
4862 static tree
4863 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
4865 unsigned i, j, vec_len;
4866 int ii;
4867 operand_entry *oe;
4868 repeat_factor *rf1, *rf2;
4869 repeat_factor rfnew;
4870 tree result = NULL_TREE;
4871 tree target_ssa, iter_result;
4872 tree type = TREE_TYPE (gimple_get_lhs (stmt));
4873 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
4874 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4875 gimple *mul_stmt, *pow_stmt;
4877 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
4878 target. */
4879 if (!powi_fndecl)
4880 return NULL_TREE;
4882 /* Allocate the repeated factor vector. */
4883 repeat_factor_vec.create (10);
4885 /* Scan the OPS vector for all SSA names in the product and build
4886 up a vector of occurrence counts for each factor. */
4887 FOR_EACH_VEC_ELT (*ops, i, oe)
4889 if (TREE_CODE (oe->op) == SSA_NAME)
4891 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4893 if (rf1->factor == oe->op)
4895 rf1->count += oe->count;
4896 break;
4900 if (j >= repeat_factor_vec.length ())
4902 rfnew.factor = oe->op;
4903 rfnew.rank = oe->rank;
4904 rfnew.count = oe->count;
4905 rfnew.repr = NULL_TREE;
4906 repeat_factor_vec.safe_push (rfnew);
4911 /* Sort the repeated factor vector by (a) increasing occurrence count,
4912 and (b) decreasing rank. */
4913 repeat_factor_vec.qsort (compare_repeat_factors);
4915 /* It is generally best to combine as many base factors as possible
4916 into a product before applying __builtin_powi to the result.
4917 However, the sort order chosen for the repeated factor vector
4918 allows us to cache partial results for the product of the base
4919 factors for subsequent use. When we already have a cached partial
4920 result from a previous iteration, it is best to make use of it
4921 before looking for another __builtin_pow opportunity.
4923 As an example, consider x * x * y * y * y * z * z * z * z.
4924 We want to first compose the product x * y * z, raise it to the
4925 second power, then multiply this by y * z, and finally multiply
4926 by z. This can be done in 5 multiplies provided we cache y * z
4927 for use in both expressions:
4929 t1 = y * z
4930 t2 = t1 * x
4931 t3 = t2 * t2
4932 t4 = t1 * t3
4933 result = t4 * z
4935 If we instead ignored the cached y * z and first multiplied by
4936 the __builtin_pow opportunity z * z, we would get the inferior:
4938 t1 = y * z
4939 t2 = t1 * x
4940 t3 = t2 * t2
4941 t4 = z * z
4942 t5 = t3 * t4
4943 result = t5 * y */
4945 vec_len = repeat_factor_vec.length ();
4947 /* Repeatedly look for opportunities to create a builtin_powi call. */
4948 while (true)
4950 HOST_WIDE_INT power;
4952 /* First look for the largest cached product of factors from
4953 preceding iterations. If found, create a builtin_powi for
4954 it if the minimum occurrence count for its factors is at
4955 least 2, or just use this cached product as our next
4956 multiplicand if the minimum occurrence count is 1. */
4957 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
4959 if (rf1->repr && rf1->count > 0)
4960 break;
4963 if (j < vec_len)
4965 power = rf1->count;
4967 if (power == 1)
4969 iter_result = rf1->repr;
4971 if (dump_file && (dump_flags & TDF_DETAILS))
4973 unsigned elt;
4974 repeat_factor *rf;
4975 fputs ("Multiplying by cached product ", dump_file);
4976 for (elt = j; elt < vec_len; elt++)
4978 rf = &repeat_factor_vec[elt];
4979 print_generic_expr (dump_file, rf->factor, 0);
4980 if (elt < vec_len - 1)
4981 fputs (" * ", dump_file);
4983 fputs ("\n", dump_file);
4986 else
4988 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
4989 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
4990 build_int_cst (integer_type_node,
4991 power));
4992 gimple_call_set_lhs (pow_stmt, iter_result);
4993 gimple_set_location (pow_stmt, gimple_location (stmt));
4994 gimple_set_uid (pow_stmt, gimple_uid (stmt));
4995 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
4997 if (dump_file && (dump_flags & TDF_DETAILS))
4999 unsigned elt;
5000 repeat_factor *rf;
5001 fputs ("Building __builtin_pow call for cached product (",
5002 dump_file);
5003 for (elt = j; elt < vec_len; elt++)
5005 rf = &repeat_factor_vec[elt];
5006 print_generic_expr (dump_file, rf->factor, 0);
5007 if (elt < vec_len - 1)
5008 fputs (" * ", dump_file);
5010 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
5011 power);
5015 else
5017 /* Otherwise, find the first factor in the repeated factor
5018 vector whose occurrence count is at least 2. If no such
5019 factor exists, there are no builtin_powi opportunities
5020 remaining. */
5021 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
5023 if (rf1->count >= 2)
5024 break;
5027 if (j >= vec_len)
5028 break;
5030 power = rf1->count;
5032 if (dump_file && (dump_flags & TDF_DETAILS))
5034 unsigned elt;
5035 repeat_factor *rf;
5036 fputs ("Building __builtin_pow call for (", dump_file);
5037 for (elt = j; elt < vec_len; elt++)
5039 rf = &repeat_factor_vec[elt];
5040 print_generic_expr (dump_file, rf->factor, 0);
5041 if (elt < vec_len - 1)
5042 fputs (" * ", dump_file);
5044 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
5047 reassociate_stats.pows_created++;
5049 /* Visit each element of the vector in reverse order (so that
5050 high-occurrence elements are visited first, and within the
5051 same occurrence count, lower-ranked elements are visited
5052 first). Form a linear product of all elements in this order
5053 whose occurrence count is at least that of element J.
5054 Record the SSA name representing the product of each element
5055 with all subsequent elements in the vector. */
5056 if (j == vec_len - 1)
5057 rf1->repr = rf1->factor;
5058 else
5060 for (ii = vec_len - 2; ii >= (int)j; ii--)
5062 tree op1, op2;
5064 rf1 = &repeat_factor_vec[ii];
5065 rf2 = &repeat_factor_vec[ii + 1];
5067 /* Init the last factor's representative to be itself. */
5068 if (!rf2->repr)
5069 rf2->repr = rf2->factor;
5071 op1 = rf1->factor;
5072 op2 = rf2->repr;
5074 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
5075 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
5076 op1, op2);
5077 gimple_set_location (mul_stmt, gimple_location (stmt));
5078 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5079 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5080 rf1->repr = target_ssa;
5082 /* Don't reprocess the multiply we just introduced. */
5083 gimple_set_visited (mul_stmt, true);
5087 /* Form a call to __builtin_powi for the maximum product
5088 just formed, raised to the power obtained earlier. */
5089 rf1 = &repeat_factor_vec[j];
5090 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
5091 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
5092 build_int_cst (integer_type_node,
5093 power));
5094 gimple_call_set_lhs (pow_stmt, iter_result);
5095 gimple_set_location (pow_stmt, gimple_location (stmt));
5096 gimple_set_uid (pow_stmt, gimple_uid (stmt));
5097 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
5100 /* If we previously formed at least one other builtin_powi call,
5101 form the product of this one and those others. */
5102 if (result)
5104 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
5105 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
5106 result, iter_result);
5107 gimple_set_location (mul_stmt, gimple_location (stmt));
5108 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5109 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
5110 gimple_set_visited (mul_stmt, true);
5111 result = new_result;
5113 else
5114 result = iter_result;
5116 /* Decrement the occurrence count of each element in the product
5117 by the count found above, and remove this many copies of each
5118 factor from OPS. */
5119 for (i = j; i < vec_len; i++)
5121 unsigned k = power;
5122 unsigned n;
5124 rf1 = &repeat_factor_vec[i];
5125 rf1->count -= power;
5127 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
5129 if (oe->op == rf1->factor)
5131 if (oe->count <= k)
5133 ops->ordered_remove (n);
5134 k -= oe->count;
5136 if (k == 0)
5137 break;
5139 else
5141 oe->count -= k;
5142 break;
5149 /* At this point all elements in the repeated factor vector have a
5150 remaining occurrence count of 0 or 1, and those with a count of 1
5151 don't have cached representatives. Re-sort the ops vector and
5152 clean up. */
5153 ops->qsort (sort_by_operand_rank);
5154 repeat_factor_vec.release ();
5156 /* Return the final product computed herein. Note that there may
5157 still be some elements with single occurrence count left in OPS;
5158 those will be handled by the normal reassociation logic. */
5159 return result;
5162 /* Attempt to optimize
5163 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
5164 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
5166 static void
5167 attempt_builtin_copysign (vec<operand_entry *> *ops)
5169 operand_entry *oe;
5170 unsigned int i;
5171 unsigned int length = ops->length ();
5172 tree cst = ops->last ()->op;
5174 if (length == 1 || TREE_CODE (cst) != REAL_CST)
5175 return;
5177 FOR_EACH_VEC_ELT (*ops, i, oe)
5179 if (TREE_CODE (oe->op) == SSA_NAME
5180 && has_single_use (oe->op))
5182 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5183 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
5185 tree arg0, arg1;
5186 switch (gimple_call_combined_fn (old_call))
5188 CASE_CFN_COPYSIGN:
5189 arg0 = gimple_call_arg (old_call, 0);
5190 arg1 = gimple_call_arg (old_call, 1);
5191 /* The first argument of copysign must be a constant,
5192 otherwise there's nothing to do. */
5193 if (TREE_CODE (arg0) == REAL_CST)
5195 tree type = TREE_TYPE (arg0);
5196 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
5197 /* If we couldn't fold to a single constant, skip it.
5198 That happens e.g. for inexact multiplication when
5199 -frounding-math. */
5200 if (mul == NULL_TREE)
5201 break;
5202 /* Instead of adjusting OLD_CALL, let's build a new
5203 call to not leak the LHS and prevent keeping bogus
5204 debug statements. DCE will clean up the old call. */
5205 gcall *new_call;
5206 if (gimple_call_internal_p (old_call))
5207 new_call = gimple_build_call_internal
5208 (IFN_COPYSIGN, 2, mul, arg1);
5209 else
5210 new_call = gimple_build_call
5211 (gimple_call_fndecl (old_call), 2, mul, arg1);
5212 tree lhs = make_ssa_name (type);
5213 gimple_call_set_lhs (new_call, lhs);
5214 gimple_set_location (new_call,
5215 gimple_location (old_call));
5216 insert_stmt_after (new_call, old_call);
5217 /* We've used the constant, get rid of it. */
5218 ops->pop ();
5219 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
5220 /* Handle the CST1 < 0 case by negating the result. */
5221 if (cst1_neg)
5223 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
5224 gimple *negate_stmt
5225 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
5226 insert_stmt_after (negate_stmt, new_call);
5227 oe->op = negrhs;
5229 else
5230 oe->op = lhs;
5231 if (dump_file && (dump_flags & TDF_DETAILS))
5233 fprintf (dump_file, "Optimizing copysign: ");
5234 print_generic_expr (dump_file, cst, 0);
5235 fprintf (dump_file, " * COPYSIGN (");
5236 print_generic_expr (dump_file, arg0, 0);
5237 fprintf (dump_file, ", ");
5238 print_generic_expr (dump_file, arg1, 0);
5239 fprintf (dump_file, ") into %sCOPYSIGN (",
5240 cst1_neg ? "-" : "");
5241 print_generic_expr (dump_file, mul, 0);
5242 fprintf (dump_file, ", ");
5243 print_generic_expr (dump_file, arg1, 0);
5244 fprintf (dump_file, ")\n");
5246 return;
5248 break;
5249 default:
5250 break;
5257 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
5259 static void
5260 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
5262 tree rhs1;
5264 if (dump_file && (dump_flags & TDF_DETAILS))
5266 fprintf (dump_file, "Transforming ");
5267 print_gimple_stmt (dump_file, stmt, 0, 0);
5270 rhs1 = gimple_assign_rhs1 (stmt);
5271 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
5272 update_stmt (stmt);
5273 remove_visited_stmt_chain (rhs1);
5275 if (dump_file && (dump_flags & TDF_DETAILS))
5277 fprintf (dump_file, " into ");
5278 print_gimple_stmt (dump_file, stmt, 0, 0);
5282 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
5284 static void
5285 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
5286 tree rhs1, tree rhs2)
5288 if (dump_file && (dump_flags & TDF_DETAILS))
5290 fprintf (dump_file, "Transforming ");
5291 print_gimple_stmt (dump_file, stmt, 0, 0);
5294 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
5295 update_stmt (gsi_stmt (*gsi));
5296 remove_visited_stmt_chain (rhs1);
5298 if (dump_file && (dump_flags & TDF_DETAILS))
5300 fprintf (dump_file, " into ");
5301 print_gimple_stmt (dump_file, stmt, 0, 0);
5305 /* Reassociate expressions in basic block BB and its post-dominator
5306 children.
5308 Bubble up return status from maybe_optimize_range_tests. */
5310 static bool
5311 reassociate_bb (basic_block bb)
5313 gimple_stmt_iterator gsi;
5314 basic_block son;
5315 gimple *stmt = last_stmt (bb);
5316 bool cfg_cleanup_needed = false;
5318 if (stmt && !gimple_visited_p (stmt))
5319 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
5321 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
5323 stmt = gsi_stmt (gsi);
5325 if (is_gimple_assign (stmt)
5326 && !stmt_could_throw_p (stmt))
5328 tree lhs, rhs1, rhs2;
5329 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5331 /* If this is not a gimple binary expression, there is
5332 nothing for us to do with it. */
5333 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5334 continue;
5336 /* If this was part of an already processed statement,
5337 we don't need to touch it again. */
5338 if (gimple_visited_p (stmt))
5340 /* This statement might have become dead because of previous
5341 reassociations. */
5342 if (has_zero_uses (gimple_get_lhs (stmt)))
5344 reassoc_remove_stmt (&gsi);
5345 release_defs (stmt);
5346 /* We might end up removing the last stmt above which
5347 places the iterator to the end of the sequence.
5348 Reset it to the last stmt in this case which might
5349 be the end of the sequence as well if we removed
5350 the last statement of the sequence. In which case
5351 we need to bail out. */
5352 if (gsi_end_p (gsi))
5354 gsi = gsi_last_bb (bb);
5355 if (gsi_end_p (gsi))
5356 break;
5359 continue;
5362 lhs = gimple_assign_lhs (stmt);
5363 rhs1 = gimple_assign_rhs1 (stmt);
5364 rhs2 = gimple_assign_rhs2 (stmt);
5366 /* For non-bit or min/max operations we can't associate
5367 all types. Verify that here. */
5368 if (rhs_code != BIT_IOR_EXPR
5369 && rhs_code != BIT_AND_EXPR
5370 && rhs_code != BIT_XOR_EXPR
5371 && rhs_code != MIN_EXPR
5372 && rhs_code != MAX_EXPR
5373 && (!can_reassociate_p (lhs)
5374 || !can_reassociate_p (rhs1)
5375 || !can_reassociate_p (rhs2)))
5376 continue;
5378 if (associative_tree_code (rhs_code))
5380 auto_vec<operand_entry *> ops;
5381 tree powi_result = NULL_TREE;
5382 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
5384 /* There may be no immediate uses left by the time we
5385 get here because we may have eliminated them all. */
5386 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
5387 continue;
5389 gimple_set_visited (stmt, true);
5390 linearize_expr_tree (&ops, stmt, true, true);
5391 ops.qsort (sort_by_operand_rank);
5392 optimize_ops_list (rhs_code, &ops);
5393 if (undistribute_ops_list (rhs_code, &ops,
5394 loop_containing_stmt (stmt)))
5396 ops.qsort (sort_by_operand_rank);
5397 optimize_ops_list (rhs_code, &ops);
5400 if (rhs_code == PLUS_EXPR
5401 && transform_add_to_multiply (&ops))
5402 ops.qsort (sort_by_operand_rank);
5404 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5406 if (is_vector)
5407 optimize_vec_cond_expr (rhs_code, &ops);
5408 else
5409 optimize_range_tests (rhs_code, &ops);
5412 if (rhs_code == MULT_EXPR && !is_vector)
5414 attempt_builtin_copysign (&ops);
5416 if (reassoc_insert_powi_p
5417 && flag_unsafe_math_optimizations)
5418 powi_result = attempt_builtin_powi (stmt, &ops);
5421 operand_entry *last;
5422 bool negate_result = false;
5423 if (ops.length () > 1
5424 && rhs_code == MULT_EXPR)
5426 last = ops.last ();
5427 if ((integer_minus_onep (last->op)
5428 || real_minus_onep (last->op))
5429 && !HONOR_SNANS (TREE_TYPE (lhs))
5430 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
5431 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
5433 ops.pop ();
5434 negate_result = true;
5438 tree new_lhs = lhs;
5439 /* If the operand vector is now empty, all operands were
5440 consumed by the __builtin_powi optimization. */
5441 if (ops.length () == 0)
5442 transform_stmt_to_copy (&gsi, stmt, powi_result);
5443 else if (ops.length () == 1)
5445 tree last_op = ops.last ()->op;
5447 /* If the stmt that defines operand has to be inserted, insert it
5448 before the use. */
5449 if (ops.last ()->stmt_to_insert)
5450 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
5451 if (powi_result)
5452 transform_stmt_to_multiply (&gsi, stmt, last_op,
5453 powi_result);
5454 else
5455 transform_stmt_to_copy (&gsi, stmt, last_op);
5457 else
5459 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5460 int ops_num = ops.length ();
5461 int width = get_reassociation_width (ops_num, rhs_code, mode);
5463 if (dump_file && (dump_flags & TDF_DETAILS))
5464 fprintf (dump_file,
5465 "Width = %d was chosen for reassociation\n", width);
5467 if (width > 1
5468 && ops.length () > 3)
5469 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
5470 width, ops);
5471 else
5473 /* When there are three operands left, we want
5474 to make sure the ones that get the double
5475 binary op are chosen wisely. */
5476 int len = ops.length ();
5477 if (len >= 3)
5478 swap_ops_for_binary_stmt (ops, len - 3, stmt);
5480 new_lhs = rewrite_expr_tree (stmt, 0, ops,
5481 powi_result != NULL
5482 || negate_result);
5485 /* If we combined some repeated factors into a
5486 __builtin_powi call, multiply that result by the
5487 reassociated operands. */
5488 if (powi_result)
5490 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
5491 tree type = TREE_TYPE (lhs);
5492 tree target_ssa = make_temp_ssa_name (type, NULL,
5493 "reassocpow");
5494 gimple_set_lhs (lhs_stmt, target_ssa);
5495 update_stmt (lhs_stmt);
5496 if (lhs != new_lhs)
5498 target_ssa = new_lhs;
5499 new_lhs = lhs;
5501 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
5502 powi_result, target_ssa);
5503 gimple_set_location (mul_stmt, gimple_location (stmt));
5504 gimple_set_uid (mul_stmt, gimple_uid (stmt));
5505 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
5509 if (negate_result)
5511 stmt = SSA_NAME_DEF_STMT (lhs);
5512 tree tmp = make_ssa_name (TREE_TYPE (lhs));
5513 gimple_set_lhs (stmt, tmp);
5514 if (lhs != new_lhs)
5515 tmp = new_lhs;
5516 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
5517 tmp);
5518 gimple_set_uid (neg_stmt, gimple_uid (stmt));
5519 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
5520 update_stmt (stmt);
5525 for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
5526 son;
5527 son = next_dom_son (CDI_POST_DOMINATORS, son))
5528 cfg_cleanup_needed |= reassociate_bb (son);
5530 return cfg_cleanup_needed;
5533 /* Add jumps around shifts for range tests turned into bit tests.
5534 For each SSA_NAME VAR we have code like:
5535 VAR = ...; // final stmt of range comparison
5536 // bit test here...;
5537 OTHERVAR = ...; // final stmt of the bit test sequence
5538 RES = VAR | OTHERVAR;
5539 Turn the above into:
5540 VAR = ...;
5541 if (VAR != 0)
5542 goto <l3>;
5543 else
5544 goto <l2>;
5545 <l2>:
5546 // bit test here...;
5547 OTHERVAR = ...;
5548 <l3>:
5549 # RES = PHI<1(l1), OTHERVAR(l2)>; */
5551 static void
5552 branch_fixup (void)
5554 tree var;
5555 unsigned int i;
5557 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
5559 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
5560 gimple *use_stmt;
5561 use_operand_p use;
5562 bool ok = single_imm_use (var, &use, &use_stmt);
5563 gcc_assert (ok
5564 && is_gimple_assign (use_stmt)
5565 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
5566 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
5568 basic_block cond_bb = gimple_bb (def_stmt);
5569 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
5570 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
5572 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
5573 gimple *g = gimple_build_cond (NE_EXPR, var,
5574 build_zero_cst (TREE_TYPE (var)),
5575 NULL_TREE, NULL_TREE);
5576 location_t loc = gimple_location (use_stmt);
5577 gimple_set_location (g, loc);
5578 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
5580 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
5581 etrue->probability = REG_BR_PROB_BASE / 2;
5582 etrue->count = cond_bb->count / 2;
5583 edge efalse = find_edge (cond_bb, then_bb);
5584 efalse->flags = EDGE_FALSE_VALUE;
5585 efalse->probability -= etrue->probability;
5586 efalse->count -= etrue->count;
5587 then_bb->count -= etrue->count;
5589 tree othervar = NULL_TREE;
5590 if (gimple_assign_rhs1 (use_stmt) == var)
5591 othervar = gimple_assign_rhs2 (use_stmt);
5592 else if (gimple_assign_rhs2 (use_stmt) == var)
5593 othervar = gimple_assign_rhs1 (use_stmt);
5594 else
5595 gcc_unreachable ();
5596 tree lhs = gimple_assign_lhs (use_stmt);
5597 gphi *phi = create_phi_node (lhs, merge_bb);
5598 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
5599 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
5600 gsi = gsi_for_stmt (use_stmt);
5601 gsi_remove (&gsi, true);
5603 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
5604 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
5606 reassoc_branch_fixups.release ();
5609 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
5610 void debug_ops_vector (vec<operand_entry *> ops);
5612 /* Dump the operand entry vector OPS to FILE. */
5614 void
5615 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
5617 operand_entry *oe;
5618 unsigned int i;
5620 FOR_EACH_VEC_ELT (ops, i, oe)
5622 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
5623 print_generic_expr (file, oe->op, 0);
5624 fprintf (file, "\n");
5628 /* Dump the operand entry vector OPS to STDERR. */
5630 DEBUG_FUNCTION void
5631 debug_ops_vector (vec<operand_entry *> ops)
5633 dump_ops_vector (stderr, ops);
5636 /* Bubble up return status from reassociate_bb. */
5638 static bool
5639 do_reassoc (void)
5641 break_up_subtract_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5642 return reassociate_bb (EXIT_BLOCK_PTR_FOR_FN (cfun));
5645 /* Initialize the reassociation pass. */
5647 static void
5648 init_reassoc (void)
5650 int i;
5651 long rank = 2;
5652 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
5654 /* Find the loops, so that we can prevent moving calculations in
5655 them. */
5656 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
5658 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
5660 next_operand_entry_id = 0;
5662 /* Reverse RPO (Reverse Post Order) will give us something where
5663 deeper loops come later. */
5664 pre_and_rev_post_order_compute (NULL, bbs, false);
5665 bb_rank = XCNEWVEC (long, last_basic_block_for_fn (cfun));
5666 operand_rank = new hash_map<tree, long>;
5668 /* Give each default definition a distinct rank. This includes
5669 parameters and the static chain. Walk backwards over all
5670 SSA names so that we get proper rank ordering according
5671 to tree_swap_operands_p. */
5672 for (i = num_ssa_names - 1; i > 0; --i)
5674 tree name = ssa_name (i);
5675 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
5676 insert_operand_rank (name, ++rank);
5679 /* Set up rank for each BB */
5680 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
5681 bb_rank[bbs[i]] = ++rank << 16;
5683 free (bbs);
5684 calculate_dominance_info (CDI_POST_DOMINATORS);
5685 plus_negates = vNULL;
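/* Illustrative sketch (not part of the pass): why the basic-block rank is
   shifted left by 16 above.  The high bits carry the block's position in
   the reverse-RPO walk and the low bits carry per-statement ordering, so
   any operand defined in a later block outranks every operand defined in
   an earlier block regardless of its within-block rank.  */

static long
toy_operand_rank (long bb_order, long within_bb_rank)
{
  /* Hypothetical encoding mirroring "bb_rank[bbs[i]] = ++rank << 16".  */
  return (bb_order << 16) + within_bb_rank;
}

/* toy_operand_rank (3, 40000) == 236608 is still smaller than
   toy_operand_rank (4, 1) == 262145, so block order dominates.  */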
5688 /* Cleanup after the reassociation pass, and print stats if
5689 requested. */
5691 static void
5692 fini_reassoc (void)
5694 statistics_counter_event (cfun, "Linearized",
5695 reassociate_stats.linearized);
5696 statistics_counter_event (cfun, "Constants eliminated",
5697 reassociate_stats.constants_eliminated);
5698 statistics_counter_event (cfun, "Ops eliminated",
5699 reassociate_stats.ops_eliminated);
5700 statistics_counter_event (cfun, "Statements rewritten",
5701 reassociate_stats.rewritten);
5702 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
5703 reassociate_stats.pows_encountered);
5704 statistics_counter_event (cfun, "Built-in powi calls created",
5705 reassociate_stats.pows_created);
5707 delete operand_rank;
5708 operand_entry_pool.release ();
5709 free (bb_rank);
5710 plus_negates.release ();
5711 free_dominance_info (CDI_POST_DOMINATORS);
5712 loop_optimizer_finalize ();
5715 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
5716 insertion of __builtin_powi calls.
5718 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
5719 optimization of a gimple conditional. Otherwise returns zero. */
5721 static unsigned int
5722 execute_reassoc (bool insert_powi_p)
5724 reassoc_insert_powi_p = insert_powi_p;
5726 init_reassoc ();
5728 bool cfg_cleanup_needed;
5729 cfg_cleanup_needed = do_reassoc ();
5730 repropagate_negates ();
5731 branch_fixup ();
5733 fini_reassoc ();
5734 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
5737 namespace {
5739 const pass_data pass_data_reassoc =
5741 GIMPLE_PASS, /* type */
5742 "reassoc", /* name */
5743 OPTGROUP_NONE, /* optinfo_flags */
5744 TV_TREE_REASSOC, /* tv_id */
5745 ( PROP_cfg | PROP_ssa ), /* properties_required */
5746 0, /* properties_provided */
5747 0, /* properties_destroyed */
5748 0, /* todo_flags_start */
5749 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
5752 class pass_reassoc : public gimple_opt_pass
5754 public:
5755 pass_reassoc (gcc::context *ctxt)
5756 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
5759 /* opt_pass methods: */
5760 opt_pass * clone () { return new pass_reassoc (m_ctxt); }
5761 void set_pass_param (unsigned int n, bool param)
5763 gcc_assert (n == 0);
5764 insert_powi_p = param;
5766 virtual bool gate (function *) { return flag_tree_reassoc != 0; }
5767 virtual unsigned int execute (function *)
5768 { return execute_reassoc (insert_powi_p); }
5770 private:
5771 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
5772 point 3a in the pass header comment. */
5773 bool insert_powi_p;
5774 }; // class pass_reassoc
5776 } // anon namespace
5778 gimple_opt_pass *
5779 make_pass_reassoc (gcc::context *ctxt)
5781 return new pass_reassoc (ctxt);